-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathdoc_2_pdf.py
92 lines (70 loc) · 2.44 KB
/
doc_2_pdf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# DOCXtoPDF.py
# Author: Vasudev Ram - http://www.dancingbison.com
# Copyright 2012 Vasudev Ram, http://www.dancingbison.com
# This is open source code, released under the New BSD License -
# see http://www.opensource.org/licenses/bsd-license.php .
# This program uses the python-docx library, available at:
# https://github.com/mikemaccana/python-docx
import sys
import os
import os.path
import string
from textwrap import TextWrapper
from docx import opendocx, getdocumenttext
from PDFWriter import PDFWriter
def docx_to_pdf(infilename, outfilename):
    """Extract the text from a DOCX file and write it to a PDF file.

    Args:
        infilename: Path of the DOCX file to read.
        outfilename: Path of the PDF file to create.

    If the DOCX file cannot be opened, an error message naming the file and
    the underlying exception is printed and the process exits with status 1.
    """
    try:
        infil = opendocx(infilename)
    except Exception as e:
        # Report the real filename and the cause; the process still exits
        # with status 1 so existing callers see the same failure mode.
        print("Error opening {}: {!r}".format(infilename, e))
        sys.exit(1)

    paragraphs = getdocumenttext(infil)

    pw = PDFWriter(outfilename)
    pw.setFont("Courier", 12)
    pw.setHeader("DOCXtoPDF - convert text in DOCX file to PDF")
    pw.setFooter("Generated by xtopdf and python-docx")
    wrapper = TextWrapper(width=70, drop_whitespace=False)

    # Wrap the paragraphs as text rather than as UTF-8 encoded bytes:
    # TextWrapper measures width in characters, so wrapping encoded bytes
    # miscounts multi-byte characters (and fails outright on Python 3,
    # where TextWrapper only accepts str).
    for paragraph in paragraphs:
        for line in wrapper.wrap(paragraph):
            pw.writeLine(line)
        # Blank line between paragraphs.
        pw.writeLine("")

    pw.savePage()
    pw.close()
def usage():
    """Return the command-line usage message, terminated by a newline.

    The output file is shown with a .pdf extension because main() rejects
    any output filename whose extension is not .PDF (case-insensitive).
    """
    return "Usage: python DOCXtoPDF.py infile.docx outfile.pdf\n"
def main():
    """Validate the command-line arguments and run the conversion.

    Expects exactly two arguments in sys.argv: an input filename with a
    .docx extension and an output filename with a .pdf extension (both
    compared case-insensitively). On a validation failure a message is
    printed and the process exits with status 1. An exception raised by the
    conversion is reported on stderr and the process exits with status 1.
    """
    # Validation happens outside the try block so that the SystemExit raised
    # by sys.exit() is never intercepted by the error handler below.

    # Check for correct number of command-line arguments.
    if len(sys.argv) != 3:
        print("Wrong number of arguments")
        sys.exit(1)

    infilename = sys.argv[1]
    outfilename = sys.argv[2]

    # Check for right infilename extension.
    infile_ext = os.path.splitext(infilename)[1]
    if infile_ext.upper() != ".DOCX":
        print("Input filename extension should be .DOCX")
        sys.exit(1)

    # Check for right outfilename extension.
    outfile_ext = os.path.splitext(outfilename)[1]
    if outfile_ext.upper() != ".PDF":
        print("Output filename extension should be .PDF")
        sys.exit(1)

    try:
        docx_to_pdf(infilename, outfilename)
    except Exception as e:
        # Bind the exception explicitly; "except Exception" also lets
        # SystemExit and KeyboardInterrupt propagate untouched.
        sys.stderr.write("Error: " + repr(e) + "\n")
        sys.exit(1)
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
# EOF