-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathcheck_filesstatus.py
68 lines (59 loc) · 3.02 KB
/
check_filesstatus.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import psutil # for accessing the process utilities !
from urllib.request import urlopen # If there is a webpage to be downloaded
from pdfminer.pdfinterp import PDFResourceManager, \
process_pdf # For creating the resource manager and processor for the pdf
from pdfminer.converter import TextConverter # For the conversion of the Text in the pdf
from pdfminer.layout import LAParams # Taking the Linear layout in mind
from io import StringIO # String IO for conversion to string representation
def get_all_running_files():
    """Collect the open-file lists of every process that has files open.

    Returns:
        A list with one entry per process that reported at least one open
        file; each entry is the list returned by ``proc.open_files()``.
        Processes that cannot be inspected are skipped.
    """
    file_list = []
    for proc in psutil.process_iter():
        try:
            # Query once: the call is expensive and the process may exit or
            # change its open files between two separate calls.
            open_files = proc.open_files()
        except (psutil.Error, OSError):
            # Best effort: the process vanished or access was denied.
            continue
        if open_files:
            file_list.append(open_files)
    return file_list
def get_pdf_files(file_list=None):
    """Return the paths of PDF files found in per-process open-file lists.

    Args:
        file_list: Iterable of per-process open-file lists, as produced by
            ``get_all_running_files()``. Every entry must expose a ``path``
            attribute. When omitted, a fresh snapshot is taken at call time
            (not once at import time).

    Returns:
        A list of path strings, in encounter order, for every entry whose
        path contains ``'.pdf'`` and does not end with
        ``'StandardBusiness.pdf'``.
    """
    if file_list is None:
        file_list = get_all_running_files()

    pdf_files_location = []
    for process_files in file_list:
        for open_file in process_files:
            # Read the path attribute directly instead of slicing the
            # entry's repr, which breaks on commas and escapes backslashes.
            path = open_file.path
            if '.pdf' not in path:
                continue
            if path.endswith('StandardBusiness.pdf'):
                continue
            pdf_files_location.append(path)
    return pdf_files_location
def readPDF(pdfFile):
    """Extract the text of a PDF as a single string.

    Args:
        pdfFile: The PDF to read, passed straight to ``process_pdf``
            (the caller in this file passes a file opened in ``"rb"`` mode).

    Returns:
        The extracted text, or ``None`` when anything goes wrong; the error
        is printed rather than raised.
    """
    try:
        resource_manager = PDFResourceManager()
        text_buffer = StringIO()  # receives the converter's text output
        device = TextConverter(resource_manager, text_buffer, laparams=LAParams())
        try:
            process_pdf(resource_manager, device, pdfFile)
        finally:
            # Close the device even when processing fails part-way.
            device.close()
        return text_buffer.getvalue()
    except Exception as Ex:
        # Deliberately best-effort: report the problem and signal failure.
        print("While reading the file , there was an error in the function readPDF as :", Ex)
        return None
def filter_data(data_value):
    """Return ``str(data_value)`` with each run of four backslashes replaced by ``/``."""
    as_text = str(data_value)
    pieces = as_text.split("\\\\\\\\")
    return "/".join(pieces)
#
def test():
    """Smoke-test the pipeline on the first PDF that get_pdf_files() reports.

    Looks up the PDF paths, normalises the first one with filter_data(),
    opens it in binary mode and passes it to readPDF(). Prints a notice and
    returns when no PDF is found.
    """
    pdf_paths = get_pdf_files()
    if not pdf_paths:
        # Nothing to read; avoid an IndexError on the empty list.
        print("No open PDF files were found.")
        return
    pdf_path = filter_data(pdf_paths[0])
    # The context manager guarantees the file handle is released.
    with open(pdf_path, "rb") as pdf_stream:
        readPDF(pdf_stream)
# Runs the smoke test unconditionally: there is no __main__ guard, so this
# call also executes whenever the module is imported.
test()