-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
132 lines (113 loc) · 5.18 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import os
import sys
import pandas as pd
import data
from openpyxl import load_workbook
async def get_search_list():
try:
choice = input("Would you like to input search term manually (1) or use Query.txt (2)? ")
if choice == '1':
search_for = input("Please enter your search term: ")
return [search_for.strip()]
elif choice == '2':
input_file_name = 'Query.txt'
input_file_path = os.path.join(os.getcwd(), input_file_name)
if os.path.exists(input_file_path):
with open(input_file_path, 'r') as file:
return [line.strip() for line in file.readlines()]
else:
print(f'Error: {input_file_name} not found.')
sys.exit()
else:
print('Invalid choice. Exiting.')
sys.exit()
except Exception as e:
print(f"An error occurred while getting the search list: {e}")
sys.exit()
def save_data(search_for):
try:
# Ensure all lists in data.data are of the same length
min_length = min(len(data.data[key]) for key in data.data.keys())
for key in data.data.keys():
if len(data.data[key]) > min_length:
data.data[key] = data.data[key][:min_length]
map_data = {
'Names': data.data['names'], 'Address': data.data['addresses'], 'Plus Code': data.data['plus_code'], 'Phone Number': data.data['phones'],
'Website': data.data['websites'], 'Google Link': data.data['links'],
'Latitude': data.data['latitudes'], 'Longitude': data.data['longitudes'],
'Reviews_Count': data.data['reviews_count'], 'Average Rates': data.data['rates'], 'Type': data.data['type']
}
df = pd.DataFrame(map_data)
output_folder = 'output'
if not os.path.exists(output_folder):
os.makedirs(output_folder)
filename = search_for.replace(' ', '_').lower()
file_path = os.path.join(output_folder, f'{filename}.xlsx')
df.to_excel(file_path, index=False)
# Remove the processed line from Query.txt
update_query_file(search_for)
except Exception as e:
print(f"An error occurred while saving data: {e}")
def update_query_file(search_for):
try:
input_file_name = 'Query.txt'
input_file_path = os.path.join(os.getcwd(), input_file_name)
if os.path.exists(input_file_path):
with open(input_file_path, 'r') as file:
lines = file.readlines()
# Remove the processed search term
lines = [line for line in lines if line.strip() != search_for]
with open(input_file_path, 'w') as file:
file.writelines(lines)
else:
print(f'Error: {input_file_name} not found.')
except Exception as e:
print(f"An error occurred while updating the query file: {e}")
def merge_excel_files():
try:
output_folder = 'output'
all_files = [os.path.join(output_folder, f) for f in os.listdir(output_folder) if f.endswith('.xlsx')]
combined_df = pd.concat([pd.read_excel(f) for f in all_files])
combined_df.drop_duplicates(subset=['Google Link'], inplace=True)
output_file = os.path.join(output_folder, 'merged_output.xlsx')
combined_df.to_excel(output_file, index=False)
print("Merged file saved as 'merged_output.xlsx'.")
# Adjust column widths
adjust_column_width(output_file)
except Exception as e:
print(f"An error occurred while merging Excel files: {e}")
def adjust_column_width(file_path):
try:
# Load the workbook and the sheet
workbook = load_workbook(file_path)
worksheet = workbook.active
# Iterate over all columns and set the column width based on the maximum length of content
for col in worksheet.columns:
max_length = 0
col_letter = col[0].column_letter # Get the column letter (A, B, C, etc.)
for cell in col:
try:
if cell.value:
max_length = max(max_length, len(str(cell.value)))
except:
pass
adjusted_width = max_length + 2 # Add some padding
worksheet.column_dimensions[col_letter].width = adjusted_width
# Save the workbook after adjusting column widths
workbook.save(file_path)
print("Column widths adjusted for better viewing.")
except Exception as e:
print(f"An error occurred while adjusting column widths: {e}")
def parse_coordinates():
try:
for coordinate in data.data['links']:
try:
# Split the link by '@' and take the part after the last '@'
parts = coordinate.split('@')[-1].split(',')
data.data['latitudes'].append(parts[0])
data.data['longitudes'].append(parts[1])
except IndexError:
data.data['latitudes'].append(None)
data.data['longitudes'].append(None)
except Exception as e:
print(f"An error occurred while parsing coordinates: {e}")