run.py
import asyncio
import logging
import time

from playwright.async_api import async_playwright

import data
from scraper import scrape_data, extract_listing, extract_listing_elements
from utils import get_search_list, save_data, update_query_file, merge_excel_files, parse_coordinates

# Log errors to error_log.log with timestamps.
logging.basicConfig(filename='error_log.log', level=logging.ERROR,
                    format='%(asctime)s - %(levelname)s - %(message)s')
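
# Note (assumption): `data.data` is taken to be a module-level dict mapping
# field names to lists of scraped values, e.g. {"name": [...], "address": [...]};
# the loop in main() fills it during scraping and clears it after each save.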

async def main():
    try:
        search_list = await get_search_list()
        total = 25  # Results to scrape per search; can be set up to 150, but 120 or fewer is recommended.

        async with async_playwright() as p:
            start_time = time.time()
            # Use a raw string for the Windows path so backslashes are not treated as escapes.
            browser = await p.chromium.launch(
                executable_path=r'C:\Program Files\Google\Chrome\Application\chrome.exe',
                headless=False,
                args=['--lang=en-US'],
            )
            page = await browser.new_page()
            await page.goto("https://www.google.com/maps?hl=en", timeout=60000)
            await page.wait_for_timeout(3000)
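            # Depending on locale, Google may show a consent page before Maps loads.
            # A minimal, commented-out sketch of a workaround; the button selector
            # is an assumption and may need adjusting for your region:
            # consent = page.locator('button:has-text("Accept all")')
            # if await consent.count() > 0:
            #     await consent.first.click()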
            for search_for in search_list:
                try:
                    print(f"------ {search_for} ------")
                    await page.locator('//input[@id="searchboxinput"]').fill(search_for)
                    await page.wait_for_timeout(3000)
                    await page.keyboard.press("Enter")
                    await page.wait_for_timeout(3000)
                    # Hovering over the first result was removed because it caused issues;
                    # instead, wait for at least one result link to appear.
                    await page.wait_for_selector('//a[contains(@href, "https://www.google.com/maps/place")]')
                    listings = await scrape_data(page, total)
                    print(f"Processing listings: {listings}")
                    await extract_listing(page, listings)
                    await extract_listing_elements()
                    parse_coordinates()
                    save_data(search_for)
                    # Clear accumulated data after saving so searches don't bleed into each other.
                    for key in data.data.keys():
                        data.data[key].clear()
                except Exception as e:
                    logging.error(f"Error processing search '{search_for}': {e}")
                    continue  # Skip to the next search query
            end_time = time.time()
            print(f"Scraping took {(end_time - start_time) / 60:.2f} minutes.")
            # Close the browser explicitly before the Playwright context exits.
            await browser.close()

        if len(search_list) > 1:
            merge_excel_files()
    except Exception as e:
        logging.error(f"An error occurred in the main process: {e}")


if __name__ == "__main__":
    asyncio.run(main())
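
# Usage (assumption: get_search_list() reads the search queries from a source
# defined in utils.py, e.g. a query file):
#   python run.py
# Results for each query are saved via save_data(); when more than one query is
# given, the per-query output files are merged by merge_excel_files().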