# script.py
import csv
import os
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from colorama import Fore, Style
# Ask whether Chrome should run without a visible window.
mode = input("Do you want to run the script in headless mode? (yes/no): ").strip().lower()

# Build the Chrome options; the extra flags are applied only in headless mode.
# "y" is accepted as shorthand for "yes" so a short answer is not silently
# treated as "no".
chrome_options = Options()
if mode in ("yes", "y"):
    chrome_options.add_argument("--headless")  # Run in headless mode
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
else:
    print(Fore.CYAN + "Running in normal mode...")

# Create the output directory before launching the browser so a filesystem
# error cannot leave a Chrome process behind. exist_ok=True replaces the
# separate existence check and its check-then-create race.
os.makedirs('CSVs', exist_ok=True)

# Initialize the WebDriver
driver = webdriver.Chrome(options=chrome_options)
# Function to process each page
def process_page(url):
    """Scrape one listing page and export its product cards to a CSV file.

    Loads ``url`` in the module-level ``driver``, waits up to 20 seconds for
    a ``div.wrapper`` element to be present, then writes one row per
    ``div.card`` element (image link, title, price) to
    ``CSVs/<heading>.csv``, where ``<heading>`` is the text of the ``h1``
    inside the first ``div.quote-card`` with spaces, ``/`` and ``\\``
    replaced by underscores.

    The file is opened in write mode and named only after the heading, so
    pages with identical headings map to the same file and a later export
    overwrites an earlier one. The file is created only when at least one
    card was found, so a page without products never produces (or truncates
    an existing file to) a header-only CSV.

    Failures derived from ``Exception`` are reported on stdout and swallowed
    so the caller can continue with the next URL.

    Args:
        url: Address of the page to scrape.

    Returns:
        None.
    """
    print(Fore.CYAN + f"Processing {url}...")

    # Step 1: navigate to the page.
    try:
        driver.get(url)
    except Exception as e:
        print(Fore.RED + f"Error processing {url}: {e}. Moving to the next link...")
        return

    # Step 2: wait for the main wrapper element to appear.
    try:
        WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.XPATH, '//div[@class="wrapper"]'))
        )
        # Fixed extra delay after the wrapper is present, to give the rest of
        # the page time to render.
        time.sleep(2)
    except Exception as e:
        print(Fore.RED + f"Error loading page {url}: {e}. Moving to the next link...")
        return

    # Step 3: extract the heading and the product cards, then export them.
    try:
        quote_card = driver.find_element(By.XPATH, '//div[@class="quote-card"]')
        heading = quote_card.find_element(By.TAG_NAME, 'h1').text

        # Sanitize outside the f-string: a backslash inside an f-string
        # replacement field is a SyntaxError before Python 3.12.
        safe_heading = heading.replace(' ', '_').replace('/', '_').replace('\\', '_')
        file_name = f"CSVs/{safe_heading}.csv"

        items_container = driver.find_element(By.XPATH, '//div[@class="wrapper"]')
        # NOTE(review): an XPath starting with '//' is evaluated against the
        # whole document even when called on an element, so this matches
        # every div.card on the page, not only those inside the wrapper.
        # Switch to './/div[@class="card"]' if scoping to the wrapper is
        # what is wanted - confirm against the live page first.
        items = items_container.find_elements(By.XPATH, '//div[@class="card"]')
        if not items:
            print(Fore.YELLOW + f"No products found on {url}. Moving to the next link...")
            return

        with open(file_name, mode='w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(["Image Link", "Title", "Price"])

            for index, item in enumerate(items):
                # A broken card is reported and skipped; the remaining cards
                # are still exported.
                try:
                    img_element = item.find_element(By.XPATH, './/img[@class="preview-image"]')
                    img_link = img_element.get_attribute("src")

                    title_element = item.find_element(By.XPATH, './/h1[@class="card-title"]')
                    title_text = title_element.text

                    # The price is the h2 inside the card body.
                    price_element = item.find_element(By.XPATH, './/div[@class="card-body"]')
                    price_text = price_element.find_element(By.TAG_NAME, 'h2').text

                    writer.writerow([img_link, title_text, price_text])

                    print(Fore.GREEN + f"Produit {index + 1}:")
                    print(Fore.GREEN + f" Image Link: {img_link}")
                    print(Fore.GREEN + f" Title: {title_text}")
                    print(Fore.GREEN + f" Price: {price_text}")
                except Exception as e:
                    print(Fore.RED + f"Error processing item {index + 1}: {e}")

        print(Fore.BLUE + f"Data exported to {file_name}")
    except Exception as e:
        print(Fore.YELLOW + f"No content found or error extracting details on {url}. Error: {e}. Moving to the next link...")
# Visit categories 121 to 127 (skipping 123) and, within each category, the
# numeric path suffixes 700 to 750 inclusive.
try:
    for category in range(121, 128):
        if category == 123:  # Skip category 123
            continue
        for i in range(700, 751):
            url = f"https://www.elkolla.scanini.tn/client/categories/{category}/{i}"
            process_page(url)
finally:
    # Always shut the browser down, even when the run is interrupted
    # (e.g. Ctrl-C) or an unexpected error escapes, so no Chrome process is
    # left running.
    driver.quit()