-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path4.py
59 lines (47 loc) · 1.95 KB
/
4.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
from bs4 import BeautifulSoup
import requests
def save(record=None):
    """Append one scraped product to ``1.txt`` as a single formatted line.

    Args:
        record: Mapping with the keys ``title``, ``description``, ``price``
            and ``img``. When omitted, the module-level ``comp`` variable is
            used instead, so existing zero-argument ``save()`` calls keep
            working.

    Raises:
        NameError: If ``record`` is omitted and no global ``comp`` exists.
        KeyError: If one of the four expected keys is missing.
    """
    if record is None:
        # Legacy path: the caller published the current item through the
        # global name ``comp`` before calling save() with no arguments.
        record = comp
    # Append mode: every call adds one line and never truncates the file.
    with open('1.txt', 'a', encoding="utf-8") as file:
        file.write(f'{record["title"]} -> description: {record["description"]} -> Price: {record["price"]} -> img: {record["img"]}\n')
def get_text(item, name=None, attrs=None, recursive=True, text=None, **kwargs):
    """Return the stripped text of the first element under *item* that matches.

    All filter arguments are forwarded unchanged to ``item.find``.

    Args:
        item: Object exposing a ``find`` method (a parsed page or tag).
        name: Tag name filter passed to ``find``.
        attrs: Attribute filter passed to ``find``; ``None`` means no filter.
        recursive: Passed to ``find`` as its ``recursive`` argument.
        text: Text filter passed to ``find``; ``None`` means no filter.
        **kwargs: Extra keyword filters for ``find`` (for example ``class_``).

    Returns:
        The matched element's text with surrounding whitespace stripped, or
        an empty string when nothing matches.
    """
    if attrs is None:
        # A fresh dict per call instead of one shared mutable default.
        attrs = {}
    try:
        # The caller's ``text`` filter is forwarded as given; it must not be
        # replaced by an empty string, which is a filter in its own right.
        return item.find(name, attrs, recursive, text, **kwargs).get_text(strip=True)
    except AttributeError:
        # find() returned None (no match) or *item* itself is not searchable.
        return ''
def get_image_src(item, name=None, attrs=None, recursive=True, text=None, **kwargs):
    """Return the ``src`` attribute of the first element under *item* that matches.

    All filter arguments are forwarded unchanged to ``item.find``.

    Args:
        item: Object exposing a ``find`` method (a parsed page or tag).
        name: Tag name filter passed to ``find``.
        attrs: Attribute filter passed to ``find``; ``None`` means no filter.
        recursive: Passed to ``find`` as its ``recursive`` argument.
        text: Text filter passed to ``find``; ``None`` means no filter.
        **kwargs: Extra keyword filters for ``find`` (for example ``class_``).

    Returns:
        Whatever ``.get('src')`` yields on the matched element (``None`` when
        the element has no ``src``), or an empty string when nothing matches.
    """
    if attrs is None:
        # A fresh dict per call instead of one shared mutable default.
        attrs = {}
    try:
        # The caller's ``text`` filter is forwarded as given; it must not be
        # replaced by an empty string, which is a filter in its own right.
        return item.find(name, attrs, recursive, text, **kwargs).get('src')
    except AttributeError:
        # find() returned None (no match) or *item* itself is not searchable.
        return ''
def parse(line):
    """Download one product page and record the product fields found on it.

    The page is fetched with a browser-like User-Agent, parsed with
    BeautifulSoup, and for every ``<body>`` element the name, price,
    description and image URL are extracted. Each record is printed and
    appended to the output file through ``save()``.

    Args:
        line: One line of the URL list; surrounding whitespace (including the
            trailing newline) is stripped before use.

    Returns:
        The list of extracted records (dicts with the keys ``title``,
        ``price``, ``img`` and ``description``); empty for a blank line.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched,
            including when the server does not answer within the timeout.
    """
    # save() picks the current record up from this module-level name.
    global comp

    url = line.strip()
    if not url:
        # Blank lines (for example a trailing newline in the list) carry no
        # URL; requesting '' would only raise and abort the whole run.
        return []

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    # Without a timeout a stalled server would block the script forever.
    response = requests.get(url, headers=headers, timeout=30)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Search across the whole page. To search only within a specific block,
    # narrow this down, e.g. soup.find_all('div', class_='offer-wrapper').
    items = soup.find_all('body')

    comps = [
        {
            'title': get_text(item, attrs={"data-qaid": 'product_name'}),
            'price': get_text(item, 'p', class_='b-product-cost__price'),
            'img': get_image_src(item, 'img', class_='cs-product-image__img'),
            'description': get_text(item, 'div', class_='b-user-content'),
        }
        for item in items
    ]

    for comp in comps:
        print(f'{comp["title"]} -> description: {comp["description"]} -> Price: {comp["price"]} -> img: {comp["img"]}')
        save()
    return comps
# Driver: every line of 3.txt is handed to parse() as one page URL.
with open("3.txt", "r", encoding="utf-8") as url_list:
    for raw_url in url_list:
        parse(raw_url)