-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathperson_data_from_google_search
112 lines (73 loc) · 2.72 KB
/
person_data_from_google_search
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# Imports
from getpass import getpass
from time import sleep

import mysql.connector
from bs4 import BeautifulSoup as bs
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
def content(s):
    """Return the visible text of an HTML fragment with its tags removed.

    ``s`` is converted with ``str()`` first, so a parsed tag object, a plain
    string, or ``None`` (the result of a failed lookup) are all accepted.
    Only characters that follow a closing ``>`` and precede the next ``<``
    are kept, which means anything ahead of the first tag is dropped and a
    value without any tags (such as ``str(None)``) yields ``""``.

    Leading characters up to the first alphabetic one are discarded.  If the
    text contains no alphabetic character at all it is kept, but surrounding
    whitespace is still trimmed.  Trailing whitespace is always trimmed so
    the indentation/newlines of the markup do not end up in stored values.

    Args:
        s: Any object; its ``str()`` form is scanned for tags.

    Returns:
        The cleaned-up text as a ``str`` (possibly empty).
    """
    markup = str(s)

    pieces = []
    # False while inside a tag and before the first tag has been closed.
    in_text = False
    for ch in markup:
        if ch == "<":
            in_text = False
        if in_text:
            pieces.append(ch)
        # Checked last so the '>' that closes a tag is not itself collected,
        # while a stray '>' inside text (already in_text) is.
        if ch == ">":
            in_text = True
    text = "".join(pieces)

    # Index of the first letter; fall back to 0 when there is none.
    start = next((pos for pos, ch in enumerate(text) if ch.isalpha()), 0)
    return text[start:].strip()
# Connection settings for the MySQL server.  Every value below is a
# placeholder that has to be replaced before the script is run.
db_settings = {
    # please enter your sql host name
    "host": "***********************************************************",
    # please enter your user name
    "user": "**********************************************************",
    # please enter your password
    "password": "******************************************************",
    # please enter your database name
    "database": "*******************************************************",
}

# Open the connection and obtain the cursor used for the INSERT statements.
mydb = mysql.connector.connect(**db_settings)
mycursor = mydb.cursor()
# Start Chrome through the local chromedriver binary and open LinkedIn.
# NOTE(review): passing the driver path positionally is believed to work only
# on Selenium 3 / early 4 releases; newer releases expect a Service object.
# Confirm against the installed Selenium version.
driver = webdriver.Chrome('C:/Users/Abhishek Choudhary/.spyder-py3/chromedriver.exe')
driver.get("https://www.linkedin.com/")

# Ask for the credentials interactively.  The password is read with getpass
# so it is not echoed to the console.
username = input("your emaild")
password = getpass("please enter your passwords")

# Fill in the sign-in form.  find_element(By...) is used instead of the
# find_element_by_* helpers so the lookups work on both old and current
# Selenium releases.
username_input = driver.find_element(By.CSS_SELECTOR, "input[name = 'session_key']")
password_input = driver.find_element(By.CSS_SELECTOR, "input[name='session_password']")
username_input.send_keys(username)
password_input.send_keys(password)

# Submit the form.
login_button = driver.find_element(By.XPATH, "//button[@type='submit']")
login_button.click()
# Fixed pauses are used throughout to let pages finish loading
# (presumably hand-tuned; there is no explicit wait on page state).
sleep(2)

# Run the Google search.  The URL needs the "//" after the scheme to be
# well-formed.
driver.get('https://www.google.com')
sleep(3)
search_query = driver.find_element(By.NAME, 'q')
search_query.send_keys("Ankur Choudhary linkedin")
sleep(0.5)
search_query.send_keys(Keys.RETURN)
sleep(3)

# Parse the result page and collect the target of the first anchor inside
# every div with class 'yuRUbf' (presumably Google's per-result wrapper --
# the class name is not under this script's control and may change).
source = driver.page_source
data = bs(source, 'html.parser')
data = data.find_all('div', class_='yuRUbf')

links = []
for i in data:
    datas = i.find('a')
    # A wrapper without an anchor, or an anchor without href, is skipped
    # instead of raising AttributeError.
    link = datas.get('href') if datas is not None else None
    if not link:
        continue
    # URLs starting with the LinkedIn "pub/dir" prefix are left out; every
    # other result URL is kept.
    if not link.startswith("https://www.linkedin.com/pub/dir/"):
        links.append(link)
# Visit every collected link, extract name / headline / location from the
# page and store one row per link in the `customers` table.
# The statement is parameterized; the driver substitutes the %s placeholders.
sql = "INSERT INTO customers (name, address, bio, link) VALUES (%s, %s, %s, %s)"

try:
    for Link in links:
        driver.get(Link)
        source = driver.page_source
        data = bs(source, 'html.parser')

        # Each lookup returns None when the element is missing; content()
        # turns that into an empty string.
        name = data.find('li', class_='inline t-24 t-black t-normal break-words')
        name = content(name)
        bio = data.find('h2', class_='mt1 t-18 t-black t-normal break-words')
        bio = content(bio)
        address = data.find('li', class_='t-16 t-black t-normal inline-block')
        address = content(address)

        val = (name, address, bio, Link)
        mycursor.execute(sql, val)
        # Commit per row so rows already scraped survive a later failure.
        mydb.commit()
        print(mycursor.rowcount, "record inserted.")
finally:
    # Always release the browser and the database handles, even when a page
    # load or an INSERT raised.
    try:
        driver.quit()
    finally:
        mycursor.close()
        mydb.close()