-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathcss-selectors.r
45 lines (27 loc) · 1.19 KB
/
css-selectors.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#SELECTOR EXAMPLES USING RVEST
library(rvest)
# Download and parse the demo page once; each example below queries this
# same parsed document.
website <- read_html("https://blueshift.io/selectors2.html")

# CSS selector by tag: text of every <li> element ----
allListItems <- website %>%
  html_nodes("li") %>%
  html_text()
allListItems

# CSS selector by class: text of every element with class "item1" ----
firstItems <- website %>%
  html_nodes(".item1") %>%
  html_text()
firstItems

# CSS selector by ID: text of the element whose id is "fruits" ----
fruitList <- website %>%
  html_nodes("#fruits") %>%
  html_text()
fruitList

# CSS descendant selector (ID + class): text of every ".item2" element
# nested inside the element whose id is "programming-languages" ----
cssExample <- website %>%
  html_nodes("#programming-languages .item2") %>%
  html_text()
cssExample

# **** Select by XPath ****
# First <li> child of the element whose id is "fruits".
xpathExample <- website %>%
  html_nodes(xpath = "//*[@id='fruits']/li[1]") %>%
  html_text()
xpathExample
# **** Scrape an HTML table into a data frame ****
# Source page:
# https://en.wikipedia.org/wiki/List_of_U.S._states_and_territories_by_population
wikipediaPage <- read_html("https://en.wikipedia.org/wiki/List_of_U.S._states_and_territories_by_population")

# The CSS selector of the table was copied from the browser's web inspector
# (element inspector, "Copy selector"). It is positional (nth-child), so it
# stops matching whenever the page layout changes.
wikiTableSelector <- "#mw-content-text > div > table:nth-child(12)"
wikiTableElement <- html_nodes(wikipediaPage, wikiTableSelector)

# Fail loudly instead of silently producing an empty data frame when the
# selector no longer matches anything on the page.
if (length(wikiTableElement) == 0) {
  stop(
    "No table matched CSS selector '", wikiTableSelector,
    "'; the page layout may have changed.",
    call. = FALSE
  )
}

# html_table() on a node set returns a list of tables; data.frame() turns
# that list into a single data frame.
wikiTable <- html_table(wikiTableElement, fill = TRUE) %>%
  data.frame()

# View() opens a data viewer, which only makes sense in an interactive session.
if (interactive()) {
  View(wikiTable)
}