-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathscraper.js
45 lines (40 loc) · 1.61 KB
/
scraper.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
const puppeteer = require("puppeteer");
/**
 * Runs a Google search for `userInput`, then opens up to ten of the linked
 * results in turn and extracts the page title, the `keywords` meta tag and the
 * visible body text from each.
 *
 * A result page that fails to load or evaluate is logged to `console.error`
 * and skipped; it does not abort the whole run. The browser is always closed
 * before the function settles, whether it resolves or rejects.
 *
 * @param {string} userInput - Query typed into the search box.
 * @returns {Promise<Array<{url: string, articleTitle: string, metaKeywords: (string|null), articleContent: string}>>}
 *   One entry per successfully scraped result page, in search-result order.
 * @throws {TypeError} If `userInput` is not a non-empty string.
 * @throws {Error} Any Puppeteer error raised while launching the browser or
 *   performing the search itself (per-result errors are logged, not thrown).
 */
async function scrapeGoogleSearchResults(userInput) {
  // Fail fast: an empty query never triggers a navigation, so the original
  // flow would only fail later on a navigation timeout with a browser running.
  if (typeof userInput !== "string" || userInput.trim() === "") {
    throw new TypeError("userInput must be a non-empty search string");
  }

  const browser = await puppeteer.launch({ headless: true }); // headless: no GUI
  try {
    const page = await browser.newPage();
    await page.goto("https://www.google.com/");
    // The search box is addressed as the textarea named "q".
    await page.type("textarea[name=q]", userInput);

    // Start waiting for the navigation *before* pressing Enter. Awaiting the
    // key press first can miss a navigation that completes quickly.
    await Promise.all([page.waitForNavigation(), page.keyboard.press("Enter")]);

    // Runs inside the browser page: result titles are <h3> elements; keep only
    // those that sit inside a link, so every entry has a URL we can visit.
    const searchResults = await page.evaluate(() =>
      Array.from(document.querySelectorAll("h3"))
        .map((heading) => {
          const link = heading.closest("a");
          return { title: heading.innerText, url: link ? link.href : "" };
        })
        .filter((result) => result.url !== "")
        .slice(0, 10)
    );

    const resultsData = [];
    for (const { url } of searchResults) {
      console.log(`Visiting: ${url}`);
      let resultPage = null;
      try {
        resultPage = await browser.newPage();
        await resultPage.goto(url, { waitUntil: "networkidle0" });
        const data = await resultPage.evaluate(() => {
          const keywordsMeta = document.querySelector('meta[name="keywords"]');
          return {
            articleTitle: document.title,
            metaKeywords: keywordsMeta ? keywordsMeta.content : null,
            articleContent: document.body.innerText,
          };
        });
        console.log(data);
        resultsData.push({ url, ...data });
      } catch (error) {
        console.error(`Error visiting ${url}: ${error.message}`);
      } finally {
        // Close the tab on failure as well, otherwise every broken result
        // leaves an open page behind. A close failure is logged, not thrown,
        // so already-collected data is kept.
        if (resultPage) {
          try {
            await resultPage.close();
          } catch (closeError) {
            console.error(`Error closing page for ${url}: ${closeError.message}`);
          }
        }
      }
    }
    return resultsData;
  } finally {
    // Always release the browser process, even when the search phase throws.
    await browser.close();
  }
}
// Expose the scraper function as this CommonJS module's export.
module.exports = scrapeGoogleSearchResults;