Skip to content

Commit

Permalink
feat: scrap courses
Browse files Browse the repository at this point in the history
  • Loading branch information
D0dii committed Sep 18, 2024
1 parent 9116465 commit 3032c84
Show file tree
Hide file tree
Showing 3 changed files with 196 additions and 16 deletions.
105 changes: 91 additions & 14 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
"name": "web-planner",
"version": "0.1.0",
"private": true,
"type": "module",
"scripts": {
"dev": "next dev",
"build": "next build",
Expand All @@ -13,7 +14,8 @@
"format:check": "prettier --check .",
"knip": "knip",
"typecheck": "tsc",
"prepare": "husky"
"prepare": "husky",
"scrap": "npx tsx src/lib/scrapRegistrations.ts"
},
"dependencies": {
"@radix-ui/react-accordion": "^1.2.0",
Expand All @@ -25,7 +27,7 @@
"@radix-ui/themes": "^3.1.3",
"@t3-oss/env-nextjs": "^0.11.0",
"@tanstack/react-query": "^5.54.1",
"cheerio": "^1.0.0-rc.12",
"cheerio": "^1.0.0",
"class-variance-authority": "^0.7.0",
"clsx": "^2.1.1",
"fetch-cookie": "^3.0.1",
Expand Down
101 changes: 101 additions & 0 deletions src/lib/scrapRegistrations.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
// eslint-disable-next-line @eslint-community/eslint-comments/disable-enable-pair

/* eslint-disable no-console */
import * as cheerio from "cheerio";

// USOS registrations index page; scrapDepartments fetches it to list the
// departments and the link found in each department's row.
const DEPARTMENTS_URL =
"https://web.usos.pwr.edu.pl/kontroler.php?_action=news/rejestracje/index";

/**
 * Performs a GET request for the given URL using the global `fetch`.
 *
 * @param url - Address of the page to download.
 * @returns The raw `Response`; callers are responsible for checking `ok`
 *   and reading the body.
 */
const fetchData = async (url: string): Promise<Response> => {
  return await fetch(url);
};

/**
 * Scrapes the registrations index page (DEPARTMENTS_URL) for departments.
 *
 * For every `tr` directly under `.autostrong` inside `div#layout-c22`, the
 * inner HTML of the first `td` is taken as the department name and the `href`
 * of the first `a` as the department URL.
 *
 * @returns Two parallel arrays (`departmentsNames[i]` belongs to
 *   `departmentsUrls[i]`), or `undefined` when the HTTP response is not OK.
 */
const scrapDepartments = async () => {
  const response = await fetchData(DEPARTMENTS_URL);
  if (!response.ok) {
    console.log("Something went wrong in fetching departments");
    return;
  }
  const $ = cheerio.load(await response.text());

  const departmentsNames: string[] = [];
  const departmentsUrls: string[] = [];

  $("div#layout-c22")
    .find(".autostrong")
    .children("tr")
    .each((_index, element) => {
      const row = $(element);
      // `.html()` yields null when the row has no <td>, and `.attr()` yields
      // undefined when it has no <a href>; both are narrowed before pushing.
      const name = row.find("td").html()?.trim();
      const url = row.find("a").attr("href");
      // Skip incomplete rows entirely so the two arrays stay index-aligned
      // and never contain `undefined`.
      if (name === undefined || url === undefined) {
        return;
      }
      departmentsNames.push(name);
      departmentsUrls.push(url);
    });

  return { departmentsNames, departmentsUrls };
};

/**
 * Scrapes a single department page for its registrations.
 *
 * Looks inside `main#layout-main-content #layout-c22 div.usos-ui`: the
 * trimmed text of each direct `h2` child is collected as a registration name,
 * and the `href` of the first `a` inside each direct `usos-link` child is
 * collected as a registration URL.
 *
 * @param departmentUrl - URL of the department page to download.
 * @returns `registrationsNames` and `registrationsUrls`, or `undefined` when
 *   the HTTP response is not OK. The two arrays are gathered from separate
 *   element sets, so they only line up by position when every `usos-link`
 *   contains a link.
 */
const scrapRegistrations = async (departmentUrl: string) => {
  const response = await fetchData(departmentUrl);
  if (!response.ok) {
    console.log("Something went wrong in fetching registrations");
    return;
  }
  const $ = cheerio.load(await response.text());

  const registrationsNames: string[] = [];
  const registrationsUrls: string[] = [];

  const registrations = $("main#layout-main-content")
    .find("#layout-c22")
    .find("div.usos-ui");

  registrations.children("h2").each((_index, element) => {
    registrationsNames.push($(element).text().trim());
  });

  registrations.children("usos-link").each((_index, element) => {
    const href = $(element).find("a").attr("href");
    // `.attr()` returns undefined when there is no <a href>; never push that
    // into a `string[]`.
    if (href !== undefined) {
      registrationsUrls.push(href);
    }
  });

  return { registrationsNames, registrationsUrls };
};

/**
 * Downloads a registration's course listing and logs each row's course link.
 *
 * For every `tr` directly under the `tbody` of `table.wrnav` inside
 * `main#layout-main-content`, the `href` of the first `a` within a
 * `usos-link` element is printed to the console (`undefined` is printed for
 * rows without such a link).
 *
 * @param registrationUrl - URL of the registration's course listing page.
 * @returns Nothing; results are only logged. Returns early after logging an
 *   error message when the HTTP response is not OK.
 */
const scrapCourses = async (registrationUrl: string) => {
  const response = await fetchData(registrationUrl);
  if (!response.ok) {
    console.log("Something went wrong in fetching courses");
    return;
  }

  const $ = cheerio.load(await response.text());

  $("main#layout-main-content")
    .find("table.wrnav")
    .find("tbody")
    .children("tr")
    .each((_index, element) => {
      const href = $(element).find("usos-link").find("a").attr("href");
      console.log(href);
    });
};

// Example course-search URL for a single registration (input for scrapCourses):
// 'https://web.usos.pwr.edu.pl/kontroler.php?_action=katalog2/przedmioty/szukajPrzedmiotu&method=rej&rej_kod=W09ZARZ-SI7-24%2F25Zv&callback=g_f04839bf'

/**
 * Crawls every department and logs the registrations found for each one.
 *
 * Departments are fetched one after another. The logged value is two arrays
 * of arrays: registration names and registration URLs, with one inner array
 * per department URL, in the order the departments were scraped.
 *
 * @returns Nothing; results are only logged. Returns early when the
 *   department list could not be fetched.
 */
const main = async () => {
  // scrapDepartments returns undefined on a non-OK response (it has already
  // logged the failure), so it must be checked before reading its fields.
  const departments = await scrapDepartments();
  if (departments === undefined) {
    return;
  }

  const registrations: string[][] = [];
  const allRegistrationsUrls: string[][] = [];

  for (const department of departments.departmentsUrls) {
    const result = await scrapRegistrations(department);
    // A failed department contributes empty lists so that index i of both
    // outputs still corresponds to departmentsUrls[i].
    registrations.push(result?.registrationsNames ?? []);
    allRegistrationsUrls.push(result?.registrationsUrls ?? []);
  }
  console.log(registrations, allRegistrationsUrls);
};

// Full crawl entry point, currently disabled:
//void main();

// Manual test of scrapCourses against a single registration's course listing.
const SAMPLE_REGISTRATION_URL =
  "https://web.usos.pwr.edu.pl/kontroler.php?_action=katalog2/przedmioty/szukajPrzedmiotu&method=rej&rej_kod=W09ZARZ-SI7-24%2F25Zv&callback=g_f04839bf";
void scrapCourses(SAMPLE_REGISTRATION_URL);

0 comments on commit 3032c84

Please sign in to comment.