From 276d9c69f9f487a1a14d03df36a69b7aa829599f Mon Sep 17 00:00:00 2001
From: plyr0 <9119209+plyr0@users.noreply.github.com>
Date: Tue, 27 Aug 2024 14:59:45 +0200
Subject: [PATCH] first commit
---
.gitignore | 3 +++
BiblionetkaScraper2.csproj | 16 +++++++++++++
BiblionetkaScraper2.sln | 25 +++++++++++++++++++
Program.cs | 48 +++++++++++++++++++++++++++++++++++++
README.md | Bin 0 -> 566 bytes
5 files changed, 92 insertions(+)
create mode 100644 .gitignore
create mode 100644 BiblionetkaScraper2.csproj
create mode 100644 BiblionetkaScraper2.sln
create mode 100644 Program.cs
create mode 100644 README.md
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..9235e91
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+/bin
+/obj
+/.vs
\ No newline at end of file
diff --git a/BiblionetkaScraper2.csproj b/BiblionetkaScraper2.csproj
new file mode 100644
index 0000000..c015ff8
--- /dev/null
+++ b/BiblionetkaScraper2.csproj
@@ -0,0 +1,16 @@
+
+
+
+ Exe
+ net8.0
+ enable
+ enable
+ true
+ true
+
+
+
+
+
+
+
diff --git a/BiblionetkaScraper2.sln b/BiblionetkaScraper2.sln
new file mode 100644
index 0000000..c09421c
--- /dev/null
+++ b/BiblionetkaScraper2.sln
@@ -0,0 +1,25 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 17
+VisualStudioVersion = 17.10.35004.147
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BiblionetkaScraper2", "BiblionetkaScraper2.csproj", "{5C82BB35-922F-4850-97CE-4A0898008EBB}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|Any CPU = Debug|Any CPU
+ Release|Any CPU = Release|Any CPU
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {5C82BB35-922F-4850-97CE-4A0898008EBB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {5C82BB35-922F-4850-97CE-4A0898008EBB}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {5C82BB35-922F-4850-97CE-4A0898008EBB}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {5C82BB35-922F-4850-97CE-4A0898008EBB}.Release|Any CPU.Build.0 = Release|Any CPU
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ SolutionGuid = {66C1FE94-EAEB-47DE-9542-BF4B007B4442}
+ EndGlobalSection
+EndGlobal
diff --git a/Program.cs b/Program.cs
new file mode 100644
index 0000000..71ca954
--- /dev/null
+++ b/Program.cs
@@ -0,0 +1,104 @@
+namespace BiblionetkaScraper2
+{
+    /// <summary>
+    /// Command-line tool that extracts book titles, authors and ratings from
+    /// HTML files and appends them to a CSV file.
+    /// </summary>
+    internal class Program
+    {
+        // Absolute XPath of the element whose descendants contain the list rows.
+        private const string ListContainerXPath =
+            "/html/body/form/div[3]/section[4]/div[2]/div[1]/div[2]/div/div/div[5]";
+
+        /// <summary>
+        /// Scrapes every HTML file named on the command line into one CSV file
+        /// that sits next to the first input file and shares its base name.
+        /// </summary>
+        /// <param name="args">Paths of the HTML files to scrape.</param>
+        static void Main(string[] args)
+        {
+            if (args.Length == 0)
+            {
+                Console.Error.WriteLine("Usage: BiblionetkaScraper2 <file.html> [<file.html> ...]");
+                Environment.ExitCode = 1;
+                return;
+            }
+
+            // Path.ChangeExtension leaves the directory part untouched, so a bare
+            // file name maps to "name.csv" in the working directory instead of a
+            // path that starts with the directory separator.
+            var output = Path.ChangeExtension(args[0], ".csv");
+            foreach (var inputFile in args)
+            {
+                Scrap(inputFile, output);
+            }
+        }
+
+        /// <summary>
+        /// Parses one HTML file and appends a CSV line per list row (quoted
+        /// title, quoted authors, rating) to the output file, echoing each line
+        /// to the console.
+        /// </summary>
+        /// <param name="inputFile">Path of the HTML file to parse.</param>
+        /// <param name="outputFile">Path of the CSV file to append to; created if missing.</param>
+        private static void Scrap(string inputFile, string outputFile)
+        {
+            var doc = new HtmlAgilityPack.HtmlDocument();
+            doc.Load(inputFile);
+
+            // SelectSingleNode returns null when the XPath matches nothing.
+            var container = doc.DocumentNode.SelectSingleNode(ListContainerXPath);
+            if (container is null)
+            {
+                Console.Error.WriteLine($"No list found in '{inputFile}', skipping.");
+                return;
+            }
+
+            var rows = container.Descendants()
+                .Where(node => node.GetAttributeValue("class", "").Contains("row forum__list"));
+
+            // Append mode creates the file when it does not exist yet, so no
+            // separate existence check is needed.
+            using var writer = new StreamWriter(outputFile, append: true);
+            foreach (var row in rows)
+            {
+                // The first link holds the title as text and the rating in its "title" attribute.
+                var titleLink = row.SelectSingleNode("div[1]/a[1]");
+                if (titleLink is null)
+                {
+                    continue;
+                }
+
+                var bookNote = titleLink.GetAttributeValue("title", "")
+                    .Replace("Twoja ocena: ", "")
+                    .Replace(",", ".");
+
+                // Every further link that is not an icon is an author. SelectNodes
+                // cannot return null here because a[1] matched above.
+                var authors = row.SelectNodes("div[1]/a")
+                    .Skip(1)
+                    .Where(link => !link.GetAttributeValue("class", "").Contains("icon"))
+                    .Select(link => Decode(link.InnerText));
+
+                // string.Join covers zero, one and many authors alike.
+                var line = Quote(Decode(titleLink.InnerText)) + ","
+                    + Quote(string.Join(",", authors)) + "," + bookNote;
+                Console.WriteLine(line);
+                writer.WriteLine(line);
+            }
+        }
+
+        // InnerText keeps HTML entities such as &amp; encoded; decode them for the CSV.
+        private static string Decode(string text)
+        {
+            return HtmlAgilityPack.HtmlEntity.DeEntitize(text);
+        }
+
+        // Wraps a value in double quotes; embedded double quotes become apostrophes
+        // so they cannot terminate the CSV field early.
+        private static string Quote(string value)
+        {
+            return "\"" + value.Replace("\"", "'") + "\"";
+        }
+    }
+}
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..a3816f19ede00c1819c6895a6f3942e488dde4ca
GIT binary patch
literal 566
zcmaixO-{ow5QSeB*&uNMc9GaZkXW+J2`X^_)U;J_nuaP)Xz}5JZ^m?igoG@+&dmGs
z^y~9hl^O-L8aUrHYNuIyt@W&pCVr2s9k)ka>&o{ET=Gs()S@1Z2y1fw2VaT(N@a!{
z^`r;Z$ybfjUv4$X0n`HC++N&!20N;wR^u1JoKfz%@A1kW2Y3~zHRlojz!}IM(5b1d
zP)gjbQWhP^;E8)`mAhE?z`dovRVpg9BfAA_E?GOc=cpU{|4l5QI?s6FEii2IHkCWqv;WH8W6n)s
bzL);*OeOEyx>nAYb*<7AWwH6mXfyo=Gg)Xq
literal 0
HcmV?d00001