From 276d9c69f9f487a1a14d03df36a69b7aa829599f Mon Sep 17 00:00:00 2001 From: plyr0 <9119209+plyr0@users.noreply.github.com> Date: Tue, 27 Aug 2024 14:59:45 +0200 Subject: [PATCH] first commit --- .gitignore | 3 +++ BiblionetkaScraper2.csproj | 16 +++++++++++++ BiblionetkaScraper2.sln | 25 +++++++++++++++++++ Program.cs | 48 +++++++++++++++++++++++++++++++++++++ README.md | Bin 0 -> 566 bytes 5 files changed, 92 insertions(+) create mode 100644 .gitignore create mode 100644 BiblionetkaScraper2.csproj create mode 100644 BiblionetkaScraper2.sln create mode 100644 Program.cs create mode 100644 README.md diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9235e91 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/bin +/obj +/.vs \ No newline at end of file diff --git a/BiblionetkaScraper2.csproj b/BiblionetkaScraper2.csproj new file mode 100644 index 0000000..c015ff8 --- /dev/null +++ b/BiblionetkaScraper2.csproj @@ -0,0 +1,16 @@ + + + + Exe + net8.0 + enable + enable + true + true + + + + + + + diff --git a/BiblionetkaScraper2.sln b/BiblionetkaScraper2.sln new file mode 100644 index 0000000..c09421c --- /dev/null +++ b/BiblionetkaScraper2.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.10.35004.147 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BiblionetkaScraper2", "BiblionetkaScraper2.csproj", "{5C82BB35-922F-4850-97CE-4A0898008EBB}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {5C82BB35-922F-4850-97CE-4A0898008EBB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {5C82BB35-922F-4850-97CE-4A0898008EBB}.Debug|Any CPU.Build.0 = Debug|Any CPU + {5C82BB35-922F-4850-97CE-4A0898008EBB}.Release|Any CPU.ActiveCfg = Release|Any CPU + {5C82BB35-922F-4850-97CE-4A0898008EBB}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {66C1FE94-EAEB-47DE-9542-BF4B007B4442} + EndGlobalSection +EndGlobal diff --git a/Program.cs b/Program.cs new file mode 100644 index 0000000..71ca954 --- /dev/null +++ b/Program.cs @@ -0,0 +1,48 @@ +namespace BiblionetkaScraper2 +{ + internal class Program + { + static void Main(string[] args) + { + Console.WriteLine("Hello, World!"); + + var output = Path.GetDirectoryName(args[0]) + Path.DirectorySeparatorChar + Path.GetFileNameWithoutExtension(args[0]) + ".csv"; + foreach(var a in args) + Scrap(a, output); + } + + private static void Scrap(string inputFile, string outputFile) + { + var doc = new HtmlAgilityPack.HtmlDocument(); + doc.Load(inputFile); + + var notes = doc.DocumentNode.SelectSingleNode("/html/body/form/div[3]/section[4]/div[2]/div[1]/div[2]/div/div/div[5]") + .Descendants() + .Where(node => node.GetAttributeValue("class", "").Contains("row forum__list")) + .ToList(); + + var path = outputFile; + using var file = File.Exists(path) ? File.Open(path, FileMode.Append) : File.Open(path, FileMode.CreateNew); + using var stream = new StreamWriter(file); + foreach (var n in notes) + { + var note = n.SelectSingleNode("div[1]/a[1]"); + var bookName = '"' + note.InnerText.Replace("\"", "'") + '"'; + + var bookNote = note.GetAttributeValue("title", "").Replace("Twoja ocena: ", "").Replace(",", "."); + + var authors = n.SelectNodes("div[1]/a") + .Skip(1) + .Where(node => !node.GetAttributeValue("class", "").Contains("icon")) + .Select(n => n.InnerText); + + var authorsJoined = authors.Count() > 1 ? string.Join(",", authors) : authors.FirstOrDefault(); + var bookAuthors = '"' + authorsJoined + '"'; + + string line = bookName + "," + bookAuthors + "," + bookNote; + Console.WriteLine(line); + stream.WriteLine(line); + } + } + } +} diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a3816f19ede00c1819c6895a6f3942e488dde4ca GIT binary patch literal 566 zcmaixO-{ow5QSeB*&uNMc9GaZkXW+J2`X^_)U;J_nuaP)Xz}5JZ^m?igoG@+&dmGs z^y~9hl^O-L8aUrHYNuIyt@W&pCVr2s9k)ka>&o{ET=Gs()S@1Z2y1fw2VaT(N@a!{ z^`r;Z$ybfjUv4$X0n`HC++N&!20N;wR^u1JoKfz%@A1kW2Y3~zHRlojz!}IM(5b1d zP)gjbQWhP^;E8)`mAhE?z`dovRVpg9BfAA_E?GOc=cpU{|4l5QI?s6FEii2IHkCWqv;WH8W6n)s bzL);*OeOEyx>nAYb*<7AWwH6mXfyo=Gg)Xq literal 0 HcmV?d00001