namespace BiblionetkaScraper2
{
    /// <summary>
    /// Console tool that parses saved HTML pages and appends one CSV row per
    /// listed book (<c>"title","authors",rating</c>) to a single output file.
    /// </summary>
    /// <remarks>
    /// Parsing is done with HtmlAgilityPack. <c>Console</c>, <c>Path</c>,
    /// <c>StreamWriter</c> and the LINQ operators are used without
    /// <c>using</c> directives in this file, so they must be brought into scope
    /// elsewhere in the project (e.g. implicit/global usings).
    /// </remarks>
    internal class Program
    {
        /// <summary>Absolute XPath of the element that contains the list rows.</summary>
        private const string ListContainerXPath =
            "/html/body/form/div[3]/section[4]/div[2]/div[1]/div[2]/div/div/div[5]";

        /// <summary>Substring of the <c>class</c> attribute that marks a list row.</summary>
        private const string RowClassMarker = "row forum__list";

        /// <summary>Text stripped from the title link's <c>title</c> attribute to leave the rating.</summary>
        private const string RatingPrefix = "Twoja ocena: ";

        /// <summary>
        /// Entry point. Every argument is an input HTML file; all rows are
        /// appended to one CSV named after the first input (same directory,
        /// extension replaced by <c>.csv</c>).
        /// </summary>
        /// <param name="args">Paths of the HTML files to process.</param>
        static void Main(string[] args)
        {
            if (args.Length == 0)
            {
                Console.Error.WriteLine("Usage: BiblionetkaScraper2 <input.html> [<input.html> ...]");
                Environment.ExitCode = 1;
                return;
            }

            // Path.ChangeExtension keeps the directory part as given. Building the
            // path from GetDirectoryName + separator pointed a bare file name
            // ("page.html") at the filesystem root ("/page.csv").
            var output = Path.ChangeExtension(args[0], ".csv");

            foreach (var inputFile in args)
            {
                Scrap(inputFile, output);
            }
        }

        /// <summary>
        /// Extracts title, authors and rating for every list row found in
        /// <paramref name="inputFile"/> and appends them as CSV lines to
        /// <paramref name="outputFile"/>, echoing each line to the console.
        /// </summary>
        /// <param name="inputFile">Path of the HTML file to parse.</param>
        /// <param name="outputFile">Path of the CSV file; created if missing, appended to otherwise.</param>
        private static void Scrap(string inputFile, string outputFile)
        {
            var doc = new HtmlAgilityPack.HtmlDocument();
            doc.Load(inputFile);

            // SelectSingleNode returns null when the XPath matches nothing,
            // e.g. when the page layout differs from the expected one.
            var container = doc.DocumentNode.SelectSingleNode(ListContainerXPath);
            if (container is null)
            {
                Console.Error.WriteLine($"Skipping '{inputFile}': list container not found.");
                return;
            }

            var rows = container.Descendants()
                .Where(node => node.GetAttributeValue("class", "").Contains(RowClassMarker))
                .ToList();

            // append: true creates the file when it does not exist yet.
            using var writer = new StreamWriter(outputFile, append: true);

            foreach (var row in rows)
            {
                // The first link in the row's first div carries the book title as
                // text and the rating in its "title" attribute.
                var titleLink = row.SelectSingleNode("div[1]/a[1]");
                if (titleLink is null)
                {
                    continue;
                }

                var bookName = QuoteCsv(titleLink.InnerText);
                var bookNote = titleLink.GetAttributeValue("title", "")
                    .Replace(RatingPrefix, "")
                    .Replace(",", ".");

                // Remaining links are authors, except links whose class contains "icon".
                // SelectNodes is non-null here because titleLink matched the same path.
                var authors = row.SelectNodes("div[1]/a")
                    .Skip(1)
                    .Where(link => !link.GetAttributeValue("class", "").Contains("icon"))
                    .Select(link => link.InnerText);

                // string.Join covers zero, one and many authors in a single pass.
                var bookAuthors = QuoteCsv(string.Join(",", authors));

                var line = $"{bookName},{bookAuthors},{bookNote}";
                Console.WriteLine(line);
                writer.WriteLine(line);
            }
        }

        /// <summary>
        /// Decodes HTML entities in <paramref name="rawText"/>, replaces embedded
        /// double quotes with apostrophes and wraps the result in double quotes so
        /// it can be written as one CSV field.
        /// </summary>
        /// <param name="rawText">Text taken from a node's <c>InnerText</c>.</param>
        /// <returns>The quoted CSV field.</returns>
        private static string QuoteCsv(string rawText)
        {
            // InnerText keeps entities such as &amp; verbatim; decode them first so
            // that &quot; is also covered by the quote replacement below.
            var text = HtmlAgilityPack.HtmlEntity.DeEntitize(rawText) ?? string.Empty;
            return "\"" + text.Replace("\"", "'") + "\"";
        }
    }
}