Skip to content

Commit

Permalink
first commit
Browse files Browse the repository at this point in the history
  • Loading branch information
plyr0 committed Aug 27, 2024
0 parents commit 276d9c6
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/bin
/obj
/.vs
16 changes: 16 additions & 0 deletions BiblionetkaScraper2.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<PublishAot>true</PublishAot>
<InvariantGlobalization>true</InvariantGlobalization>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="HtmlAgilityPack" Version="1.11.64" />
</ItemGroup>

</Project>
25 changes: 25 additions & 0 deletions BiblionetkaScraper2.sln
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.10.35004.147
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BiblionetkaScraper2", "BiblionetkaScraper2.csproj", "{5C82BB35-922F-4850-97CE-4A0898008EBB}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{5C82BB35-922F-4850-97CE-4A0898008EBB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{5C82BB35-922F-4850-97CE-4A0898008EBB}.Debug|Any CPU.Build.0 = Debug|Any CPU
{5C82BB35-922F-4850-97CE-4A0898008EBB}.Release|Any CPU.ActiveCfg = Release|Any CPU
{5C82BB35-922F-4850-97CE-4A0898008EBB}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {66C1FE94-EAEB-47DE-9542-BF4B007B4442}
EndGlobalSection
EndGlobal
48 changes: 48 additions & 0 deletions Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
namespace BiblionetkaScraper2
{
    /// <summary>
    /// Command-line tool that reads saved HTML pages, extracts book title,
    /// authors and rating from every rating row, and appends them as CSV lines
    /// to a single output file.
    /// </summary>
    internal class Program
    {
        /// <summary>
        /// Absolute XPath of the element whose descendants contain the rating rows.
        /// It is tied to the exact page layout; a page with a different layout
        /// simply yields no container.
        /// </summary>
        private const string NotesContainerXPath =
            "/html/body/form/div[3]/section[4]/div[2]/div[1]/div[2]/div/div/div[5]";

        /// <summary>
        /// Entry point. Every argument is an input HTML file; all of them are
        /// scraped into one CSV placed next to the first input file and named
        /// after it (extension replaced with <c>.csv</c>).
        /// </summary>
        /// <param name="args">Paths of the HTML files to scrape.</param>
        static void Main(string[] args)
        {
            Console.WriteLine("Hello, World!");

            // Without at least one input there is nothing to derive the output name from.
            if (args.Length == 0)
            {
                Console.Error.WriteLine("Usage: BiblionetkaScraper2 <page.html> [<page.html> ...]");
                Environment.ExitCode = 1;
                return;
            }

            // Path.Combine copes with an empty directory part (bare file name), whereas
            // manual concatenation with a separator would point at the filesystem root.
            var output = Path.Combine(
                Path.GetDirectoryName(args[0]) ?? string.Empty,
                Path.GetFileNameWithoutExtension(args[0]) + ".csv");

            foreach (var inputFile in args)
            {
                Scrap(inputFile, output);
            }
        }

        /// <summary>
        /// Parses one HTML file and appends a <c>"title","authors",rating</c> line
        /// to <paramref name="outputFile"/> for every rating row found. Each line
        /// is also echoed to the console.
        /// </summary>
        /// <param name="inputFile">Path of the HTML file to parse.</param>
        /// <param name="outputFile">Path of the CSV file to append to; created when missing.</param>
        private static void Scrap(string inputFile, string outputFile)
        {
            var doc = new HtmlAgilityPack.HtmlDocument();
            doc.Load(inputFile);

            // SelectSingleNode returns null when the XPath matches nothing.
            var container = doc.DocumentNode.SelectSingleNode(NotesContainerXPath);
            if (container is null)
            {
                Console.Error.WriteLine($"No ratings list found in '{inputFile}', skipping.");
                return;
            }

            var rows = container
                .Descendants()
                .Where(node => node.GetAttributeValue("class", "").Contains("row forum__list"))
                .ToList();

            // append: true opens an existing file at its end or creates a new one,
            // so no separate File.Exists check is needed.
            using var writer = new StreamWriter(outputFile, append: true);
            foreach (var row in rows)
            {
                // The first link of the row carries the title (text) and the rating (title attribute).
                var titleLink = row.SelectSingleNode("div[1]/a[1]");
                if (titleLink is null)
                {
                    continue;
                }

                var bookName = Quote(titleLink.InnerText);

                // Strip the label and use a dot as decimal separator so the comma
                // does not clash with the CSV delimiter.
                var bookNote = titleLink.GetAttributeValue("title", "")
                    .Replace("Twoja ocena: ", "")
                    .Replace(",", ".");

                // Remaining links are authors, except links styled as icons.
                // SelectNodes returns null (not an empty collection) when nothing matches.
                var links = row.SelectNodes("div[1]/a");
                var authors = links is null
                    ? Array.Empty<string>()
                    : links
                        .Skip(1)
                        .Where(link => !link.GetAttributeValue("class", "").Contains("icon"))
                        .Select(link => link.InnerText)
                        .ToArray();

                var bookAuthors = Quote(string.Join(",", authors));

                string line = bookName + "," + bookAuthors + "," + bookNote;
                Console.WriteLine(line);
                writer.WriteLine(line);
            }
        }

        /// <summary>
        /// Wraps a value in double quotes for CSV output, replacing embedded double
        /// quotes with apostrophes so they cannot terminate the field early.
        /// </summary>
        private static string Quote(string value)
        {
            return '"' + value.Replace("\"", "'") + '"';
        }
    }
}
Binary file added README.md
Binary file not shown.

0 comments on commit 276d9c6

Please sign in to comment.