Skip to content

Commit

Permalink
cleaned up wp sites
Browse files Browse the repository at this point in the history
  • Loading branch information
Yucked committed Oct 15, 2023
1 parent 43041c6 commit e125d68
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 159 deletions.
149 changes: 17 additions & 132 deletions Grimoire.Sources/Sources/ArenaScansSource.cs
Original file line number Diff line number Diff line change
@@ -1,153 +1,38 @@
using System.Text.Json;
using AngleSharp.Dom;
using AngleSharp.Html.Dom;
using Grimoire.Commons;
using Grimoire.Commons.Interfaces;
using Grimoire.Commons.Models;
using Grimoire.Sources.Helpers;
using Microsoft.Extensions.Logging;

namespace Grimoire.Sources.Sources;

public class ArenaScansSource : IGrimoireSource {
public class ArenaScansSource(
ILogger<ArenaScansSource> logger,
HtmlParser htmlParser) : IGrimoireSource {
public string Name
=> "Arena Scans";

// Base address of the site, without a trailing slash: Icon appends "/favicon.ico" to it,
// so a trailing slash here would produce "//favicon.ico".
public string Url
=> "https://arenascans.net";
=> "https://team11x11.fun";

public string Icon
=> $"{Url}/favicon.ico";

private readonly ILogger<ArenaScansSource> _logger;
private readonly HtmlParser _htmlParser;

protected static readonly char[] Separators = { ',', '|' };
protected static readonly string[] AltStrings = { "Alternative Titles", "desktop-titles" };

public ArenaScansSource(ILogger<ArenaScansSource> logger, HtmlParser htmlParser) {
_logger = logger;
_htmlParser = htmlParser;
/// <summary>
/// Forwards to the helper returned by <c>WordPressHelper.Helper</c> for this source's
/// logger, parser, <see cref="Name"/> and <see cref="Url"/>.
/// </summary>
/// <returns>The task returned by the helper's <c>GetMangasAsync()</c>; it is not awaited here.</returns>
public Task<IReadOnlyList<Manga>> GetMangasAsync() {
    var wordPress = WordPressHelper.Helper(logger, htmlParser, Name, Url);
    return wordPress.GetMangasAsync();
}

public async Task<IReadOnlyList<Manga>> GetMangasAsync() {
using var document = await _htmlParser.ParseAsync($"{Url}/manga/list-mode");
var results = document
.QuerySelectorAll("div.soralist > * a.series")
.AsParallel()
.Select(x => GetMangaAsync((x as IHtmlAnchorElement).Href));
return await Task.WhenAll(results);
/// <summary>
/// Forwards <paramref name="url"/> to the helper returned by <c>WordPressHelper.Helper</c>
/// for this source's logger, parser, <see cref="Name"/> and <see cref="Url"/>.
/// </summary>
/// <param name="url">Passed through unchanged to the helper's <c>GetMangaAsync</c>.</param>
/// <returns>The task returned by the helper; it is not awaited here.</returns>
public Task<Manga> GetMangaAsync(string url) {
    var wordPress = WordPressHelper.Helper(logger, htmlParser, Name, Url);
    return wordPress.GetMangaAsync(url);
}

/// <summary>
/// Parses the page at <paramref name="url"/> and builds a <see cref="Manga"/> from it.
/// </summary>
/// <remarks>
/// Name, cover and chapters are read outside the try/catch, so any exception raised while
/// reading them propagates to the caller. Metonyms, summary, genre and author are read inside
/// the try/catch: a failure there is logged and the partially filled manga is still returned.
/// </remarks>
/// <param name="url">Address of the manga page; also stored as the manga's <c>Url</c>.</param>
/// <returns>The populated manga.</returns>
public async Task<Manga> GetMangaAsync(string url) {
    using var document = await _htmlParser.ParseAsync(url);

    _logger.LogInformation("Fetching information for: {}", url);
    var manga = new Manga {
        Name = document.QuerySelector("h1.entry-title[itemprop='name']").TextContent,
        Url = url,
        SourceId = Name.GetIdFromName(),
        LastFetch = DateTimeOffset.Now,
        Cover = document.QuerySelector("img.wp-post-image").As<IHtmlImageElement>().Source,
        // Chapters are the <li> children of the first child of #chapterlist.
        Chapters = document.GetElementById("chapterlist")
            .FirstChild
            .ChildNodes
            .Where(x => x is IHtmlListItemElement)
            .Select(x => {
                var element = x as IHtmlElement;
                return new Chapter {
                    Name = element.GetElementsByClassName("chapternum").FirstOrDefault().TextContent.Clean(),
                    Url = x.FindDescendant<IHtmlAnchorElement>().Href,
                    ReleasedOn = DateOnly.Parse(
                        element.GetElementsByClassName("chapterdate").FirstOrDefault().TextContent)
                };
            })
            .ToArray()
    };

    try {
        manga.Metonyms = document
            .GetElementsByClassName("alternative")
            .FirstOrDefault()
            ?.TextContent
            .Slice(Separators);

        manga.Summary = document.QuerySelector("*[itemprop='description']")
            !.Descendents<IHtmlParagraphElement>()
            .Select(x => x.TextContent.Clean().Trim())
            .Join();

        manga.Genre = document
            .QuerySelector("div.wd-full > span.mgen")
            ?.TextContent
            .Slice(' ');

        manga.Author = document
            .QuerySelectorAll("div.tsinfo > div.imptdt")
            // Prefix test instead of [..6]: slicing throws on entries shorter than six
            // characters, which aborted the lookup and logged a spurious error.
            .FirstOrDefault(x => x.TextContent.Clean().Trim().StartsWith("Author", StringComparison.Ordinal))
            ?.TextContent
            .Slice(' ')[1..]
            .Join()
            .Clean()
            ?.Trim();
    }
    catch (Exception exception) {
        _logger.LogError("{}: {}\n{}\n{}",
            manga.Name,
            manga.Url,
            exception.Message,
            exception);
    }

    return manga;
}

public async Task<Chapter> FetchChapterAsync(Chapter chapter) {
try {
using var document = await _htmlParser.ParseAsync(chapter.Url);
var chapterId = document.Head
.Descendents<IHtmlLinkElement>()
.First(x =>
x.Type == "application/json" &&
x.Relation == "alternate")
.Href
.Split('/')[^1];

var parsedChapters = document
.GetElementById("readerarea")!
.Descendents<IHtmlImageElement>()
.Select(x => x.Source)
.ToArray();

var htmlChapters = (await GetChapterDocumentAsync())
.Descendents<IHtmlImageElement>()
.Select(x => x.Source)
.ToArray();

chapter.Pages = htmlChapters.Length > parsedChapters.Length
? htmlChapters
: parsedChapters;

return chapter;

async Task<IDocument> GetChapterDocumentAsync() {
var content =
await _htmlParser.GetContentAsync($"{Url}/wp-json/wp/v2/posts/{chapterId}");
await using var stream = await content.ReadAsStreamAsync();
using var jsonDocument = await JsonDocument.ParseAsync(stream);
var html = jsonDocument.RootElement
.GetProperty("content")
.GetProperty("rendered")
.GetString();
return await _htmlParser.ParseHtmlAsync(html);
}
}
catch (Exception exception) {
_logger.LogError("{}: {}\n{}\n{}",
chapter.Name,
chapter.Url,
exception.Message,
exception);
throw;
}
/// <summary>
/// Forwards <paramref name="chapter"/> to the helper returned by <c>WordPressHelper.Helper</c>
/// for this source's logger, parser, <see cref="Name"/> and <see cref="Url"/>.
/// </summary>
/// <param name="chapter">Passed through unchanged to the helper's <c>FetchChapterAsync</c>.</param>
/// <returns>The task returned by the helper; it is not awaited here.</returns>
public Task<Chapter> FetchChapterAsync(Chapter chapter) {
    var wordPress = WordPressHelper.Helper(logger, htmlParser, Name, Url);
    return wordPress.FetchChapterAsync(chapter);
}
}
35 changes: 24 additions & 11 deletions Grimoire.Sources/Sources/AsuraScansSource.cs
Original file line number Diff line number Diff line change
@@ -1,26 +1,39 @@
using Grimoire.Commons;
using Grimoire.Commons.Interfaces;
using Grimoire.Commons.Models;
using Grimoire.Sources.Helpers;
using Microsoft.Extensions.Logging;

namespace Grimoire.Sources.Sources;

// "img.alignnone"
public class AsuraScansSource : ArenaScansSource, IGrimoireSource {
public new string Name
public class AsuraScansSource(
ILogger<AsuraScansSource> logger,
HtmlParser htmlParser) : IGrimoireSource {
public string Name
=> "Asura Scans";

public new string Url
=> "https://www.asurascans.com";
public string Url
=> "https://asuratoon.com";

public new string Icon
public string Icon
=> $"{Url}/wp-content/uploads/2021/03/Group_1.png";

private readonly HtmlParser _htmlParser;
private readonly ILogger<AsuraScansSource> _logger;
/// <summary>
/// Forwards to the helper returned by <c>WordPressHelper.Helper</c> for this source's
/// logger, parser, <see cref="Name"/> and <see cref="Url"/>.
/// </summary>
/// <returns>The task returned by the helper's <c>GetMangasAsync()</c>; it is not awaited here.</returns>
public Task<IReadOnlyList<Manga>> GetMangasAsync() {
    var wordPress = WordPressHelper.Helper(logger, htmlParser, Name, Url);
    return wordPress.GetMangasAsync();
}

/// <summary>
/// Forwards <paramref name="url"/> to the helper returned by <c>WordPressHelper.Helper</c>
/// for this source's logger, parser, <see cref="Name"/> and <see cref="Url"/>.
/// </summary>
/// <param name="url">Passed through unchanged to the helper's <c>GetMangaAsync</c>.</param>
/// <returns>The task returned by the helper; it is not awaited here.</returns>
public Task<Manga> GetMangaAsync(string url) {
    var wordPress = WordPressHelper.Helper(logger, htmlParser, Name, Url);
    return wordPress.GetMangaAsync(url);
}

public AsuraScansSource(ILogger<AsuraScansSource> logger, HtmlParser htmlParser)
: base(logger, htmlParser) {
_logger = logger;
_htmlParser = htmlParser;
/// <summary>
/// Forwards <paramref name="chapter"/> to the helper returned by <c>WordPressHelper.Helper</c>
/// for this source's logger, parser, <see cref="Name"/> and <see cref="Url"/>.
/// </summary>
/// <param name="chapter">Passed through unchanged to the helper's <c>FetchChapterAsync</c>.</param>
/// <returns>The task returned by the helper; it is not awaited here.</returns>
public Task<Chapter> FetchChapterAsync(Chapter chapter) {
    var wordPress = WordPressHelper.Helper(logger, htmlParser, Name, Url);
    return wordPress.FetchChapterAsync(chapter);
}
}
33 changes: 23 additions & 10 deletions Grimoire.Sources/Sources/FlameScansSource.cs
Original file line number Diff line number Diff line change
@@ -1,25 +1,38 @@
using Grimoire.Commons;
using Grimoire.Commons.Interfaces;
using Grimoire.Commons.Models;
using Grimoire.Sources.Helpers;
using Microsoft.Extensions.Logging;

namespace Grimoire.Sources.Sources;

public class FlameScansSource : ArenaScansSource, IGrimoireSource {
public new string Name
/// <summary>
/// Source definition for "Flame Scans"; its operations are forwarded to <c>WordPressHelper</c>.
/// </summary>
public class FlameScansSource(
    // Logger is typed on this class, not ArenaScansSource, so log entries carry this source's category.
    ILogger<FlameScansSource> logger,
    HtmlParser htmlParser) : IGrimoireSource {
public string Name
=> "Flame Scans";

public new string Url
public string Url
=> "https://flamescans.org";

public new string Icon
public string Icon
=> $"{Url}/favicon.ico";

private readonly HtmlParser _htmlParser;
private readonly ILogger<FlameScansSource> _logger;
/// <summary>
/// Forwards to the helper returned by <c>WordPressHelper.Helper</c> for this source's
/// logger, parser, <see cref="Name"/> and <see cref="Url"/>, passing <c>"series"</c>.
/// </summary>
/// <returns>The task returned by the helper's <c>GetMangasAsync("series")</c>; it is not awaited here.</returns>
public Task<IReadOnlyList<Manga>> GetMangasAsync() {
    var wordPress = WordPressHelper.Helper(logger, htmlParser, Name, Url);
    // NOTE(review): "series" presumably selects this site's listing path; confirm against WordPressHelper.
    return wordPress.GetMangasAsync("series");
}

/// <summary>
/// Forwards <paramref name="url"/> to the helper returned by <c>WordPressHelper.Helper</c>
/// for this source's logger, parser, <see cref="Name"/> and <see cref="Url"/>.
/// </summary>
/// <param name="url">Passed through unchanged to the helper's <c>GetMangaAsync</c>.</param>
/// <returns>The task returned by the helper; it is not awaited here.</returns>
public Task<Manga> GetMangaAsync(string url) {
    var wordPress = WordPressHelper.Helper(logger, htmlParser, Name, Url);
    return wordPress.GetMangaAsync(url);
}

public FlameScansSource(ILogger<FlameScansSource> logger, HtmlParser htmlParser)
: base(logger, htmlParser) {
_logger = logger;
_htmlParser = htmlParser;
/// <summary>
/// Forwards <paramref name="chapter"/> to the helper returned by <c>WordPressHelper.Helper</c>
/// for this source's logger, parser, <see cref="Name"/> and <see cref="Url"/>.
/// </summary>
/// <param name="chapter">Passed through unchanged to the helper's <c>FetchChapterAsync</c>.</param>
/// <returns>The task returned by the helper; it is not awaited here.</returns>
public Task<Chapter> FetchChapterAsync(Chapter chapter) {
    var wordPress = WordPressHelper.Helper(logger, htmlParser, Name, Url);
    return wordPress.FetchChapterAsync(chapter);
}
}
31 changes: 25 additions & 6 deletions Grimoire.Sources/Sources/RavenScansSource.cs
Original file line number Diff line number Diff line change
@@ -1,20 +1,39 @@
using Grimoire.Commons;
using Grimoire.Commons.Interfaces;
using Grimoire.Commons.Models;
using Grimoire.Sources.Helpers;
using Microsoft.Extensions.Logging;

namespace Grimoire.Sources.Sources;

// "img.ts-main-image"
public class RavenScansSource : ArenaScansSource, IGrimoireSource {
public new string Name
/// <summary>
/// Source definition for "Raven Scans"; its operations are forwarded to <c>WordPressHelper</c>.
/// </summary>
public class RavenScansSource(
    // Logger is typed on this class, not ArenaScansSource, so log entries carry this source's category.
    ILogger<RavenScansSource> logger,
    HtmlParser htmlParser) : IGrimoireSource {
public string Name
=> "Raven Scans";

public new string Url
public string Url
=> "https://ravenscans.com";

public new string Icon
public string Icon
=> "https://i0.wp.com/ravenscans.com/wp-content/uploads/2022/12/cropped-33.png";

public RavenScansSource(ILogger<RavenScansSource> logger, HtmlParser htmlParser)
: base(logger, htmlParser) { }
/// <summary>
/// Forwards to the helper returned by <c>WordPressHelper.Helper</c> for this source's
/// logger, parser, <see cref="Name"/> and <see cref="Url"/>.
/// </summary>
/// <returns>The task returned by the helper's <c>GetMangasAsync()</c>; it is not awaited here.</returns>
public Task<IReadOnlyList<Manga>> GetMangasAsync() {
    var wordPress = WordPressHelper.Helper(logger, htmlParser, Name, Url);
    return wordPress.GetMangasAsync();
}

/// <summary>
/// Forwards <paramref name="url"/> to the helper returned by <c>WordPressHelper.Helper</c>
/// for this source's logger, parser, <see cref="Name"/> and <see cref="Url"/>.
/// </summary>
/// <param name="url">Passed through unchanged to the helper's <c>GetMangaAsync</c>.</param>
/// <returns>The task returned by the helper; it is not awaited here.</returns>
public Task<Manga> GetMangaAsync(string url) {
    var wordPress = WordPressHelper.Helper(logger, htmlParser, Name, Url);
    return wordPress.GetMangaAsync(url);
}

/// <summary>
/// Forwards <paramref name="chapter"/> to the helper returned by <c>WordPressHelper.Helper</c>
/// for this source's logger, parser, <see cref="Name"/> and <see cref="Url"/>.
/// </summary>
/// <param name="chapter">Passed through unchanged to the helper's <c>FetchChapterAsync</c>.</param>
/// <returns>The task returned by the helper; it is not awaited here.</returns>
public Task<Chapter> FetchChapterAsync(Chapter chapter) {
    var wordPress = WordPressHelper.Helper(logger, htmlParser, Name, Url);
    return wordPress.FetchChapterAsync(chapter);
}
}

0 comments on commit e125d68

Please sign in to comment.