Skip to content

Commit

Permalink
Make QuarkiverseIO fetch docs from the GH artifacts
Browse files Browse the repository at this point in the history
  • Loading branch information
marko-bekhta committed Jan 16, 2025
1 parent 2502a37 commit f72f9ef
Show file tree
Hide file tree
Showing 8 changed files with 195 additions and 96 deletions.
28 changes: 28 additions & 0 deletions README.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,34 @@ INDEXING_REPORTING_GITHUB_ISSUE_ID={github-issue-id-in-your-repository}
INDEXING_REPORTING_GITHUB_WARNING_REPEAT_DELAY=10m
----

=== Updating the built-in docs.quarkiverse.io sample

By default, the app will use the built-in docs.quarkiverse.io sample containing a snapshot of docs.quarkiverse.io.

Quarkiverse guides are indexed from the GitHub workflow artifact created as part of
the https://github.com/quarkiverse/quarkiverse-docs/actions/workflows/publish.yml[publish action].
To update the included sample, go to the https://github.com/quarkiverse/quarkiverse-docs/actions/workflows/publish.yml?query=is%3Asuccess[list of successful runs],
open the first run at the top (the latest successful execution) and download the `github-pages` artifact at the bottom of the page.

GitHub has a 100 MB limit on the files we can include in the repository. This means that the downloaded artifact has to be cleaned up
a bit. Unpack the archive and remove some of the extensions (in particular, try dropping the ones taking the most space, e.g. `quarkus-backstage`).
Make sure to keep the Amazon S3 guides, as those are used in tests.
Once you are done removing extensions, archive the remaining guides again, keeping the original layout (a zip containing `artifact.tar`), and replace the existing file.

=== Downloading the latest docs.quarkiverse.io artifact

If, during development, you need the app itself to download a fresh docs.quarkiverse.io artifact as part of the indexing process,
add the following properties to your `.env` file:

[source,properties]
----
# see about tokens https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens
QUARKIVERSEIO_GITHUB_ARTIFACT_TOKEN={your-generated-github-token}
QUARKIVERSEIO_GITHUB_ARTIFACT_REPOSITORY=quarkiverse/quarkiverse-docs
QUARKIVERSEIO_GITHUB_ARTIFACT_ACTION_NAME=Publish website
QUARKIVERSEIO_GITHUB_ARTIFACT_ARTIFACT_NAME=github-pages
----

[[testing]]
== Testing

Expand Down
9 changes: 9 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,11 @@
<artifactId>lodash</artifactId>
<version>4.17.13</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>1.27.1</version>
</dependency>
</dependencies>
</dependencyManagement>
<dependencies>
Expand Down Expand Up @@ -204,6 +209,10 @@
<groupId>io.quarkiverse.helm</groupId>
<artifactId>quarkus-helm</artifactId>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
</dependency>

<!-- Web Dependencies -->
<dependency>
Expand Down
49 changes: 47 additions & 2 deletions src/main/java/io/quarkus/search/app/fetching/FetchingService.java
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package io.quarkus.search.app.fetching;

import static io.quarkus.search.app.util.FileUtils.untar;
import static io.quarkus.search.app.util.FileUtils.unzip;

import java.io.IOException;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.LinkedHashMap;
import java.util.Map;
Expand Down Expand Up @@ -32,6 +34,12 @@
import org.hibernate.search.util.common.impl.Closer;
import org.hibernate.search.util.common.impl.SuppressingCloser;

import org.kohsuke.github.GHArtifact;
import org.kohsuke.github.GHRepository;
import org.kohsuke.github.GHWorkflowRun;
import org.kohsuke.github.GitHub;
import org.kohsuke.github.GitHubBuilder;

import io.vertx.core.impl.ConcurrentHashSet;

@ApplicationScoped
Expand All @@ -49,8 +57,45 @@ public class FetchingService {
private final Map<URI, GitCloneDirectory.Details> detailsCache = new ConcurrentHashMap<>();
private final Set<CloseableDirectory> tempDirectories = new ConcurrentHashSet<>();

public QuarkiverseIO fetchQuarkiverseIo(FailureCollector failureCollector) {
return new QuarkiverseIO(quarkiverseIOConfig, failureCollector);
/**
 * Prepares the Quarkiverse guides for indexing.
 * <p>
 * The guides archive is either downloaded from a GitHub workflow run (when the GitHub artifact
 * configuration is present) or taken from the configured local zip file. The archive is unzipped
 * into a fresh temporary directory and the {@code artifact.tar} it contains is extracted into a
 * {@code pages} sub-directory, which is then handed to the returned {@link QuarkiverseIO}.
 * <p>
 * On success, ownership of the temporary directory is transferred to the returned
 * {@link QuarkiverseIO}. On failure, the temporary directory is closed before the exception
 * is propagated, so that failed attempts do not leak it.
 *
 * @param failureCollector collector passed on to the created {@link QuarkiverseIO}
 * @return a {@link QuarkiverseIO} backed by the extracted pages
 * @throws IOException if creating the temporary directory, downloading or extracting fails
 * @throws IllegalStateException if neither source is configured, or the configured GitHub artifact cannot be found
 */
public QuarkiverseIO fetchQuarkiverseIo(FailureCollector failureCollector) throws IOException {
    CloseableDirectory tempDir = CloseableDirectory.temp("quarkiverse-io");
    try {
        Path artifact;
        if (quarkiverseIOConfig.githubArtifact().isPresent()) {
            artifact = downloadGithubArtifact(quarkiverseIOConfig.githubArtifact().get(), tempDir.path());
        } else if (quarkiverseIOConfig.zip().isPresent()) {
            artifact = quarkiverseIOConfig.zip().get().path();
        } else {
            throw new IllegalStateException(
                    "Cannot fetch Quarkiverse guides as neither zip nor GitHub configuration was supplied");
        }

        // The zip wraps a single tarball (artifact.tar) that holds the actual site pages.
        unzip(artifact, tempDir.path());
        Path pages = tempDir.path().resolve("pages");
        untar(tempDir.path().resolve("artifact.tar"), pages);

        return new QuarkiverseIO(quarkiverseIOConfig.enabled(), pages, quarkiverseIOConfig.baseUri(), failureCollector,
                tempDir);
    } catch (IOException | RuntimeException e) {
        // Nobody else owns tempDir yet: release it here, without masking the original failure.
        try {
            tempDir.close();
        } catch (Exception closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
}

/**
 * Downloads the configured artifact into {@code targetDirectory}.
 * <p>
 * Only the first successful workflow run whose name equals the configured action name is inspected
 * (NOTE(review): this relies on GitHub listing the most recent runs first — confirm).
 *
 * @param ghConfig GitHub artifact settings (token, repository, action and artifact names)
 * @param targetDirectory directory the downloaded zip is written to
 * @return path of the downloaded {@code <artifact-name>.zip}
 * @throws IOException if talking to GitHub or writing the file fails
 * @throws IllegalStateException if no matching artifact is found
 */
private static Path downloadGithubArtifact(QuarkiverseIOConfig.GithubArtifact ghConfig, Path targetDirectory)
        throws IOException {
    GitHub github = new GitHubBuilder().withOAuthToken(ghConfig.token()).build();
    GHRepository repository = github.getRepository(ghConfig.repository());
    for (GHWorkflowRun run : repository.queryWorkflowRuns()
            .conclusion(GHWorkflowRun.Conclusion.SUCCESS)
            .list().withPageSize(10)) {
        if (!ghConfig.actionName().equals(run.getName())) {
            continue;
        }
        for (GHArtifact ghArtifact : run.listArtifacts().withPageSize(5).toList()) {
            if (ghConfig.artifactName().equals(ghArtifact.getName())) {
                Path target = targetDirectory.resolve(ghArtifact.getName() + ".zip");
                Log.infof("Downloading Quarkiverse %s artifact #%s", ghConfig.artifactName(), ghArtifact.getId());
                ghArtifact.download(is -> Files.copy(is, target));
                return target;
            }
        }
        // Only the first matching run is considered; older runs are not searched.
        break;
    }
    throw new IllegalStateException("Artifact " + ghConfig.artifactName() + " not found.");
}

public QuarkusIO fetchQuarkusIo(FailureCollector failureCollector) {
Expand Down
149 changes: 60 additions & 89 deletions src/main/java/io/quarkus/search/app/quarkiverseio/QuarkiverseIO.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,18 @@

import java.io.Closeable;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.UUID;
import java.util.stream.Stream;

import jakarta.ws.rs.core.UriBuilder;

import io.quarkus.search.app.entity.Guide;
import io.quarkus.search.app.hibernate.InputProvider;
import io.quarkus.search.app.indexing.IndexableGuides;
Expand All @@ -31,110 +26,86 @@

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

public class QuarkiverseIO implements IndexableGuides, Closeable {

public static final String QUARKIVERSE_ORIGIN = "quarkiverse-hub";

private final URI quarkiverseDocsIndex;
private final FailureCollector failureCollector;

private final List<Guide> quarkiverseGuides = new ArrayList<>();
private final boolean enabled;
private final CloseableDirectory guideHtmls;
private final Path pages;
private final URI baseUri;
private final CloseableDirectory tempDir;

public QuarkiverseIO(QuarkiverseIOConfig config, FailureCollector failureCollector) {
this.quarkiverseDocsIndex = config.webUri();
this.enabled = config.enabled();
public QuarkiverseIO(boolean enabled, Path pages, URI baseUri, FailureCollector failureCollector,
CloseableDirectory tempDir) {
this.failureCollector = failureCollector;
try {
guideHtmls = CloseableDirectory.temp("quarkiverse_htmls_");
} catch (IOException e) {
throw new IllegalStateException("Failed to fetch quarkiverse guides: %s".formatted(e.getMessage()), e);
}
this.enabled = enabled;
this.pages = pages;
this.baseUri = baseUri;
this.tempDir = tempDir;
}

public void parseGuides() {
Document index = null;
try {
index = Jsoup.connect(quarkiverseDocsIndex.toString()).get();
try (Stream<Path> stream = Files.list(pages)) {
List<Path> guideDirectories = stream.filter(Files::isDirectory)
.filter(dir -> dir.getFileName().toString().startsWith("quarkus"))
.toList();
for (Path directory : guideDirectories) {
Files.walkFileTree(
directory, new SimpleFileVisitor<>() {
@Override
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
if (file.getFileName().toString().endsWith(".html")) {
quarkiverseGuides.add(readGuide(file));
}
return FileVisitResult.CONTINUE;
}

@Override
public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException {
// some extensions have these extra directories that are not "visible" from the docs page themselves,
// but are still deployed (and hence accessible). We want to ignore those:
if (dir.getFileName().toString().equals("includes")) {
return FileVisitResult.SKIP_SUBTREE;
}
return super.preVisitDirectory(dir, attrs);
}
});
}
} catch (IOException e) {
failureCollector.critical(FailureCollector.Stage.PARSING, "Unable to fetch the Quarkiverse Docs index page.", e);
// no point in doing anything else here:
return;
}

// find links to quarkiverse extension docs:
Elements quarkiverseGuideIndexLinks = index.select("ul.components li.component a.title");

for (Element quarkiverseGuideIndexLink : quarkiverseGuideIndexLinks) {
Guide guide = new Guide();
String topLevelTitle = quarkiverseGuideIndexLink.text();
guide.title.set(topLevelTitle);

Document extensionIndex = null;
try {
extensionIndex = readGuide(guide, quarkiverseGuideIndexLink.absUrl("href"), Optional.empty());
} catch (URISyntaxException | IOException e) {
failureCollector.warning(FailureCollector.Stage.PARSING,
"Unable to fetch guide: " + topLevelTitle, e);
continue;
}

quarkiverseGuides.add(guide);

// find other sub-pages on the left side
Map<URI, String> indexLinks = new HashMap<>();
Elements extensionSubGuides = extensionIndex.select("nav.nav-menu .nav-item a");
for (Element element : extensionSubGuides) {
String href = element.absUrl("href");
URI uri = UriBuilder.fromUri(href).replaceQuery(null).fragment(null).build();
indexLinks.computeIfAbsent(uri, u -> element.text());
}

for (Map.Entry<URI, String> entry : indexLinks.entrySet()) {
Guide sub = new Guide();
sub.title.set(entry.getValue());
try {
readGuide(sub, entry.getKey().toString(), Optional.of(topLevelTitle));
} catch (URISyntaxException | IOException e) {
failureCollector.warning(FailureCollector.Stage.PARSING,
"Unable to fetch guide: " + topLevelTitle, e);
continue;
}
quarkiverseGuides.add(sub);
}
}
}

private Document readGuide(Guide guide, String link, Optional<String> titlePrefix) throws URISyntaxException, IOException {
guide.url = new URI(link);
private Guide readGuide(Path file) throws IOException {
Guide guide = new Guide();
guide.url = baseUri.resolve(pages.relativize(file).toString());
guide.type = "reference";
guide.origin = QUARKIVERSE_ORIGIN;

Document extensionIndex = Jsoup.connect(link).get();
Elements content = extensionIndex.select("div.content");
Document document = Jsoup.parse(file);

String title = content.select("h1.page").text();
if (!title.isBlank()) {
String actualTitle = titlePrefix.map(prefix -> "%s: %s".formatted(prefix, title)).orElse(title);
guide.title.set(actualTitle);
String version = pages.relativize(file).getName(1).toString();

// Resolve the guide title, falling back to progressively less specific elements:
// the page heading first, then the breadcrumbs, then the first "h3.title" match.
String title = document.select("h1.page").text();
if (title.isBlank()) {
    // The result must be assigned: select(...).text() only returns the text, it does not update `title`.
    title = document.select("nav.breadcrumbs").text();
}
if (title.isBlank()) {
    title = document.select("h3.title").text();
}
guide.summary.set(content.select("div#preamble").text());
guide.htmlFullContentProvider.set(new FileInputProvider(link, dumpHtmlToFile(content.html())));
// we always add version to the title, since Quarkiverse extensions can have multiple docs
// with the same titles for different versions, and it's hard to distinguish them otherwise.
guide.title.set("%s (%s)".formatted(title.trim(), version));

Log.debugf("Parsed guide: %s", guide.url);
return extensionIndex;
}
guide.summary.set(document.select("div#preamble").text());
guide.htmlFullContentProvider.set(new FileInputProvider(file));

private Path dumpHtmlToFile(String html) throws IOException {
Path path = guideHtmls.path().resolve(UUID.randomUUID().toString());
try (FileOutputStream fos = new FileOutputStream(path.toFile())) {
fos.write(html.getBytes(StandardCharsets.UTF_8));
}
return path;
Log.debugf("Parsed guide: %s", guide.url);
return guide;
}

public Stream<Guide> guides() {
Expand All @@ -147,12 +118,12 @@ public Stream<Guide> guides() {
@Override
public void close() throws IOException {
try (var closer = new Closer<IOException>()) {
closer.push(CloseableDirectory::close, guideHtmls);
closer.push(List::clear, quarkiverseGuides);
closer.push(CloseableDirectory::close, tempDir);
}
}

private record FileInputProvider(String link, Path content) implements InputProvider {
private record FileInputProvider(Path content) implements InputProvider {

@Override
public InputStream open() throws IOException {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,18 +1,39 @@
package io.quarkus.search.app.quarkiverseio;

import java.net.URI;
import java.nio.file.Path;
import java.util.Optional;

import io.smallrye.config.ConfigMapping;
import io.smallrye.config.WithDefault;

/**
 * Configuration of the Quarkiverse guides source, mapped from properties with the {@code quarkiverseio} prefix.
 */
@ConfigMapping(prefix = "quarkiverseio")
public interface QuarkiverseIOConfig {
// Default location of the Quarkiverse docs index page, as a string and as a parsed URI.
String WEB_URI_DEFAULT_STRING = "https://docs.quarkiverse.io/index/explore/index.html";
URI WEB_URI_DEFAULT = URI.create(WEB_URI_DEFAULT_STRING);

/**
 * @return URI of the Quarkiverse docs index page; defaults to {@link #WEB_URI_DEFAULT_STRING}.
 */
@WithDefault(WEB_URI_DEFAULT_STRING)
URI webUri();
/**
 * @return settings of a local zip archive with the guides; FetchingService only consults it
 * when {@link #githubArtifact()} is absent.
 */
Optional<Zip> zip();

/**
 * @return settings for downloading the guides archive from a GitHub workflow run; when present,
 * FetchingService uses it in preference to {@link #zip()}.
 */
Optional<GithubArtifact> githubArtifact();

/**
 * @return the flag handed over to QuarkiverseIO on creation; defaults to {@code true}.
 * NOTE(review): presumably toggles indexing of Quarkiverse guides — confirm against QuarkiverseIO.
 */
@WithDefault("true")
boolean enabled();

/**
 * @return base URI against which a guide's path (relative to the unpacked pages directory)
 * is resolved to build the guide URL.
 */
@WithDefault("https://docs.quarkiverse.io/")
URI baseUri();

/**
 * Settings used to locate and download the guides archive from GitHub.
 */
interface GithubArtifact {
// OAuth token used to build the GitHub client.
String token();

// Repository name passed to the GitHub client lookup (the README example uses "quarkiverse/quarkiverse-docs").
String repository();

// Compared for equality against the workflow run name to pick the run.
//@WithDefault( "Publish website" )
String actionName();

// Compared for equality against the artifact name to pick the artifact within the run.
//@WithDefault( "github-pages" )
String artifactName();
}

/**
 * Settings of a local guides archive.
 */
interface Zip {
// Path of the zip file that FetchingService unpacks.
Path path();
}

}
Loading

0 comments on commit f72f9ef

Please sign in to comment.