From 694e7cfd0b8d47155f9bbafc4f938c02535c5747 Mon Sep 17 00:00:00 2001 From: Christian Banse Date: Tue, 12 Nov 2024 21:30:16 +0100 Subject: [PATCH 1/6] Implementation of ROLIE feeds Still work in progress --- csaf | 2 +- .../github/csaf/sbom/retrieval/CsafLoader.kt | 14 +++ .../csaf/sbom/retrieval/RetrievedProvider.kt | 87 +++++++++++++------ .../sbom/retrieval/RetrievedProviderTest.kt | 12 +++ .../.well-known/csaf/feed-tlp-white.json | 52 +++++++++++ 5 files changed, 138 insertions(+), 29 deletions(-) create mode 100644 csaf-retrieval/src/test/resources/example.com/.well-known/csaf/feed-tlp-white.json diff --git a/csaf b/csaf index b01a0a7a..395fbfea 160000 --- a/csaf +++ b/csaf @@ -1 +1 @@ -Subproject commit b01a0a7a176f257e47bf6e4e3ac42d0def9620b1 +Subproject commit 395fbfea077ca05d663c7dc930850ae18f40e936 diff --git a/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/CsafLoader.kt b/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/CsafLoader.kt index e5dcc5f8..412f94ba 100644 --- a/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/CsafLoader.kt +++ b/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/CsafLoader.kt @@ -19,6 +19,7 @@ package io.github.csaf.sbom.retrieval import io.github.csaf.sbom.schema.generated.Aggregator import io.github.csaf.sbom.schema.generated.Csaf import io.github.csaf.sbom.schema.generated.Provider +import io.github.csaf.sbom.schema.generated.ROLIEFeed import io.ktor.client.* import io.ktor.client.call.* import io.ktor.client.engine.* @@ -96,6 +97,19 @@ class CsafLoader(engine: HttpClientEngine = Java.create()) { suspend fun fetchDocument(url: String, ctx: RetrievalContext): Result = Result.of { get(url, ctx.responseCallback()).also(ctx.jsonCallback()) } + /** + * Fetch and parse a ROLE feed from a given URL. + * + * @param url the URL where the ROLIE feed is found + * @param responseCallback An optional callback to further evaluate the [HttpResponse]. + * @return The resulting [ROLIEFeed], wrapped in a [Result] monad, if successful. A failed + * [Result] wrapping the thrown [Throwable] in case of an error. + */ + suspend fun fetchROLIEFeed( + url: String, + responseCallback: ((HttpResponse) -> Unit)? = null + ): Result = Result.of { get(url, responseCallback ?: {}) } + /** * Fetch an arbitrary URL's content as plain text [String], falling back to UTF-8 if no charset * is provided. diff --git a/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/RetrievedProvider.kt b/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/RetrievedProvider.kt index 20044b8d..21fd8ed3 100644 --- a/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/RetrievedProvider.kt +++ b/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/RetrievedProvider.kt @@ -21,11 +21,13 @@ import io.github.csaf.sbom.retrieval.roles.CSAFProviderRole import io.github.csaf.sbom.retrieval.roles.CSAFPublisherRole import io.github.csaf.sbom.retrieval.roles.CSAFTrustedProviderRole import io.github.csaf.sbom.schema.generated.Provider +import io.github.csaf.sbom.schema.generated.ROLIEFeed import java.util.* import java.util.concurrent.CompletableFuture import java.util.stream.Stream import java.util.stream.StreamSupport import kotlinx.coroutines.* +import kotlinx.coroutines.channels.ProducerScope import kotlinx.coroutines.channels.ReceiveChannel import kotlinx.coroutines.channels.produce import kotlinx.coroutines.channels.toList @@ -36,6 +38,7 @@ import kotlinx.coroutines.future.future * "trusted provider"), including its metadata (in the form of [Provider]) as well as functionality * to retrieve its documents. */ +@OptIn(ExperimentalCoroutinesApi::class) class RetrievedProvider(val json: Provider) : Validatable { /** @@ -58,7 +61,6 @@ class RetrievedProvider(val json: Provider) : Validatable { * to [DEFAULT_CHANNEL_CAPACITY]. * @return The fetched [Result]s, representing index contents or fetch errors. */ - @OptIn(ExperimentalCoroutinesApi::class) fun fetchDocumentIndices( loader: CsafLoader = lazyLoader, channelCapacity: Int = DEFAULT_CHANNEL_CAPACITY @@ -83,6 +85,27 @@ class RetrievedProvider(val json: Provider) : Validatable { } } + fun fetchRolieFeeds( + loader: CsafLoader = lazyLoader, + channelCapacity: Int = DEFAULT_CHANNEL_CAPACITY + ): ReceiveChannel>> { + val feeds = json.distributions?.mapNotNull { it.rolie }?.flatMap { it.feeds } ?: listOf() + + // This channel collects up to `channelCapacity` directory indices concurrently. + val rolieChannel = + ioScope.produce(capacity = channelCapacity) { + for (feed in feeds) { + send(feed to async { loader.fetchROLIEFeed(feed.url.toString()) }) + } + } + // This terminal channel is a simple "rendezvous channel" for awaiting the Results. + return ioScope.produce { + for ((feed, indexDeferred) in rolieChannel) { + send(feed to indexDeferred.await()) + } + } + } + /** * This function sums up the expected number of [RetrievedDocument]s that will be fetched from * this Provider. @@ -92,7 +115,6 @@ class RetrievedProvider(val json: Provider) : Validatable { * to [DEFAULT_CHANNEL_CAPACITY]. * @return The expected number of [RetrievedDocument]s provided. */ - @OptIn(ExperimentalCoroutinesApi::class) suspend fun countExpectedDocuments( loader: CsafLoader = lazyLoader, channelCapacity: Int = DEFAULT_CHANNEL_CAPACITY @@ -124,36 +146,13 @@ class RetrievedProvider(val json: Provider) : Validatable { channelCapacity: Int = DEFAULT_CHANNEL_CAPACITY ): ReceiveChannel> { val indexChannel = fetchDocumentIndices(loader, channelCapacity) + val rolieChannel = fetchRolieFeeds(loader, channelCapacity) + // This second channel collects up to `channelCapacity` Results concurrently, which // represent CSAF Documents or errors from fetching or validation. val documentJobChannel = ioScope.produce>>(capacity = channelCapacity) { - for ((directoryUrl, indexResult) in indexChannel) { - indexResult.fold( - { index -> - index.lines().map { line -> - send( - async { - val csafUrl = "$directoryUrl/$line" - RetrievedDocument.from(csafUrl, loader, role) - } - ) - } - }, - { e -> - send( - async { - Result.failure( - Exception( - "Failed to fetch index.txt from directory at $directoryUrl", - e - ) - ) - } - ) - } - ) - } + fetchDirectoryBased(indexChannel, loader) } // This terminal channel is a simple "rendezvous channel" for awaiting the Results. return ioScope.produce { @@ -163,6 +162,38 @@ class RetrievedProvider(val json: Provider) : Validatable { } } + private suspend fun ProducerScope>>.fetchDirectoryBased( + indexChannel: ReceiveChannel>>, + loader: CsafLoader + ) { + for ((directoryUrl, indexResult) in indexChannel) { + indexResult.fold( + { index -> + index.lines().map { line -> + send( + async { + val csafUrl = "$directoryUrl/$line" + RetrievedDocument.from(csafUrl, loader, role) + } + ) + } + }, + { e -> + send( + async { + Result.failure( + Exception( + "Failed to fetch index.txt from directory at $directoryUrl", + e + ) + ) + } + ) + } + ) + } + } + /** * This method provides the [Result]s of [fetchDocuments] as a Java [Stream] for usage in * non-Kotlin environments. diff --git a/csaf-retrieval/src/test/kotlin/io/github/csaf/sbom/retrieval/RetrievedProviderTest.kt b/csaf-retrieval/src/test/kotlin/io/github/csaf/sbom/retrieval/RetrievedProviderTest.kt index 13519f03..b381a5ea 100644 --- a/csaf-retrieval/src/test/kotlin/io/github/csaf/sbom/retrieval/RetrievedProviderTest.kt +++ b/csaf-retrieval/src/test/kotlin/io/github/csaf/sbom/retrieval/RetrievedProviderTest.kt @@ -70,6 +70,18 @@ class RetrievedProviderTest { assertFalse(documentIndexResults[1].second.isSuccess) } + @Test + fun testFetchRolieFeeds() = runTest { + val provider = RetrievedProvider.from("example.com").getOrThrow() + val documentIndexResults = provider.fetchRolieFeeds().toList() + assertEquals( + 1, + documentIndexResults.size, + "Expected exactly 1 results: One index.txt content and one fetch error" + ) + assertTrue(documentIndexResults[0].second.isSuccess) + } + private suspend fun providerTest(domain: String) { val provider = RetrievedProvider.from(domain).getOrThrow() val expectedDocumentCount = provider.countExpectedDocuments() diff --git a/csaf-retrieval/src/test/resources/example.com/.well-known/csaf/feed-tlp-white.json b/csaf-retrieval/src/test/resources/example.com/.well-known/csaf/feed-tlp-white.json new file mode 100644 index 00000000..51a9d159 --- /dev/null +++ b/csaf-retrieval/src/test/resources/example.com/.well-known/csaf/feed-tlp-white.json @@ -0,0 +1,52 @@ +{ + "feed": { + "id": "example-csaf-feed-tlp-white", + "title": "Example CSAF feed (TLP:WHITE)", + "link": [ + { + "rel": "self", + "href": "https://example.com/.well-known/csaf/feed-tlp-white.json" + } + ], + "category": [ + { + "scheme": "urn:ietf:params:rolie:category:information-type", + "term": "csaf" + } + ], + "updated": "2021-01-01T12:00:00.000Z", + "entry": [ + { + "id": "2020-ESA-001", + "title": "Example Security Advisory 001", + "link": [ + { + "rel": "self", + "href": "https://example.com/directory/2022/bsi-2022-0001.json" + }, + { + "rel": "hash", + "href": "https://example.com/directory/2022/bsi-2022-0001.json.sha512" + }, + { + "rel": "signature", + "href": "https://example.com/directory/2022/bsi-2022-0001.json.asc" + } + ], + "published": "2021-01-01T11:00:00.000Z", + "updated": "2021-01-01T12:00:00.000Z", + "summary": { + "content": "Vulnerabilities fixed in ABC 0.0.1" + }, + "content": { + "type": "application/json", + "src": "https://example.com/directory/2022/bsi-2022-0001.json" + }, + "format": { + "schema": "https://docs.oasis-open.org/csaf/csaf/v2.0/csaf_json_schema.json", + "version": "2.0" + } + } + ] + } +} \ No newline at end of file From 7d3edadda44ba607ac46e80bc56c40f70c22d40e Mon Sep 17 00:00:00 2001 From: Christian Banse Date: Tue, 12 Nov 2024 22:02:06 +0100 Subject: [PATCH 2/6] Fetching from ROLIE feed --- .../csaf/sbom/retrieval/RetrievedProvider.kt | 44 ++++++++++++++++--- .../retrieval/RetrievedProviderJavaTest.java | 4 +- .../sbom/retrieval/RetrievedProviderTest.kt | 14 +++--- 3 files changed, 45 insertions(+), 17 deletions(-) diff --git a/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/RetrievedProvider.kt b/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/RetrievedProvider.kt index 21fd8ed3..399e3616 100644 --- a/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/RetrievedProvider.kt +++ b/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/RetrievedProvider.kt @@ -21,6 +21,7 @@ import io.github.csaf.sbom.retrieval.roles.CSAFProviderRole import io.github.csaf.sbom.retrieval.roles.CSAFPublisherRole import io.github.csaf.sbom.retrieval.roles.CSAFTrustedProviderRole import io.github.csaf.sbom.schema.generated.Provider +import io.github.csaf.sbom.schema.generated.Provider.Feed import io.github.csaf.sbom.schema.generated.ROLIEFeed import java.util.* import java.util.concurrent.CompletableFuture @@ -91,7 +92,7 @@ class RetrievedProvider(val json: Provider) : Validatable { ): ReceiveChannel>> { val feeds = json.distributions?.mapNotNull { it.rolie }?.flatMap { it.feeds } ?: listOf() - // This channel collects up to `channelCapacity` directory indices concurrently. + // This channel collects up to `channelCapacity` feeds concurrently. val rolieChannel = ioScope.produce(capacity = channelCapacity) { for (feed in feeds) { @@ -100,8 +101,8 @@ class RetrievedProvider(val json: Provider) : Validatable { } // This terminal channel is a simple "rendezvous channel" for awaiting the Results. return ioScope.produce { - for ((feed, indexDeferred) in rolieChannel) { - send(feed to indexDeferred.await()) + for ((feed, feedDeferred) in rolieChannel) { + send(feed to feedDeferred.await()) } } } @@ -146,13 +147,14 @@ class RetrievedProvider(val json: Provider) : Validatable { channelCapacity: Int = DEFAULT_CHANNEL_CAPACITY ): ReceiveChannel> { val indexChannel = fetchDocumentIndices(loader, channelCapacity) - val rolieChannel = fetchRolieFeeds(loader, channelCapacity) + val rolieFeedsChannel = fetchRolieFeeds(loader, channelCapacity) // This second channel collects up to `channelCapacity` Results concurrently, which // represent CSAF Documents or errors from fetching or validation. val documentJobChannel = ioScope.produce>>(capacity = channelCapacity) { - fetchDirectoryBased(indexChannel, loader) + fetchDocumentsFromIndices(indexChannel, loader) + fetchDocumentsFromRolieFeeds(rolieFeedsChannel, loader) } // This terminal channel is a simple "rendezvous channel" for awaiting the Results. return ioScope.produce { @@ -162,7 +164,8 @@ class RetrievedProvider(val json: Provider) : Validatable { } } - private suspend fun ProducerScope>>.fetchDirectoryBased( + private suspend fun ProducerScope>> + .fetchDocumentsFromIndices( indexChannel: ReceiveChannel>>, loader: CsafLoader ) { @@ -194,6 +197,35 @@ class RetrievedProvider(val json: Provider) : Validatable { } } + private suspend fun ProducerScope>> + .fetchDocumentsFromRolieFeeds( + rolieChannel: ReceiveChannel>>, + loader: CsafLoader + ) { + for ((feed, rolieResult) in rolieChannel) { + rolieResult.fold( + { rolie -> + rolie.feed.entry.map { entry -> + send( + async { + RetrievedDocument.from(entry.content.src.toString(), loader, role) + } + ) + } + }, + { e -> + send( + async { + Result.failure( + Exception("Failed to fetch feeds from directory at ${feed.url}", e) + ) + } + ) + } + ) + } + } + /** * This method provides the [Result]s of [fetchDocuments] as a Java [Stream] for usage in * non-Kotlin environments. diff --git a/csaf-retrieval/src/test/java/io/github/csaf/sbom/retrieval/RetrievedProviderJavaTest.java b/csaf-retrieval/src/test/java/io/github/csaf/sbom/retrieval/RetrievedProviderJavaTest.java index c08f4075..f0aafebe 100644 --- a/csaf-retrieval/src/test/java/io/github/csaf/sbom/retrieval/RetrievedProviderJavaTest.java +++ b/csaf-retrieval/src/test/java/io/github/csaf/sbom/retrieval/RetrievedProviderJavaTest.java @@ -52,9 +52,9 @@ public void testRetrievedProviderJava() throws InterruptedException, ExecutionEx final var documentResultsExplicit = providerExplicit.streamDocuments(loader, DEFAULT_CHANNEL_CAPACITY).toList(); final var documentResultsExplicitSlow = providerExplicit.streamDocuments(loader, 1).toList(); assertEquals( - 4, + 5, documentResults.size(), - "Expected exactly 4 results: One document, two document errors, one index error" + "Expected exactly 5 results: Two documents, two document errors, one index error" ); assertEquals( documentResults.size(), diff --git a/csaf-retrieval/src/test/kotlin/io/github/csaf/sbom/retrieval/RetrievedProviderTest.kt b/csaf-retrieval/src/test/kotlin/io/github/csaf/sbom/retrieval/RetrievedProviderTest.kt index b381a5ea..85c96ad1 100644 --- a/csaf-retrieval/src/test/kotlin/io/github/csaf/sbom/retrieval/RetrievedProviderTest.kt +++ b/csaf-retrieval/src/test/kotlin/io/github/csaf/sbom/retrieval/RetrievedProviderTest.kt @@ -73,13 +73,9 @@ class RetrievedProviderTest { @Test fun testFetchRolieFeeds() = runTest { val provider = RetrievedProvider.from("example.com").getOrThrow() - val documentIndexResults = provider.fetchRolieFeeds().toList() - assertEquals( - 1, - documentIndexResults.size, - "Expected exactly 1 results: One index.txt content and one fetch error" - ) - assertTrue(documentIndexResults[0].second.isSuccess) + val rolieFeedsResults = provider.fetchRolieFeeds().toList() + assertEquals(1, rolieFeedsResults.size, "Expected exactly 1 result: One parsed ROLIE feed") + assertTrue(rolieFeedsResults[0].second.isSuccess) } private suspend fun providerTest(domain: String) { @@ -88,9 +84,9 @@ class RetrievedProviderTest { assertEquals(3, expectedDocumentCount, "Expected 3 documents") val documentResults = provider.fetchDocuments().toList() assertEquals( - 4, + 5, documentResults.size, - "Expected exactly 4 results: One document, two document errors, one index error" + "Expected exactly 5 results: Two documents, two document errors, one index error" ) // Check some random property on successful document assertEquals( From 70953e2b2aa1aa9c76ac64d6143c133378c031ef Mon Sep 17 00:00:00 2001 From: Christian Banse Date: Tue, 12 Nov 2024 22:10:16 +0100 Subject: [PATCH 3/6] Added missing file --- .../csaf/sbom/retrieval/RetrievedProvider.kt | 6 +- .../schema/ROLIE_feed_json_schema.json | 291 ++++++++++++++++++ 2 files changed, 295 insertions(+), 2 deletions(-) create mode 100644 csaf-schema/src/main/resources/schema/ROLIE_feed_json_schema.json diff --git a/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/RetrievedProvider.kt b/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/RetrievedProvider.kt index 399e3616..68eb5b06 100644 --- a/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/RetrievedProvider.kt +++ b/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/RetrievedProvider.kt @@ -89,7 +89,7 @@ class RetrievedProvider(val json: Provider) : Validatable { fun fetchRolieFeeds( loader: CsafLoader = lazyLoader, channelCapacity: Int = DEFAULT_CHANNEL_CAPACITY - ): ReceiveChannel>> { + ): ReceiveChannel>> { val feeds = json.distributions?.mapNotNull { it.rolie }?.flatMap { it.feeds } ?: listOf() // This channel collects up to `channelCapacity` feeds concurrently. @@ -164,6 +164,7 @@ class RetrievedProvider(val json: Provider) : Validatable { } } + /** Populates this [ProducerScope] with the contents of the [indexChannel]. */ private suspend fun ProducerScope>> .fetchDocumentsFromIndices( indexChannel: ReceiveChannel>>, @@ -197,9 +198,10 @@ class RetrievedProvider(val json: Provider) : Validatable { } } + /** Populates this [ProducerScope] with the contents of the [rolieChannel]. */ private suspend fun ProducerScope>> .fetchDocumentsFromRolieFeeds( - rolieChannel: ReceiveChannel>>, + rolieChannel: ReceiveChannel>>, loader: CsafLoader ) { for ((feed, rolieResult) in rolieChannel) { diff --git a/csaf-schema/src/main/resources/schema/ROLIE_feed_json_schema.json b/csaf-schema/src/main/resources/schema/ROLIE_feed_json_schema.json new file mode 100644 index 00000000..8afe6460 --- /dev/null +++ b/csaf-schema/src/main/resources/schema/ROLIE_feed_json_schema.json @@ -0,0 +1,291 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/oasis-tcs/csaf/master/csaf_2.0/json_schema/ROLIE_feed_json_schema.json", + "title": "ROLIE Feed auxiliary Schema", + "description": "Representation of CSAF ROLIE feed as a JSON document.", + "$defs": { + "json_link_t": { + "title": "JSON Link", + "description": "Contains the URL of the JSON file.", + "type": "string", + "format": "uri", + "pattern": "^https://.+\\.json$" + }, + "link_t": { + "title": "List of Links", + "description": "Contains a list of links related to the current context.", + "type": "array", + "prefixItems": [ + { + "title": "Link", + "description": "Specifies the JSON link.", + "type": "object", + "required": [ + "rel", + "href" + ], + "properties": { + "href": { + "title": "Hyper reference", + "description": "Contains the URL of the JSON file.", + "$ref": "#/$defs/json_link_t" + }, + "rel": { + "title": "Relationship", + "description": "Contains the relationship value of the link.", + "type": "string", + "enum": [ + "self" + ] + } + } + } + ], + "minItems": 1, + "uniqueItems": true, + "items": { + "title": "Link", + "description": "Specifies a single link.", + "type": "object", + "required": [ + "rel", + "href" + ], + "properties": { + "href": { + "title": "Hyper reference", + "description": "Contains the URL of the link.", + "type": "string", + "format": "uri" + }, + "rel": { + "title": "Relationship", + "description": "Contains the relationship value of the link.", + "type": "string", + "minLength": 1 + } + } + } + } + }, + "type": "object", + "required": [ + "feed" + ], + "properties": { + "feed": { + "title": "CSAF ROLIE feed", + "description": "Contains all information of the feed.", + "type": "object", + "required": [ + "id", + "title", + "link", + "category", + "updated", + "entry" + ], + "properties": { + "id": { + "title": "ID", + "description": "Contains a unique identifier for this ROLIE feed.", + "type": "string", + "pattern": "^[a-zA-Z0-9+\\-_\\.]+$", + "minLength": 1 + }, + "title": { + "title": "Feed title", + "description": "Contains the title for this ROLIE feed.", + "type": "string", + "minLength": 1 + }, + "link": { + "title": "List of Links", + "description": "Contains a list of links related to this feed.", + "$ref": "#/$defs/link_t" + }, + "category": { + "title": "List of Categories", + "description": "Contains a list of categories related to this feed.", + "type": "array", + "prefixItems": [ + { + "title": "CSAF ROLIE category", + "description": "Contains the required ROLIE category value.", + "type": "object", + "required": [ + "scheme", + "term" + ], + "properties": { + "scheme": { + "title": "Scheme", + "description": "Contains the URI of the scheme to use.", + "type": "string", + "enum": [ + "urn:ietf:params:rolie:category:information-type" + ] + }, + "term": { + "title": "Term", + "description": "Contains the term that is valid in the context of the scheme.", + "type": "string", + "enum": [ + "csaf" + ] + } + } + } + ], + "minItems": 1, + "uniqueItems": true, + "items": { + "title": "Category", + "description": "Specifies a single category.", + "type": "object", + "required": [ + "scheme", + "term" + ], + "properties": { + "scheme": { + "title": "Scheme", + "description": "Contains the URI of the scheme to use.", + "type": "string", + "format": "uri" + }, + "term": { + "title": "Term", + "description": "Contains the term that is valid in the context of the scheme.", + "type": "string", + "minLength": 1 + } + } + } + }, + "updated": { + "title": "Updated", + "description": "Contains the date and time this feed was updated the last time.", + "type": "string", + "format": "date-time" + }, + "entry": { + "title": "List of Entries", + "description": "Contains a list of feed entries.", + "type": "array", + "uniqueItems": true, + "items": { + "title": "Entry", + "description": "Contains all information for a single feed entry.", + "type": "object", + "required": [ + "id", + "title", + "link", + "published", + "updated", + "content", + "format" + ], + "properties": { + "id": { + "title": "ID", + "description": "Contains the document tracking ID of the CSAF document.", + "type": "string", + "pattern": "^[\\S](.*[\\S])?$", + "minLength": 1 + }, + "title": { + "title": "Title", + "description": "Contains the document title of the CSAF document.", + "type": "string", + "minLength": 1 + }, + "link": { + "title": "List of Links", + "description": "Contains a list of links related to this entry.", + "$ref": "#/$defs/link_t" + }, + "published": { + "title": "Published", + "description": "Contains the date and time this entry was initially added to the feed.", + "type": "string", + "format": "date-time" + }, + "updated": { + "title": "Updated", + "description": "Contains the date and time this entry was the last time updated in the feed.", + "type": "string", + "format": "date-time" + }, + "summary": { + "title": "Summary", + "description": "Contains the summary of the CSAF document.", + "type": "object", + "properties": { + "content": { + "title": "Content", + "description": "Contains the actual text of the summary.", + "type": "string", + "minLength": 1 + } + } + }, + "content": { + "title": "Content of the entry", + "description": "Contains information about the content.", + "type": "object", + "required": [ + "type", + "src" + ], + "properties": { + "src": { + "title": "Source Code", + "description": "Contains a link to the source code of the file", + "$ref": "#/$defs/json_link_t" + }, + "type": { + "title": "MIME type", + "description": "Contains the MIME type of the content.", + "type": "string", + "enum": [ + "application/json" + ] + } + } + }, + "format": { + "title": "Format", + "description": "Contains information about the format of the entry.", + "type": "object", + "required": [ + "schema", + "version" + ], + "properties": { + "schema": { + "title": "Schema of the entry", + "description": "Contains the schema the CSAF document is valid against.", + "type": "string", + "enum": [ + "https://docs.oasis-open.org/csaf/csaf/v2.0/csaf_json_schema.json" + ] + }, + "version": { + "title": "CSAF Version", + "description": "Contains the CSAF version the document was written in.", + "type": "string", + "enum": [ + "2.0" + ] + } + } + } + } + } + } + } + } + } +} \ No newline at end of file From 26b450b7bc7240029a4d452a7030963a5d237fbd Mon Sep 17 00:00:00 2001 From: Christian Banse Date: Tue, 12 Nov 2024 22:17:46 +0100 Subject: [PATCH 4/6] Added missing test case --- .../io/github/csaf/sbom/retrieval/CsafLoaderTest.kt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/csaf-retrieval/src/test/kotlin/io/github/csaf/sbom/retrieval/CsafLoaderTest.kt b/csaf-retrieval/src/test/kotlin/io/github/csaf/sbom/retrieval/CsafLoaderTest.kt index 08581758..b8ed85bf 100644 --- a/csaf-retrieval/src/test/kotlin/io/github/csaf/sbom/retrieval/CsafLoaderTest.kt +++ b/csaf-retrieval/src/test/kotlin/io/github/csaf/sbom/retrieval/CsafLoaderTest.kt @@ -102,4 +102,13 @@ class CsafLoaderTest { } assertFalse { result.isSuccess } } + + @Test + fun testFetchROLIEFeed() = runTest { + val result = + loader.fetchROLIEFeed("does-not-really-exist.json") { + assertSame(HttpStatusCode.NotFound, it.status) + } + assertFalse { result.isSuccess } + } } From cb4091ee69f96987405a7026c9b2dcc1fe3971f5 Mon Sep 17 00:00:00 2001 From: Christian Banse Date: Tue, 12 Nov 2024 22:41:14 +0100 Subject: [PATCH 5/6] Added more documentation --- .../csaf/sbom/retrieval/RetrievedProvider.kt | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/RetrievedProvider.kt b/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/RetrievedProvider.kt index 68eb5b06..f771a22c 100644 --- a/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/RetrievedProvider.kt +++ b/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/RetrievedProvider.kt @@ -55,20 +55,22 @@ class RetrievedProvider(val json: Provider) : Validatable { } /** - * This function fetches all directory indices referenced by this provider. + * This function fetches all directory indices referenced by this provider and sends them to a + * [ReceiveChannel]. * * @param loader The instance of [CsafLoader] used for fetching of online resources. * @param channelCapacity The capacity of the channels used to buffer parallel fetches. Defaults * to [DEFAULT_CHANNEL_CAPACITY]. - * @return The fetched [Result]s, representing index contents or fetch errors. + * @return A [ReceiveChannel] containing the fetched [Result]s, representing index contents or + * fetch errors. */ fun fetchDocumentIndices( loader: CsafLoader = lazyLoader, channelCapacity: Int = DEFAULT_CHANNEL_CAPACITY ): ReceiveChannel>> { - @Suppress("SimpleRedundantLet") val directoryUrls = (json.distributions ?: emptySet()).mapNotNull { distribution -> + @Suppress("SimpleRedundantLet") distribution.directory_url?.let { it.toString().trimEnd('/') } } // This channel collects up to `channelCapacity` directory indices concurrently. @@ -86,6 +88,16 @@ class RetrievedProvider(val json: Provider) : Validatable { } } + /** + * This function fetches all ROLIE feeds referenced by this provider and sends them to a + * [ReceiveChannel]. + * + * @param loader The instance of [CsafLoader] used for fetching of online resources. + * @param channelCapacity The capacity of the channels used to buffer parallel fetches. Defaults + * to [DEFAULT_CHANNEL_CAPACITY]. + * @return A [ReceiveChannel] containing the fetched [Result]s, representing ROLIE feeds' + * contents (as [ROLIEFeed]) or fetch errors. + */ fun fetchRolieFeeds( loader: CsafLoader = lazyLoader, channelCapacity: Int = DEFAULT_CHANNEL_CAPACITY From e87a6a0feb7af288c1f45f5a644b340e000667bd Mon Sep 17 00:00:00 2001 From: milux Date: Wed, 13 Nov 2024 14:10:56 +0100 Subject: [PATCH 6/6] Simplify document streaming logic and update tests Refactor document streaming to combine URL fetching from indices and ROLIE feeds, removing redundant channels. Updated logic to calculate expected documents and adjusted test cases to reflect the changes. --- csaf | 2 +- .../io/github/csaf/sbom/retrieval/Main.kt | 1 + .../csaf/sbom/retrieval/RetrievedProvider.kt | 122 ++++++++++-------- .../retrieval/RetrievedProviderJavaTest.java | 4 +- .../sbom/retrieval/RetrievedProviderTest.kt | 35 ++++- 5 files changed, 99 insertions(+), 65 deletions(-) diff --git a/csaf b/csaf index 395fbfea..b01a0a7a 160000 --- a/csaf +++ b/csaf @@ -1 +1 @@ -Subproject commit 395fbfea077ca05d663c7dc930850ae18f40e936 +Subproject commit b01a0a7a176f257e47bf6e4e3ac42d0def9620b1 diff --git a/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/Main.kt b/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/Main.kt index 9c1198a4..5246597b 100644 --- a/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/Main.kt +++ b/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/Main.kt @@ -25,6 +25,7 @@ fun main(args: Array) { RetrievedProvider.from(args[0]) .onSuccess { provider -> println("Discovered provider-metadata.json @ ${provider.json.canonical_url}") + println("Expected documents: ${provider.countExpectedDocuments()}") // Retrieve all documents from all feeds. Note: we currently only support index.txt for (result in provider.fetchDocuments()) { result.onSuccess { doc -> diff --git a/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/RetrievedProvider.kt b/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/RetrievedProvider.kt index f771a22c..4918f778 100644 --- a/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/RetrievedProvider.kt +++ b/csaf-retrieval/src/main/kotlin/io/github/csaf/sbom/retrieval/RetrievedProvider.kt @@ -28,10 +28,7 @@ import java.util.concurrent.CompletableFuture import java.util.stream.Stream import java.util.stream.StreamSupport import kotlinx.coroutines.* -import kotlinx.coroutines.channels.ProducerScope -import kotlinx.coroutines.channels.ReceiveChannel -import kotlinx.coroutines.channels.produce -import kotlinx.coroutines.channels.toList +import kotlinx.coroutines.channels.* import kotlinx.coroutines.future.future /** @@ -132,17 +129,7 @@ class RetrievedProvider(val json: Provider) : Validatable { loader: CsafLoader = lazyLoader, channelCapacity: Int = DEFAULT_CHANNEL_CAPACITY ): Int { - val indexChannel = fetchDocumentIndices(loader, channelCapacity) - // This second channel collects up to `channelCapacity` Results concurrently, which - // represent CSAF Documents or errors from fetching or validation. - val documentCountChannel = - ioScope.produce(capacity = channelCapacity) { - for ((_, indexResult) in indexChannel) { - indexResult.onSuccess { send(it.lines().size) } - } - } - // This terminal channel is a simple "rendezvous channel" for awaiting the Results. - return documentCountChannel.toList().sum() + return fetchAllDocumentUrls(loader, channelCapacity).toList().filter { it.isSuccess }.size } /** @@ -158,15 +145,17 @@ class RetrievedProvider(val json: Provider) : Validatable { loader: CsafLoader = lazyLoader, channelCapacity: Int = DEFAULT_CHANNEL_CAPACITY ): ReceiveChannel> { - val indexChannel = fetchDocumentIndices(loader, channelCapacity) - val rolieFeedsChannel = fetchRolieFeeds(loader, channelCapacity) - + val documentUrlChannel = fetchAllDocumentUrls(loader, channelCapacity) // This second channel collects up to `channelCapacity` Results concurrently, which // represent CSAF Documents or errors from fetching or validation. val documentJobChannel = ioScope.produce>>(capacity = channelCapacity) { - fetchDocumentsFromIndices(indexChannel, loader) - fetchDocumentsFromRolieFeeds(rolieFeedsChannel, loader) + for (result in documentUrlChannel) { + result.fold( + { send(async { RetrievedDocument.from(it, loader, role) }) }, + { send(async { Result.failure(it) }) } + ) + } } // This terminal channel is a simple "rendezvous channel" for awaiting the Results. return ioScope.produce { @@ -176,64 +165,87 @@ class RetrievedProvider(val json: Provider) : Validatable { } } - /** Populates this [ProducerScope] with the contents of the [indexChannel]. */ - private suspend fun ProducerScope>> - .fetchDocumentsFromIndices( - indexChannel: ReceiveChannel>>, - loader: CsafLoader + /** + * Returns a channel that produces all URLs from ROLIE feeds and directory indices without + * duplicates. + * + * @param loader The instance of [CsafLoader] used for fetching of online resources. + * @param channelCapacity The capacity of the channels used to buffer parallel fetches. Defaults + * to [DEFAULT_CHANNEL_CAPACITY]. + * @return + */ + @OptIn(ExperimentalCoroutinesApi::class) + fun fetchAllDocumentUrls( + loader: CsafLoader = lazyLoader, + channelCapacity: Int = DEFAULT_CHANNEL_CAPACITY + ): ReceiveChannel> { + val urlResultChannel = + ioScope.produce(capacity = channelCapacity) { + fetchDocumentUrlsFromIndices(fetchDocumentIndices(loader, channelCapacity)) + fetchDocumentUrlsFromRolieFeeds(fetchRolieFeeds(loader, channelCapacity)) + } + return ioScope.produce { + val seenUrls = mutableSetOf() + for (urlResult in urlResultChannel) { + urlResult.fold( + { + if (seenUrls.add(it)) { + send(Result.success(it)) + } + }, + { send(Result.failure(it)) } + ) + } + } + } + + /** + * Sends the URLs obtained from the [indexChannel] to the given [ProducerScope]. + * + * @param indexChannel The source channel providing directory index data. + * @receiver urlChannel The target channel where URLs are sent to. + */ + private suspend fun SendChannel>.fetchDocumentUrlsFromIndices( + indexChannel: ReceiveChannel>> ) { for ((directoryUrl, indexResult) in indexChannel) { indexResult.fold( { index -> - index.lines().map { line -> - send( - async { - val csafUrl = "$directoryUrl/$line" - RetrievedDocument.from(csafUrl, loader, role) - } - ) - } + index.lines().map { line -> send(Result.success("$directoryUrl/$line")) } }, { e -> send( - async { - Result.failure( - Exception( - "Failed to fetch index.txt from directory at $directoryUrl", - e - ) + Result.failure( + Exception( + "Failed to fetch index.txt from directory at $directoryUrl", + e ) - } + ) ) } ) } } - /** Populates this [ProducerScope] with the contents of the [rolieChannel]. */ - private suspend fun ProducerScope>> - .fetchDocumentsFromRolieFeeds( - rolieChannel: ReceiveChannel>>, - loader: CsafLoader + /** + * Sends the URLs obtained from the [rolieChannel] feeds to the given [ProducerScope]. + * + * @param rolieChannel The source channel providing ROLIE feed data. + * @receiver urlChannel The target channel where URLs are sent to. + */ + private suspend fun SendChannel>.fetchDocumentUrlsFromRolieFeeds( + rolieChannel: ReceiveChannel>> ) { for ((feed, rolieResult) in rolieChannel) { rolieResult.fold( { rolie -> rolie.feed.entry.map { entry -> - send( - async { - RetrievedDocument.from(entry.content.src.toString(), loader, role) - } - ) + send(Result.success(entry.content.src.toString())) } }, { e -> send( - async { - Result.failure( - Exception("Failed to fetch feeds from directory at ${feed.url}", e) - ) - } + Result.failure(Exception("Failed to fetch ROLIE feed from ${feed.url}", e)) ) } ) diff --git a/csaf-retrieval/src/test/java/io/github/csaf/sbom/retrieval/RetrievedProviderJavaTest.java b/csaf-retrieval/src/test/java/io/github/csaf/sbom/retrieval/RetrievedProviderJavaTest.java index f0aafebe..c08f4075 100644 --- a/csaf-retrieval/src/test/java/io/github/csaf/sbom/retrieval/RetrievedProviderJavaTest.java +++ b/csaf-retrieval/src/test/java/io/github/csaf/sbom/retrieval/RetrievedProviderJavaTest.java @@ -52,9 +52,9 @@ public void testRetrievedProviderJava() throws InterruptedException, ExecutionEx final var documentResultsExplicit = providerExplicit.streamDocuments(loader, DEFAULT_CHANNEL_CAPACITY).toList(); final var documentResultsExplicitSlow = providerExplicit.streamDocuments(loader, 1).toList(); assertEquals( - 5, + 4, documentResults.size(), - "Expected exactly 5 results: Two documents, two document errors, one index error" + "Expected exactly 4 results: One document, two document errors, one index error" ); assertEquals( documentResults.size(), diff --git a/csaf-retrieval/src/test/kotlin/io/github/csaf/sbom/retrieval/RetrievedProviderTest.kt b/csaf-retrieval/src/test/kotlin/io/github/csaf/sbom/retrieval/RetrievedProviderTest.kt index 85c96ad1..5d4b002f 100644 --- a/csaf-retrieval/src/test/kotlin/io/github/csaf/sbom/retrieval/RetrievedProviderTest.kt +++ b/csaf-retrieval/src/test/kotlin/io/github/csaf/sbom/retrieval/RetrievedProviderTest.kt @@ -20,13 +20,14 @@ import io.github.csaf.sbom.validation.ValidationException import kotlin.test.* import kotlinx.coroutines.channels.toList import kotlinx.coroutines.test.runTest +import org.junit.jupiter.api.assertThrows class RetrievedProviderTest { init { CsafLoader.defaultLoaderFactory = { CsafLoader(mockEngine()) } } - @Test fun testRetrievedProviderFrom() = runTest { providerTest("example.com") } + @Test fun testRetrievedProviderFrom() = runTest { providerTest("example.com", 4) } @Test fun testRetrievedProviderFromSecurityTxt() = runTest { @@ -78,16 +79,12 @@ class RetrievedProviderTest { assertTrue(rolieFeedsResults[0].second.isSuccess) } - private suspend fun providerTest(domain: String) { + private suspend fun providerTest(domain: String, numResults: Int = 5) { val provider = RetrievedProvider.from(domain).getOrThrow() val expectedDocumentCount = provider.countExpectedDocuments() assertEquals(3, expectedDocumentCount, "Expected 3 documents") val documentResults = provider.fetchDocuments().toList() - assertEquals( - 5, - documentResults.size, - "Expected exactly 5 results: Two documents, two document errors, one index error" - ) + assertEquals(numResults, documentResults.size, "Expected exactly $numResults results") // Check some random property on successful document assertEquals( "Bundesamt für Sicherheit in der Informationstechnik", @@ -114,4 +111,28 @@ class RetrievedProviderTest { documentResults[3].exceptionOrNull()?.message ) } + + @Test + fun testFetchAllDocumentUrls() = runTest { + val provider = RetrievedProvider.from("example.com").getOrThrow() + val urlResults = provider.fetchAllDocumentUrls().toList() + + assertEquals(4, urlResults.size, "Expected exactly 4 results") + assertEquals( + "https://example.com/directory/2022/bsi-2022-0001.json", + urlResults[0].getOrThrow() + ) + assertEquals( + "https://example.com/directory/2022/bsi-2022_2-01.json", + urlResults[1].getOrThrow() + ) + assertEquals( + "https://example.com/directory/2024/does-not-exist.json", + urlResults[2].getOrThrow() + ) + assertEquals( + "Failed to fetch index.txt from directory at https://example.com/invalid-directory", + (assertThrows { urlResults[3].getOrThrow() }).message + ) + } }