From cdce3e0962fa13fb88ddb13092bbcd628ff7a6da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABlle=20Salmon?= Date: Fri, 20 Oct 2023 14:24:04 +0200 Subject: [PATCH] feat: spq_label() aware of OPTIONAL (#205) --- R/{build_parts.R => build_part_body.R} | 41 +++++++++--- R/spq_add.R | 8 ++- R/spq_assemble.R | 16 +++-- R/spq_label.R | 39 +++++------ R/tracking.R | 33 ++++++++-- man/spq_add.Rd | 6 +- man/spq_assemble.Rd | 2 +- tests/testthat/_snaps/spq_label.md | 90 +++++++++++++++++++++++++- tests/testthat/test-build_parts.R | 7 -- tests/testthat/test-spq_label.R | 29 ++++++++- 10 files changed, 215 insertions(+), 56 deletions(-) rename R/{build_parts.R => build_part_body.R} (73%) diff --git a/R/build_parts.R b/R/build_part_body.R similarity index 73% rename from R/build_parts.R rename to R/build_part_body.R index 56d20855..d53fb16c 100644 --- a/R/build_parts.R +++ b/R/build_part_body.R @@ -1,5 +1,4 @@ #' Builds the "body" part of a query. -#' @param query a list with elements of the query #' @param subject an anonymous variable (for instance, #' and by default, "?subject") or item (for instance "wd:Q456")) #' @param verb the property (for instance "wdt:P190") @@ -14,17 +13,15 @@ #' @param within_distance if provided, north-west and south-east coordinates of #' bounding box for the triple query. #' @noRd -build_part_body = function(query = NA, - triple = NULL, +build_part_body = function(triple = NULL, subject = NULL, verb = NULL, object = NULL, required = TRUE, within_box = c(NA, NA), within_distance = c(NA, NA), - filter = NA) { - - part_body = query[["body"]] + filter = NA, + other_triples) { if (!is.null(triple)) { elts = decompose_triple_pattern(triple) @@ -42,11 +39,39 @@ build_part_body = function(query = NA, ) } else { new_triple = glue::glue("{subject} {verb} {object}.") + if (sub(".$", "", new_triple) %in% other_triples[["sibling_triple"]]) { + little_siblings = other_triples[other_triples[["sibling_triple"]] == sub(".$", "", new_triple),] + little_siblings = split(little_siblings, seq_len(nrow(little_siblings))) + + sibling_triples = purrr::map_chr( + little_siblings, + ~build_part_body( + triple = .x[["triple"]], + required = .x[["required"]], + within_box = .x[["within_box"]], + within_distance = .x[["within_distance"]], + filter = .x[["filter"]], + other_triples = other_triples + ) + ) %>% + paste(collapse = "") + } else { + sibling_triples = NA + } + } + + if (!is.na(sibling_triples)) { + new_triple = sprintf("\n\t%s\n\t%s\n", new_triple, sibling_triples) + } + + + if (!is.na(filter)) { + new_triple = sprintf("\n\t%s\n\tFILTER(%s)\n", new_triple, filter) } if (!required) { new_triple = if (!is.na(filter)) { - sprintf("OPTIONAL {\n\t%s\n\tFILTER(%s)\n}\n", new_triple, filter) + sprintf("OPTIONAL {%s}\n", new_triple) } else { sprintf("OPTIONAL {%s}", new_triple) } @@ -86,5 +111,5 @@ build_part_body = function(query = NA, ) } - glue::glue("{part_body}\n{new_triple}") + sprintf("\n%s", new_triple) } diff --git a/R/spq_add.R b/R/spq_add.R index 0b842217..bd6e0b88 100644 --- a/R/spq_add.R +++ b/R/spq_add.R @@ -21,6 +21,8 @@ #' for the center coordinates to be retrieved directly from the query. #' @param .prefixes Custom prefixes #' @param .filter Filter for the triple. Only use this with `.required=FALSE` +#' @param .sibling_triple_pattern Triple this triple is to be grouped with, +#' especially (only?) useful if the sibling triple is optional. #' @export #' @section Examples: #' ```r @@ -60,7 +62,8 @@ spq_add = function(.query = NULL, .label = NA, .within_box = c(NA, NA), .within_distance = c(NA, NA), - .filter = NULL) { + .filter = NULL, + .sibling_triple_pattern = NA) { .query = .query %||% spq_init() elts = decompose_triple_pattern( @@ -84,7 +87,8 @@ spq_add = function(.query = NULL, required = .required, within_box = list(.within_box), within_distance = list(.within_distance), - filter = .filter + filter = .filter, + sibling_triple = .sibling_triple_pattern ) # variable tracking ---- diff --git a/R/spq_assemble.R b/R/spq_assemble.R index 9d117eed..0b363d84 100644 --- a/R/spq_assemble.R +++ b/R/spq_assemble.R @@ -138,17 +138,23 @@ spq_assemble = function(.query, strict = TRUE) { # body ---- triples_present = !is.null(.query[["triples"]]) body = if (triples_present) { + + firstborn_triples = .query[["triples"]][is.na(.query[["triples"]][["sibling_triple"]]),] + firstborn_triples = split(firstborn_triples, seq_len(nrow(firstborn_triples))) + + # they'll be built as we build their big siblings + other_triples = .query[["triples"]][!is.na(.query[["triples"]][["sibling_triple"]]),] + purrr::map_chr( - split(.query[["triples"]], seq_len(nrow(.query[["triples"]]))), + firstborn_triples, ~build_part_body( - query = .query, triple = .x[["triple"]], required = .x[["required"]], within_box = .x[["within_box"]], within_distance = .x[["within_distance"]], - filter = .x[["filter"]] - ), - .query = .query + filter = .x[["filter"]], + other_triples = other_triples + ) ) |> paste0(collapse = "") } else { diff --git a/R/spq_label.R b/R/spq_label.R index 05c17ec4..a8bf4196 100644 --- a/R/spq_label.R +++ b/R/spq_label.R @@ -42,8 +42,7 @@ spq_label <- function(.query, .languages = getOption("glitter.lang", "en$"), .overwrite = FALSE) { - label_property <- .query[["endpoint_info"]][["label_property"]] %||% - "rdfs:label" + label_property <- .query[["endpoint_info"]][["label_property"]] %||%"rdfs:label" vars = purrr::map_chr(rlang::enquos(...), spq_treat_argument) @@ -55,33 +54,28 @@ spq_label <- function(.query, if (is.null(.languages)) { filter = NULL } else { - - languages_filter <- purrr::map_chr(.languages, create_lang_filter, x = x) - - filter = paste( - languages_filter, - collapse = " || " - ) + languages_filter = purrr::map_chr(.languages, create_lang_filter, x = x) + filter = paste(languages_filter, collapse = " || ") } - if (.required) { - q = spq_add( - query, - sprintf("%s %s %s_labell", x, label_property, x), - .required = .required - ) - if (!is.null(filter)) { - q = spq_filter(q, spq(filter)) - } + triples_for_var = .query[["triples"]][ + .query[["triples"]][["triple"]] %in% + .query[["vars"]][["triple"]][.query[["vars"]][["name"]] == x], + ] + triple_for_var_optional <- all(!triples_for_var[["required"]]) + sibling_triple_pattern = if (triple_for_var_optional) { + utils::tail(triples_for_var[["triple"]], n = 1) } else { + NA + } + q = spq_add( query, sprintf("%s %s %s_labell", x, label_property, x), .required = .required, - .filter = filter + .filter = filter, + .sibling_triple_pattern = sibling_triple_pattern ) - } - mutate_left <- sprintf("%s_label", sub("\\?", "", x)) mutate_right <- sprintf("coalesce(%s_labell, '')", un_question_mark(x)) @@ -90,8 +84,7 @@ spq_label <- function(.query, q = do.call(spq_mutate, args_list) q = spq_select(q, sprintf("-%s_labell", un_question_mark(x))) - # we add the language of the label - # because of regional variants + # we add the language of the label because of regional variants if (!is.null(.languages)) { if (length(.languages) > 1 || !endsWith(.languages, "$")) { mutate_left <- sprintf("%s_label_lang", un_question_mark(x)) diff --git a/R/tracking.R b/R/tracking.R index aadc4442..e2b40c5f 100644 --- a/R/tracking.R +++ b/R/tracking.R @@ -62,9 +62,18 @@ track_triples <- function(.query, required, within_box, within_distance, - filter = NULL) { + filter = NULL, + sibling_triple = NA) { if (triple %in% .query[["triples"]][["triple"]]) { - cli::cli_abort("Duplicate triple {.val triple}") + cli::cli_abort("Duplicate triple {.val triple}.") + } + + if (!is.na(sibling_triple)) { + sibling_absent <- !(sibling_triple %in% .query[["triples"]][["triple"]]) + + if (sibling_absent) { + cli::cli_abort("Can't find sibling triple {.val sibling_triple}.") + } } no_within_box = (sum(is.na(within_box[[1]])) == 2) @@ -84,7 +93,8 @@ track_triples <- function(.query, required = required, within_box = within_box, within_distance = within_distance, - filter = filter + filter = filter, + sibling_triple = sibling_triple ) .query[["triples"]] <- rbind(.query[["triples"]], new_triple) @@ -92,10 +102,23 @@ track_triples <- function(.query, .query } -track_filters <- function(.query, filter) { +track_filters <- function(.query, filter, sibling_triple = NA) { + + if (!is.na(sibling_triple)) { + sibling_absent <- !(sibling_triple %in% .query[["triples"]][["triple"]]) + + if (sibling_absent) { + cli::cli_abort("Can't find sibling triple {.val sibling_triple}.") + } + } + var <- str_extract(filter, "\\(\\?(.*?)\\)") var <- sub("\\,.*", "", sub("\\(", "", sub("\\)", "", var))) - new_filter <- tibble::tibble(filter = filter, var = var) + new_filter <- tibble::tibble( + filter = filter, + var = var, + sibling_triple = sibling_triple + ) .query[["filters"]] <- rbind(.query[["filters"]], new_filter) diff --git a/man/spq_add.Rd b/man/spq_add.Rd index ab1b89dd..56803141 100644 --- a/man/spq_add.Rd +++ b/man/spq_add.Rd @@ -15,7 +15,8 @@ spq_add( .label = NA, .within_box = c(NA, NA), .within_distance = c(NA, NA), - .filter = NULL + .filter = NULL, + .sibling_triple_pattern = NA ) } \arguments{ @@ -51,6 +52,9 @@ The center can also be provided as a variable (for instance, "?location") for the center coordinates to be retrieved directly from the query.} \item{.filter}{Filter for the triple. Only use this with \code{.required=FALSE}} + +\item{.sibling_triple_pattern}{Triple this triple is to be grouped with, +especially (only?) useful if the sibling triple is optional.} } \description{ Add a triple pattern statement to a query diff --git a/man/spq_assemble.Rd b/man/spq_assemble.Rd index 91e5235a..bc32d0f9 100644 --- a/man/spq_assemble.Rd +++ b/man/spq_assemble.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/build_sparql.R +% Please edit documentation in R/spq_assemble.R \name{spq_assemble} \alias{spq_assemble} \title{Assemble query parts into a proper SPARQL query} diff --git a/tests/testthat/_snaps/spq_label.md b/tests/testthat/_snaps/spq_label.md index f3ddf499..520076c1 100644 --- a/tests/testthat/_snaps/spq_label.md +++ b/tests/testthat/_snaps/spq_label.md @@ -118,8 +118,8 @@ # spq_label() .languages = NULL Code - spq_init(endpoint = "hal") %>% spq_label(labo, .languages = NULL, .required = TRUE) %>% - spq_add("?labo dcterms:identifier ?labo_id", .required = FALSE) %>% + spq_init(endpoint = "hal") %>% spq_add("?labo dcterms:identifier ?labo_id", + .required = FALSE) %>% spq_label(labo, .languages = NULL, .required = TRUE) %>% spq_filter(str_detect(labo_label, "EVS|(UMR 5600)|(Environnement Ville Soc)")) Output PREFIX dcterms: @@ -127,10 +127,94 @@ SELECT ?labo (COALESCE(?labo_labell,'') AS ?labo_label) ?labo_id WHERE { + OPTIONAL { + ?labo dcterms:identifier ?labo_id. + ?labo skos:prefLabel ?labo_labell. - OPTIONAL {?labo dcterms:identifier ?labo_id.} + } BIND(COALESCE(?labo_labell,'') AS ?labo_label) FILTER(REGEX(?labo_label,"EVS|(UMR 5600)|(Environnement Ville Soc)")) } +# spq_label() for optional thing + + Code + spq_init() %>% spq_add("?film wdt:P31 wd:Q11424") %>% spq_add( + "?film wdt:P840 ?loc") %>% spq_add("?loc wdt:P625 ?coords") %>% spq_add( + "?film wdt:P3383 ?image") %>% spq_add("?film wdt:P921 ?subject", .required = FALSE) %>% + spq_add("?film wdt:P577 ?date") %>% spq_label(film, loc, subject) %>% + spq_head(10) + Output + PREFIX rdfs: + SELECT ?coords ?date ?film (COALESCE(?film_labell,'') AS ?film_label) ?image ?loc (COALESCE(?loc_labell,'') AS ?loc_label) ?subject (COALESCE(?subject_labell,'') AS ?subject_label) + WHERE { + + ?film wdt:P31 wd:Q11424. + ?film wdt:P840 ?loc. + ?loc wdt:P625 ?coords. + ?film wdt:P3383 ?image. + OPTIONAL { + ?film wdt:P921 ?subject. + + OPTIONAL { + ?subject rdfs:label ?subject_labell. + FILTER(lang(?subject_labell) IN ('en')) + } + + } + ?film wdt:P577 ?date. + OPTIONAL { + ?film rdfs:label ?film_labell. + FILTER(lang(?film_labell) IN ('en')) + } + + OPTIONAL { + ?loc rdfs:label ?loc_labell. + FILTER(lang(?loc_labell) IN ('en')) + } + + + } + + LIMIT 10 + +--- + + Code + spq_init() %>% spq_add("?film wdt:P31 wd:Q11424") %>% spq_add( + "?film wdt:P840 ?loc") %>% spq_add("?loc wdt:P625 ?coords") %>% spq_add( + "?film wdt:P3383 ?image") %>% spq_add("?film wdt:P921 ?subject", .required = FALSE) %>% + spq_add("?film wdt:P577 ?date") %>% spq_label(film, loc, subject, .required = TRUE) %>% + spq_head(10) + Output + PREFIX rdfs: + SELECT ?coords ?date ?film (COALESCE(?film_labell,'') AS ?film_label) ?image ?loc (COALESCE(?loc_labell,'') AS ?loc_label) ?subject (COALESCE(?subject_labell,'') AS ?subject_label) + WHERE { + + ?film wdt:P31 wd:Q11424. + ?film wdt:P840 ?loc. + ?loc wdt:P625 ?coords. + ?film wdt:P3383 ?image. + OPTIONAL { + ?film wdt:P921 ?subject. + + + ?subject rdfs:label ?subject_labell. + FILTER(lang(?subject_labell) IN ('en')) + + } + ?film wdt:P577 ?date. + + ?film rdfs:label ?film_labell. + FILTER(lang(?film_labell) IN ('en')) + + + ?loc rdfs:label ?loc_labell. + FILTER(lang(?loc_labell) IN ('en')) + + + } + + LIMIT 10 + diff --git a/tests/testthat/test-build_parts.R b/tests/testthat/test-build_parts.R index d0947a90..c098ac95 100644 --- a/tests/testthat/test-build_parts.R +++ b/tests/testthat/test-build_parts.R @@ -1,10 +1,3 @@ -test_that("build_part_body() return strings", { - - x = build_part_body(query=NULL,subject="?city",verb="wdt:P625",object="?coords") - expect_type(x, "character") - -}) - test_that("within_distance is not broken", { expect_snapshot( spq_init() %>% diff --git a/tests/testthat/test-spq_label.R b/tests/testthat/test-spq_label.R index fc7803e1..930c4d40 100644 --- a/tests/testthat/test-spq_label.R +++ b/tests/testthat/test-spq_label.R @@ -60,8 +60,35 @@ test_that("spq_label() .overwrite", { test_that("spq_label() .languages = NULL", { expect_snapshot( spq_init(endpoint = "hal") %>% - spq_label(labo, .languages = NULL, .required = TRUE) %>% spq_add("?labo dcterms:identifier ?labo_id", .required = FALSE) %>% + spq_label(labo, .languages = NULL, .required = TRUE) %>% spq_filter(str_detect(labo_label,"EVS|(UMR 5600)|(Environnement Ville Soc)")) ) }) + + +test_that("spq_label() for optional thing", { + expect_snapshot( + spq_init() %>% + spq_add("?film wdt:P31 wd:Q11424") %>% + spq_add("?film wdt:P840 ?loc") %>% + spq_add("?loc wdt:P625 ?coords") %>% + spq_add("?film wdt:P3383 ?image") %>% + spq_add("?film wdt:P921 ?subject", .required=FALSE) %>% + spq_add("?film wdt:P577 ?date") %>% + spq_label(film,loc,subject) %>% + spq_head(10) + ) + + expect_snapshot( + spq_init() %>% + spq_add("?film wdt:P31 wd:Q11424") %>% + spq_add("?film wdt:P840 ?loc") %>% + spq_add("?loc wdt:P625 ?coords") %>% + spq_add("?film wdt:P3383 ?image") %>% + spq_add("?film wdt:P921 ?subject", .required=FALSE) %>% + spq_add("?film wdt:P577 ?date") %>% + spq_label(film,loc,subject, .required = TRUE) %>% + spq_head(10) + ) +})