From 3f017711cb8082a0524a30966a4ccad0469c4256 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 4 Apr 2024 17:13:41 -0400 Subject: [PATCH 01/34] First pass on mjs and PDFJS 4.x. Adds also accesibility Not a lot since we have so far next/prev. But it does work. Tab selection. Passes the tests. --- format_strawberryfield.libraries.yml | 9 +- js/pdfs_strawberry.js | 254 +++++++++--------- js/pdfs_strawberry_webpack.js | 5 + .../FieldFormatter/StrawberryPdfFormatter.php | 13 + .../format-strawberryfield-pdfs.html.twig | 12 +- 5 files changed, 164 insertions(+), 129 deletions(-) create mode 100644 js/pdfs_strawberry_webpack.js diff --git a/format_strawberryfield.libraries.yml b/format_strawberryfield.libraries.yml index 59789eb2..87a6d329 100644 --- a/format_strawberryfield.libraries.yml +++ b/format_strawberryfield.libraries.yml @@ -158,16 +158,19 @@ jsm_model_strawberry: - format_strawberryfield/iiif_formatstrawberryfield_utils pdfs_mozilla: - version: 2.2.228 + version: 4.0.379 license: name: Apache url: //raw.githubusercontent.com/mozilla/pdf.js/master/LICENSE gpl-compatible: true js: - https://cdn.jsdelivr.net/npm/pdfjs-dist@2.2.228/build/pdf.min.js: { external: true, minified: true, preprocess: false} + # js/pdfs_strawberry_webpack.js: {minified: false, attributes: { type: module } } Unused but kept around for posterity + # The only reason we use this one directly is to ensure faster (browser cache) and to use as path/settings in the actual + # js/pdfs_strawberry.js where we load the node module directly async. New to PDFJS 4.0.379+ + https://cdn.jsdelivr.net/npm/pdfjs-dist@4.0.379/build/pdf.min.mjs: { external: true, minified: true, attributes: { type: module }} pdfs_strawberry: - version: 1.2 + version: 1.3 js: js/pdfs_strawberry.js: {minified: false} dependencies: diff --git a/js/pdfs_strawberry.js b/js/pdfs_strawberry.js index 535fff46..fc187113 100644 --- a/js/pdfs_strawberry.js +++ b/js/pdfs_strawberry.js @@ -1,7 +1,8 @@ -(function ($, Drupal, once, drupalSettings, pdfjsLib, pdfjsWorker) { +(function ($, Drupal, once, drupalSettings) { 'use strict'; Drupal.behaviors.format_strawberryfield_pdfjs = { - attach: function(context, settings) { + attach: function (context, settings) { + // The workerSrc property is a must! // @TODO: Would love to simply push ther 'worker' property since we already have it loaded // but i lack enough understanding of the API. @@ -10,143 +11,154 @@ // and worker being the global window['pdfjs-dist/build/pdf.worker'] should work // basically doc.src='url' and doc.worker = window['pdfjs-dist/build/pdf.worker' ? const elementsToAttach = once('attache_pdf', '.strawberry-document-item[data-iiif-document]', context); - $(elementsToAttach).each(function (index, value) { - var $document = $(this).data("iiif-document"); - let $theid = $(this).attr("id"); - let $thefileselectorid = $(this).attr("id") + '_file_selector'; - var $initialpage = $(this).data("iiif-initialpage"); - pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdn.jsdelivr.net/npm/pdfjs-dist@2.2.228/build/pdf.worker.min.js'; - - /** - * Single Page render - * Could be used for thumbnails? - */ - function singlePageRender() { - // Asynchronous download of PDF - var loadingTask = pdfjsLib.getDocument($document); - loadingTask.promise.then(function (pdf) { - console.log('PDF loaded'); - // Fetch the first page - var pageNumber = 1; - pdf.getPage(pageNumber).then(function (page) { - console.log('Page loaded'); - - var scale = 1.5; - var viewport = page.getViewport({scale: scale}); + import("https://cdn.jsdelivr.net/npm/pdfjs-dist@4.0.379/build/pdf.min.mjs").then((pdfjsLib) => { + pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdn.jsdelivr.net/npm/pdfjs-dist@4.0.379/build/pdf.worker.min.mjs'; + $(elementsToAttach).each(function (index, value) { + var $document = $(this).data("iiif-document"); + let $theid = $(this).attr("id"); + let $thefileselectorid = $(this).attr("id") + '_file_selector'; + var $initialpage = $(this).data("iiif-initialpage"); + const $viewer_type = $(this).data("iiif-pdfjs-type"); + /** + * Single Page render + * Could be used for thumbnails? + */ + function singlePageRender() { + // Asynchronous download of PDF + var loadingTask = pdfjsLib.getDocument($document); + loadingTask.promise.then(function (pdf) { + console.log('PDF loaded'); + // Fetch the first page + var pageNumber = 1; + pdf.getPage(pageNumber).then(function (page) { + console.log('Page loaded'); + + var scale = 1.5; + var viewport = page.getViewport({scale: scale}); + + // Prepare canvas using PDF page dimensions + var canvas = document.getElementById($theid); + var ctx = canvas.getContext('2d'); + canvas.height = viewport.height; + canvas.width = viewport.width; + + // Render PDF page into canvas context + var renderContext = { + canvasContext: ctx, + viewport: viewport + }; + var renderTask = page.render(renderContext); + renderTask.promise.then(function () { + console.log('Page rendered'); + }); + }); + }, function (reason) { + // PDF loading error + console.error(reason); + }); + } + + var pdfDoc = null, + pageNum = $initialpage, + pageRendering = false, + pageNumPending = null, + scale = 1.5, + canvas = document.getElementById($theid), + ctx = canvas.getContext('2d'); - // Prepare canvas using PDF page dimensions - var canvas = document.getElementById($theid); - var ctx = canvas.getContext('2d'); + /** + * Get page info, resize and render. + * @param num Page number. + */ + function renderPage(num) { + pageRendering = true; + // Using promise to fetch the page + pdfDoc.getPage(num).then(function (page) { + var viewport = page.getViewport({scale: scale}); canvas.height = viewport.height; canvas.width = viewport.width; - // Render PDF page into canvas context var renderContext = { canvasContext: ctx, viewport: viewport }; var renderTask = page.render(renderContext); + renderTask.promise.then(function () { - console.log('Page rendered'); + pageRendering = false; + if (pageNumPending !== null) { + renderPage(pageNumPending); + pageNumPending = null; + } }); }); - }, function (reason) { - // PDF loading error - console.error(reason); - }); - } - - var pdfDoc = null, - pageNum = $initialpage, - pageRendering = false, - pageNumPending = null, - scale = 1.5, - canvas = document.getElementById($theid), - ctx = canvas.getContext('2d'); - - /** - * Get page info, resize and render. - * @param num Page number. - */ - function renderPage(num) { - pageRendering = true; - // Using promise to fetch the page - pdfDoc.getPage(num).then(function(page) { - var viewport = page.getViewport({scale: scale}); - canvas.height = viewport.height; - canvas.width = viewport.width; - - var renderContext = { - canvasContext: ctx, - viewport: viewport - }; - var renderTask = page.render(renderContext); - - renderTask.promise.then(function() { - pageRendering = false; - if (pageNumPending !== null) { - renderPage(pageNumPending); - pageNumPending = null; - } - }); - }); - // Update page counters - document.getElementById($theid+'-pagenum').textContent = num; - } - - function queueRenderPage(num) { - if (pageRendering) { - pageNumPending = num; - } else { - renderPage(num); + // Update page counters + document.getElementById($theid + '-pagenum').textContent = num; } - } - - /** - * Displays prev. page. - */ - function onPrevPage() { - if (pageNum <= 1) { - return; + + function queueRenderPage(num) { + if (pageRendering) { + pageNumPending = num; + } else { + renderPage(num); + } } - pageNum--; - queueRenderPage(pageNum); - } - document.getElementById($theid+'-prev').addEventListener('click', onPrevPage); - - /** - * Displays next page. - */ - function onNextPage() { - if (pageNum >= pdfDoc.numPages) { - return; + + /** + * Displays prev. page. + */ + function onPrevPage() { + if (pageNum <= 1) { + return; + } + pageNum--; + queueRenderPage(pageNum); } - pageNum++; - queueRenderPage(pageNum); - } - document.getElementById($theid+'-next').addEventListener('click', onNextPage); - - /** - * Asynchronously downloads PDF. - */ - pdfjsLib.getDocument($document).promise.then(function(pdfDoc_) { - pdfDoc = pdfDoc_; - document.getElementById($theid+'-pagecount').textContent = pdfDoc.numPages; - renderPage(pageNum); - }); - let select_file = $('#'+$thefileselectorid); - if (select_file.length) { - select_file.change(function() { - pdfjsLib.getDocument($( this ).val()).promise.then(function(pdfDoc_) { - pdfDoc = pdfDoc_; - document.getElementById($theid+'-pagecount').textContent = pdfDoc.numPages; + document.getElementById($theid + '-prev').addEventListener('click', onPrevPage); + + /** + * Displays next page. + */ + function onNextPage() { + if (pageNum >= pdfDoc.numPages) { + return; + } + pageNum++; + queueRenderPage(pageNum); + } + + document.getElementById($theid + '-next').addEventListener('click', onNextPage); + + /** + * Asynchronously downloads PDF. + */ + pdfjsLib.getDocument($document).promise.then(function (pdfDoc_) { + pdfDoc = pdfDoc_; + + document.getElementById($theid + '-pagecount').textContent = pdfDoc.numPages; + renderPage(pageNum); - }); }); - } + let select_file = $('#' + $thefileselectorid); + if (select_file.length) { + select_file.change(function () { + pdfjsLib.getDocument($(this).val()).promise.then(function (pdfDoc_) { + pdfDoc = pdfDoc_; + document.getElementById($theid + '-pagecount').textContent = pdfDoc.numPages; + renderPage(pageNum); + }); + }); + } + + }); }); + + + + + } - }; -})(jQuery, Drupal, once, drupalSettings, window.pdfjsLib, window.pdfjsWorker); + } +})(jQuery, Drupal, once, drupalSettings); diff --git a/js/pdfs_strawberry_webpack.js b/js/pdfs_strawberry_webpack.js new file mode 100644 index 00000000..ef25ca6f --- /dev/null +++ b/js/pdfs_strawberry_webpack.js @@ -0,0 +1,5 @@ +// Static loading of ES6 Modules. But we can't trust drupal/browser of loading this one first (even if we set it as a dependency +// So leaving it as Unused for now, since i don't want to read 4 hours of JS help issues again. +import * as pdfjsLib from "https://cdn.jsdelivr.net/npm/pdfjs-dist@4.0.379/build/pdf.min.mjs"; +pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdn.jsdelivr.net/npm/pdfjs-dist@4.0.379/build/pdf.worker.min.mjs'; +window['pdfjsLib'] = pdfjsLib; diff --git a/src/Plugin/Field/FieldFormatter/StrawberryPdfFormatter.php b/src/Plugin/Field/FieldFormatter/StrawberryPdfFormatter.php index 645c7f96..75c8f187 100644 --- a/src/Plugin/Field/FieldFormatter/StrawberryPdfFormatter.php +++ b/src/Plugin/Field/FieldFormatter/StrawberryPdfFormatter.php @@ -49,6 +49,7 @@ public static function defaultSettings() { 'initial_page' => 1, 'number_pages' => 1, 'quality' => 'default', + 'pdfjs_type' => 'simple', 'rotation' => '0', ]; } @@ -63,6 +64,15 @@ public function settingsForm(array $form, FormStateInterface $form_state) { '#title' => t('JSON Key from where to fetch Document URLs'), '#default_value' => $this->getSetting('json_key_source'), ], + 'pdfjs_type' => [ + '#type' => 'select', + '#title' => t('Pick the type of PDFJS driven viewer'), + '#default_value' => $this->getSetting('pdfjs_type'), + '#options' => [ + 'simple' => $this->t('Only next/prev'), + 'normal' => $this->t('With Search '), + ] + ], 'number_documents' => [ '#type' => 'number', '#title' => $this->t('Number of Documents to extract for Key'), @@ -282,6 +292,7 @@ protected function generateElementForItem(int $delta, FieldItemListInterface $it $max_width = $this->getSetting('max_width'); $max_width_css = empty($max_width) || $max_width == 0 ? '100%' : $max_width .'px'; + $pdfjs_type = $this->getSetting('pdfjs_type'); $max_height = $this->getSetting('max_height'); $number_pages = $this->getSetting('number_pages'); $initial_page = $this->getSetting('initial_page'); @@ -316,6 +327,7 @@ protected function generateElementForItem(int $delta, FieldItemListInterface $it '#theme' => 'format_strawberryfield_pdfs', '#item' => [ 'id' => 'document_' . $uniqueid, + 'type' => $pdfjs_type, ] ]; @@ -336,6 +348,7 @@ protected function generateElementForItem(int $delta, FieldItemListInterface $it 'data-iiif-document' => $publicurl->toString(), 'data-iiif-initialpage' => $initial_page, 'data-iiif-pages' => $number_pages, + 'data-iiif-pdfjs-type' => $pdfjs_type, ], '#alt' => $this->t( 'PDF @name for @label', diff --git a/templates/format-strawberryfield-pdfs.html.twig b/templates/format-strawberryfield-pdfs.html.twig index 5bd05a14..45f98cd1 100644 --- a/templates/format-strawberryfield-pdfs.html.twig +++ b/templates/format-strawberryfield-pdfs.html.twig @@ -1,5 +1,7 @@ -
-
Pages: 1 of
- - -
+ From 6967224811f5165b0b34ceba15942955915185bb Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 4 Apr 2024 17:20:55 -0400 Subject: [PATCH 02/34] Reads better --- src/Plugin/Field/FieldFormatter/StrawberryPdfFormatter.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Plugin/Field/FieldFormatter/StrawberryPdfFormatter.php b/src/Plugin/Field/FieldFormatter/StrawberryPdfFormatter.php index 75c8f187..1c7792bd 100644 --- a/src/Plugin/Field/FieldFormatter/StrawberryPdfFormatter.php +++ b/src/Plugin/Field/FieldFormatter/StrawberryPdfFormatter.php @@ -70,7 +70,7 @@ public function settingsForm(array $form, FormStateInterface $form_state) { '#default_value' => $this->getSetting('pdfjs_type'), '#options' => [ 'simple' => $this->t('Only next/prev'), - 'normal' => $this->t('With Search '), + 'normal' => $this->t('With Native PDF.js Search'), ] ], 'number_documents' => [ From 21999c85d21d6ae8cfc9e12c75590b16c3c3d2d8 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 22 Apr 2024 13:09:13 -0400 Subject: [PATCH 03/34] IIIF Content Search API + small type on JS (doc) --- js/pdfs_strawberry.js | 2 +- .../IiifContentSearchController.php | 45 ++++++++++++------- src/Form/IiifSettingsForm.php | 34 ++++++++++++-- 3 files changed, 60 insertions(+), 21 deletions(-) diff --git a/js/pdfs_strawberry.js b/js/pdfs_strawberry.js index fc187113..60ca637a 100644 --- a/js/pdfs_strawberry.js +++ b/js/pdfs_strawberry.js @@ -4,7 +4,7 @@ attach: function (context, settings) { // The workerSrc property is a must! - // @TODO: Would love to simply push ther 'worker' property since we already have it loaded + // @TODO: Would love to simply push the 'worker' property since we already have it loaded // but i lack enough understanding of the API. // See here https://github.com/mozilla/pdf.js/blob/master/src/display/api.js#L218 // Probably (have to check) passing an Object with that key instead of a string to pdfjsLib.getDocument($doc) diff --git a/src/Controller/IiifContentSearchController.php b/src/Controller/IiifContentSearchController.php index 334f2318..8974c0e1 100644 --- a/src/Controller/IiifContentSearchController.php +++ b/src/Controller/IiifContentSearchController.php @@ -295,21 +295,27 @@ function () use ($metadataexposeconfig_entity, $node) { // Calculate Canvas and its offset // PDFs Sequence is correctly detected, but on images it should always be "1" // For that we will change the response from the main Solr search using our expected ID (splitting) - - $canvas = $image_hash[$hits_per_file_and_sequence['sbf_metadata']['uri']][$hits_per_file_and_sequence['sbf_metadata']['sequence_id']] ?? []; - foreach ($canvas as $canvas_id => $canvas_data) { + $uris = []; + foreach ($this->iiifConfig->get('iiif_content_search_api_file_uri_fields') ?? [] as $uri_field) { + $uris[] = $hits_per_file_and_sequence['sbf_metadata'][$uri_field] ?? NULL; + } + $sequence_id = $hits_per_file_and_sequence['sbf_metadata']['sequence_id'] ?? 1; + $uris = array_filter($uris); + $uri = reset($uris); + if ($uri) { + $canvas = $image_hash[$uri][$sequence_id] ?? []; + foreach ($canvas as $canvas_id => $canvas_data) { if ($canvas_id) { $canvas_parts = explode("#xywh=", $canvas_id); if (count($canvas_parts) == 2) { - $canvas_offset = explode(',' , $canvas_parts[1]); + $canvas_offset = explode(',', $canvas_parts[1]); $canvas_position = [ round($annotation['l'] * ($canvas_offset[2] ?? $canvas_data[0]) + $canvas_offset[0]), round($annotation['t'] * ($canvas_offset[3] ?? $canvas_data[1]) + $canvas_offset[1]), round(($annotation['r'] - $annotation['l']) * $canvas_offset[2]), round(($annotation['b'] - $annotation['t']) * $canvas_offset[3]), ]; - } - else { + } else { $canvas_position = [ round($annotation['l'] * $canvas_data[0]), round($annotation['t'] * $canvas_data[1]), @@ -333,37 +339,39 @@ function () use ($metadataexposeconfig_entity, $node) { // Generate the entry if ($version == "v1") { $entries[] = [ - "@id" => $current_url_clean + "@id" => $current_url_clean . "/annotation/anno-result/$i", - "@type" => "oa:Annotation", + "@type" => "oa:Annotation", "motivation" => "painting", - "resource" => [ + "resource" => [ "@type" => "cnt:ContentAsText", "chars" => $annotation['snippet'], ], - "on" => ($canvas_parts[0] ?? $canvas_id) . $canvas_position + "on" => ($canvas_parts[0] ?? $canvas_id) . $canvas_position ]; - } - elseif ($version == "v2") { + } elseif ($version == "v2") { $entries[] = [ - "id" => $current_url_clean + "id" => $current_url_clean . "/annotation/anno-result/$i", - "type" => "Annotation", + "type" => "Annotation", "motivation" => "painting", - "body" => [ + "body" => [ "type" => "TextualBody", "value" => $annotation['snippet'], "format" => "text/plain", ], - "target" => $canvas_id . $canvas_position + "target" => $canvas_id . $canvas_position ]; } } } + } } } } - + if (count($entries) == 0) { + $results['total'] = 0; + } if ($results['total'] > $this->iiifConfig->get('iiif_content_search_api_results_per_page')) { $max_page = ceil($results['total']/$this->iiifConfig->get('iiif_content_search_api_results_per_page')) - 1; if ($version == "v1") { @@ -549,6 +557,9 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $i } if (count($image_uris)) { + //Note here. If we don't have any fields configured the response will contain basically ANYTHING + // in the repo. So option 1 is make `iiif_content_search_api_file_uri_fields` required + // bail out if empty? foreach ($this->iiifConfig->get('iiif_content_search_api_file_uri_fields') ?? [] as $uri_field) { if (isset($allfields_translated_to_solr[$uri_field])) { $uri_conditions->addCondition($uri_field, $image_uris, 'IN'); diff --git a/src/Form/IiifSettingsForm.php b/src/Form/IiifSettingsForm.php index fcf389f4..4e68780a 100644 --- a/src/Form/IiifSettingsForm.php +++ b/src/Form/IiifSettingsForm.php @@ -127,11 +127,39 @@ public function buildForm(array $form, FormStateInterface $form_state) { '#default_value' => !empty($config->get('iiif_content_search_api_parent_node_fields')) ? $config->get( 'iiif_content_search_api_parent_node_fields' ) : [], - '#required' => FALSE, + '#required' => TRUE, '#multiple' => TRUE ]; $field_options = $this->getSbfFields('file_url'); + + $form['iiif_content_search_api_visual_enabled_processors'] = [ + '#type' => 'textfield', + '#title' => $this->t( + 'Strawberry Runner processors that should be searched against for visual highlights.' + ), + '#description' => $this->t( + 'e.g Strawberry Flavor Data might have been generated by the "ocr" strawberry runners processor. A comma separated list of processors (machine names) that generated miniOCR.' + ), + '#default_value' => !empty($config->get('iiif_content_search_api_visual_enabled_processors')) ? $config->get( + 'iiif_content_search_api_visual_enabled_processors' + ) : [], + '#required' => TRUE, + ]; + $form['iiif_content_search_api_time_enabled_processors'] = [ + '#type' => 'textfield', + '#title' => $this->t( + 'Strawberry Runner processors that should be searched against for time based media.' + ), + '#description' => $this->t( + 'e.g Strawberry Flavor Data might have been generated by the "subtitle" strawberry runners processor. A comma separated list of processors (machine names) that generated time based transcripts encoded as miniOCR.' + ), + '#default_value' => !empty($config->get('iiif_content_search_api_visual_enabled_processors')) ? $config->get( + 'iiif_content_search_api_time_enabled_processors' + ) : [], + '#required' => FALSE, + ]; + $form['iiif_content_search_api_file_uri_fields'] = [ '#type' => 'select', '#title' => $this->t( @@ -144,7 +172,7 @@ public function buildForm(array $form, FormStateInterface $form_state) { '#default_value' => !empty($config->get('iiif_content_search_api_file_uri_fields')) ? $config->get( 'iiif_content_search_api_file_uri_fields' ) : [], - '#required' => FALSE, + '#required' => TRUE, '#multiple' => TRUE ]; $form['iiif_content_search_api_results_per_page'] = [ @@ -278,7 +306,7 @@ protected function getSbfFields($type = NULL) { $field->getDataDefinition(); $fields[$field_id] = $field->getPrefixedLabel(); } - //&& ($property_path !== "nid" || $property_path !== "uuid") + //&& ($property_path !== "nid" || $property_path !== "uuid") } From 26a9a7f71fa45edc7add01984e53e9024a0c01f8 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 25 Apr 2024 22:20:33 -0400 Subject: [PATCH 04/34] Adds differentiated processor settings for X/Y (OCR), Time based, and pure text to the Content Search API @alliomeria (will explain tomorrow) --- ...field.entity.metadataapi_entity.schema.yml | 31 +++++++++++++++++++ src/Form/IiifSettingsForm.php | 26 ++++++++++++++-- 2 files changed, 54 insertions(+), 3 deletions(-) create mode 100644 config/schema/format_strawberryfield.entity.metadataapi_entity.schema.yml diff --git a/config/schema/format_strawberryfield.entity.metadataapi_entity.schema.yml b/config/schema/format_strawberryfield.entity.metadataapi_entity.schema.yml new file mode 100644 index 00000000..4faca208 --- /dev/null +++ b/config/schema/format_strawberryfield.entity.metadataapi_entity.schema.yml @@ -0,0 +1,31 @@ +'metadataexpose_entity.metadataapi_entity.*': + type: config_entity + label: 'Metadata API entity using Twig and Views Configuration' + mapping: + id: + type: string + label: 'ID' + label: + type: label + label: 'Label' + uuid: + type: string + metadataWrapperDisplayentity: + label: 'Metadata Display entity that will output the API wrapper and process the json of the items & api arguments' + type: string + metadataItemDisplayentity: + label: 'Metadata Display entity that will output the each Item process the json coming from each Views result row & api arguments' + type: string + views_source_ids: + type: sequence + label: 'The views used to generate results. These might also get arguments mapped from the API to any exposed relationship, filter, etc.' + sequence: + - type: string + api_type: + label: 'If rest or Sword. Rest APIs can use HTTP codes and header to communicate, but Sword ones always return bodies.' + type: string + cache: + type: boolean + active: + type: boolean + label: 'Whether this endpoint is active' diff --git a/src/Form/IiifSettingsForm.php b/src/Form/IiifSettingsForm.php index 4e68780a..f79406c6 100644 --- a/src/Form/IiifSettingsForm.php +++ b/src/Form/IiifSettingsForm.php @@ -152,14 +152,29 @@ public function buildForm(array $form, FormStateInterface $form_state) { 'Strawberry Runner processors that should be searched against for time based media.' ), '#description' => $this->t( - 'e.g Strawberry Flavor Data might have been generated by the "subtitle" strawberry runners processor. A comma separated list of processors (machine names) that generated time based transcripts encoded as miniOCR.' + 'e.g Strawberry Flavor Data might have been generated by the "subtitle" strawberry runners processor. These will have time based fragments and will match IIIF Annotations with motivation supplementing and target the time based media on the parent Canvas. A comma separated list of processors (machine names) that generated time based transcripts encoded as miniOCR.' ), - '#default_value' => !empty($config->get('iiif_content_search_api_visual_enabled_processors')) ? $config->get( + '#default_value' => !empty($config->get('iiif_content_search_api_time_enabled_processors')) ? $config->get( 'iiif_content_search_api_time_enabled_processors' ) : [], '#required' => FALSE, ]; + $form['iiif_content_search_api_text_enabled_processors'] = [ + '#type' => 'textfield', + '#title' => $this->t( + 'Strawberry Runner processors that should be searched against plain text extractions.' + ), + '#description' => $this->t( + 'e.g Strawberry Flavor Data might have been generated by the "text" strawberry runners processor. These will not have coordinates but will match IIIF Annotations with motivation supplementing and target the whole canvas. A comma separated list of processors (machine names) that generated time based transcripts encoded as miniOCR.' + ), + '#default_value' => !empty($config->get('iiif_content_search_api_text_enabled_processors')) ? $config->get( + 'iiif_content_search_api_text_enabled_processors' + ) : [], + '#required' => FALSE, + ]; + + $form['iiif_content_search_api_file_uri_fields'] = [ '#type' => 'select', '#title' => $this->t( @@ -260,8 +275,13 @@ public function submitForm(array &$form, FormStateInterface $form_state) { $form_state->getValue('iiif_content_search_validate_exposed') ?? FALSE) ->set('iiif_content_search_api_active', $form_state->getValue('iiif_content_search_api_active') ?? FALSE) + ->set('iiif_content_search_api_visual_enabled_processors', + $form_state->getValue('iiif_content_search_api_visual_enabled_processors') ?? '') + ->set('iiif_content_search_api_time_enabled_processors', + $form_state->getValue('iiif_content_search_api_time_enabled_processors') ?? '') + ->set('iiif_content_search_api_text_enabled_processors', + $form_state->getValue('iiif_content_search_api_text_enabled_processors') ?? '') ->save(); - parent::submitForm($form, $form_state); } From ce81ae39e80084502b10d625f9472570f2a0d7c9 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 25 Apr 2024 22:23:55 -0400 Subject: [PATCH 05/34] First pass on generalizing the solr backend query to allow plain text too Next step is to make 3 different queries based on what processors we have (so far this just does OCR so X/Y and Time) And add JMESPATH expressions to extract VTTs and Plain Text from annotations associated to a canvas. @alliomeria more soon! (we are close :) --- .../IiifContentSearchController.php | 86 ++++++++++++++----- 1 file changed, 64 insertions(+), 22 deletions(-) diff --git a/src/Controller/IiifContentSearchController.php b/src/Controller/IiifContentSearchController.php index 8974c0e1..b5d2456b 100644 --- a/src/Controller/IiifContentSearchController.php +++ b/src/Controller/IiifContentSearchController.php @@ -2,6 +2,7 @@ namespace Drupal\format_strawberryfield\Controller; +use Drupal\Component\Plugin\Exception\PluginException; use Drupal\Core\Controller\ControllerBase; use Drupal\Core\Entity\ContentEntityInterface; use Drupal\Core\Render\RenderContext; @@ -11,6 +12,7 @@ use Drupal\format_strawberryfield\Entity\MetadataExposeConfigEntity; use Drupal\format_strawberryfield\Tools\IiifHelper; use Drupal\search_api\Query\QueryInterface; +use Drupal\search_api\SearchApiException; use Drupal\strawberryfield\Plugin\search_api\datasource\StrawberryfieldFlavorDatasource; use Drupal\strawberryfield\Tools\StrawberryfieldJsonHelper; use Symfony\Component\DependencyInjection\ContainerInterface; @@ -475,20 +477,22 @@ protected function cleanJmesPathResult(array $jmespath_searchresult): array { /** - * OCR Search Controller specific to IIIF Content Seaach Needs + * OCR Search Controller specific to IIIF Content Search Needs * * @param string $term - * @param array $processors - * @param array $image_uris - * @param array $node_ids - * @param int $offset - * @param int $limit - * + * @param array $processors + * The list of processors. Matching processor to $ocr|true|false is done by the caller. + * @param array $file_uris + * @param array $node_ids + * @param int $offset + * @param int $limit + * @param bool $ocr + * If we should use the OCRHighlight extension and the ocr_text field. If not, we will go for normal highlight and sbf_plaintext plaint text. * @return array - * @throws \Drupal\Component\Plugin\Exception\PluginException - * @throws \Drupal\search_api\SearchApiException + * @throws PluginException + * @throws SearchApiException */ - protected function flavorfromSolrIndex(string $term, array $processors, array $image_uris, array $node_ids = [], $offset = 0, $limit = 100) { + protected function flavorfromSolrIndex(string $term, array $processors, array $file_uris, array $node_ids = [], $offset = 0, $limit = 100, $ocr = true) { $indexes = StrawberryfieldFlavorDatasource::getValidIndexes(); @@ -508,8 +512,29 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $i $parse_mode = $this->parseModeManager->createInstance('terms'); $query->setParseMode($parse_mode); $query->keys($term); - - $query->setFulltextFields(['ocr_text']); + // @TODO research if we can do a single Query instead of multiple ones? + if ($ocr) { + if (isset($allfields_translated_to_solr['ocr_text'])) { + $query->setFulltextFields(['ocr_text']); + } + else { + $this->loggerFactory->get('format_strawberryfield')->error('We can not execute a Content Search API query against XML OCR without a field named ocr_text of type Full Text Ocr Highlight'); + $search_result['annotations'] = []; + $search_result['total'] = 0; + return $search_result; + } + } + else { + if (isset($allfields_translated_to_solr['sbf_plaintext'])) { + $query->setFulltextFields(['sbf_plaintext']); + } + else { + $this->loggerFactory->get('format_strawberryfield')->error('We can not execute a Content Search API query against Plain Extracted Text without a field named sbf_plaintext of type Full Text'); + $search_result['annotations'] = []; + $search_result['total'] = 0; + return $search_result; + } + } $allfields_translated_to_solr = $search_api_index->getServerInstance() ->getBackend() @@ -535,12 +560,21 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $i $query->addCondition('search_api_datasource', 'strawberryfield_flavor_datasource') ->addCondition('processor_id', $processors, 'IN'); - if (isset($allfields_translated_to_solr['ocr_text'])) { - // Will be used by \strawberryfield_search_api_solr_query_alter + if (isset($allfields_translated_to_solr['ocr_text']) && $ocr) { + // Will be used by \Drupal\strawberryfield\EventSubscriber\SearchApiSolrEventSubscriber::preQuery $query->setOption('ocr_highlight', 'on'); // We are already checking if the Node can be viewed. Custom Datasources can not depend on Solr node access policies. $query->setOption('search_api_bypass_access', TRUE); } + if (isset($allfields_translated_to_solr['sbf_plaintext']) && !$ocr) { + // Will be used by \Drupal\strawberryfield\EventSubscriber\SearchApiSolrEventSubscriber::preQuery + + $query->setOption('sbf_highlight_fields', 'on'); + // We are already checking if the Node can be viewed. Custom Datasources can not depend on Solr node access policies. + $query->setOption('search_api_bypass_access', TRUE); + } + + $fields_to_retrieve['id'] = 'id'; if (isset($allfields_translated_to_solr['parent_sequence_id'])) { $fields_to_retrieve['parent_sequence_id'] = $allfields_translated_to_solr['parent_sequence_id']; @@ -555,14 +589,18 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $i if (isset($allfields_translated_to_solr['file_uuid'])) { $fields_to_retrieve['file_uuid'] = $allfields_translated_to_solr['file_uuid']; } + else { + $this->loggerFactory->get('format_strawberryfield')->warning('For Content Search API queries, please add a search api field named file_uuid containing the UUID of the file entity that generated the extraction you want to sarch'); + } - if (count($image_uris)) { + if (count($file_uris)) { //Note here. If we don't have any fields configured the response will contain basically ANYTHING // in the repo. So option 1 is make `iiif_content_search_api_file_uri_fields` required - // bail out if empty? + // bail out if empty? Or, we can add a short limit... that works too for now + // April 2024, to enable in the future postprocessor that generate SBF but not from files (e.g WARC) foreach ($this->iiifConfig->get('iiif_content_search_api_file_uri_fields') ?? [] as $uri_field) { if (isset($allfields_translated_to_solr[$uri_field])) { - $uri_conditions->addCondition($uri_field, $image_uris, 'IN'); + $uri_conditions->addCondition($uri_field, $file_uris, 'IN'); $fields_to_retrieve[$uri_field] = $allfields_translated_to_solr[$uri_field]; } @@ -571,9 +609,11 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $i } } } - // This is documented at the API level but maybe our processing level - // Does not trigger it? - // Still keeping it because maybe/someday it will work out! + else { + // in case no files are passed to filter, simply limit all to less? + $query->setOption('limit', 10); + } + // This might/not/be/respected. (API v/s reality) $query->setOption('search_api_retrieved_field_values', array_values($fields_to_retrieve)); // If we allow Extra processing here Drupal adds Content Access Check // That does not match our Data Source \Drupal\search_api\Plugin\search_api\processor\ContentAccess @@ -617,7 +657,6 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $i $filedata_by_id[$extradata_from_item['search_api_solr_document']['id']]['sequence_id'] = $real_sequence; } } - foreach ($extradata['search_api_solr_response']['ocrHighlighting'] as $sol_doc_id => $field) { $result_snippets_base = []; if (isset($field[$allfields_translated_to_solr['ocr_text']]['snippets']) && @@ -678,7 +717,6 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $i } } } - foreach($fields_to_retrieve as $machine_name => $machine_name_field) { $result_snippets_base['sbf_metadata'][$machine_name] = $filedata_by_id[$sol_doc_id][$machine_name]; } @@ -686,6 +724,10 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $i $result_snippets[] = $result_snippets_base; } } + elseif (isset($extradata['search_api_solr_response'])) { + // if no ocr hl was passed we won't have $extradata['search_api_solr_response']['ocrHighlighting'], so we process + // the other. These results won't have coordinates. + } } } $search_result['annotations'] = $result_snippets; From f952eb787674e0bf8b43107a646d00eebd77e6e1 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Fri, 26 Apr 2024 20:14:30 -0400 Subject: [PATCH 06/34] Just a doc type from olde times --- src/Controller/IiifBinaryController.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Controller/IiifBinaryController.php b/src/Controller/IiifBinaryController.php index 6acf529b..3f7f6197 100644 --- a/src/Controller/IiifBinaryController.php +++ b/src/Controller/IiifBinaryController.php @@ -266,7 +266,7 @@ public function servefile(Request $request, ContentEntityInterface $node, string } /** - * Serves the a temp File to its owner. + * Serves a temp File to its owner. * * @param string $uuid * @param string $format From 3e7adee303ff0258559546cf606606d3459c92ec Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Fri, 26 Apr 2024 20:14:47 -0400 Subject: [PATCH 07/34] What was this? Gosh --- src/Tools/IiifUrlValidator.php | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Tools/IiifUrlValidator.php b/src/Tools/IiifUrlValidator.php index 174f094f..182b728b 100644 --- a/src/Tools/IiifUrlValidator.php +++ b/src/Tools/IiifUrlValidator.php @@ -34,8 +34,7 @@ class IiifUrlValidator { * IiifUrlValidator constructor. */ public function __construct() { - $this->httpClient = $this->httpClient = \Drupal::httpClient(); - + $this->httpClient = \Drupal::httpClient(); } /** From 6bce0cfea53055078e890082223117d02e89056f Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Fri, 26 Apr 2024 20:15:20 -0400 Subject: [PATCH 08/34] Now this is good. New JMESPATH for Subtitles/VTT + Plus a whole veggie garden of code to support VTT --- .../IiifContentSearchController.php | 265 ++++++++++++------ 1 file changed, 180 insertions(+), 85 deletions(-) diff --git a/src/Controller/IiifContentSearchController.php b/src/Controller/IiifContentSearchController.php index b5d2456b..9fedf4f5 100644 --- a/src/Controller/IiifContentSearchController.php +++ b/src/Controller/IiifContentSearchController.php @@ -15,6 +15,7 @@ use Drupal\search_api\SearchApiException; use Drupal\strawberryfield\Plugin\search_api\datasource\StrawberryfieldFlavorDatasource; use Drupal\strawberryfield\Tools\StrawberryfieldJsonHelper; +use Ramsey\Uuid\Uuid; use Symfony\Component\DependencyInjection\ContainerInterface; use Symfony\Component\HttpFoundation\Request; use Symfony\Component\HttpKernel\Exception\BadRequestHttpException; @@ -33,7 +34,9 @@ class IiifContentSearchController extends ControllerBase { /** * A JMESPATH to fetch Canvas Size, Images and their targets IIIF Presentation 3.x */ - CONST IIIF_V3_JMESPATH = "items[?not_null(type, \"@type\") == 'Canvas'].[{width:width,height:height,img_canvas_pairs:items[?type == 'AnnotationPage'][].items[?motivation == 'painting'][body.not_null(id, \"@id\"), not_null(target)][]}][]"; + CONST IIIF_V3_JMESPATH = "items[?not_null(type, \"@type\") == 'Canvas'].[{width:width,height:height,img_canvas_pairs:items[?type == 'AnnotationPage'][].items[?motivation == 'painting' && body.type == 'Image'][body.not_null(id, \"@id\"), not_null(target)][]}][]"; + + CONST IIIF_V3_JMESPATH_VTT ="items[?not_null(type, \"@type\") == 'Canvas'].[{duration:duration,width:width,height:height,vtt_canvas_annotation_triad:items[?type == 'AnnotationPage' && items[?motivation == 'painting' && body.type == 'Video'] && annotations[?type == 'AnnotationPage' && items[?motivation == 'supplementing' && body.format == 'text/vtt']]][].annotations[].items[?motivation=='supplementing' && body.format == 'text/vtt'][body.not_null(id, \"@id\"), not_null(target),not_null(id, \"@id\")][]}][]"; /** * Mime type guesser service. @@ -266,14 +269,47 @@ function () use ($metadataexposeconfig_entity, $node) { } } - $jmespath_searchresult = StrawberryfieldJsonHelper::searchJson( - static::IIIF_V3_JMESPATH, $jsonArray - ); + $image_hash = []; + $vtt_hash = []; + $text_hash = []; + // Get the Visual X/Y Processors, split, clean; + $visual_processors = $this->iiifConfig->get('iiif_content_search_api_visual_enabled_processors') ?? 'ocr'; + //@TODO we could do this also on saving? see \Drupal\format_strawberryfield\Form\IiifSettingsForm::submitForm + $visual_processors = explode(",", $visual_processors); + $visual_processors = array_map('trim', $visual_processors); + $visual_processors = array_filter($visual_processors); + + $time_processors = $this->iiifConfig->get('iiif_content_search_api_time_enabled_processors') ?? 'subtitle'; + //@TODO we could do this also on saving? see \Drupal\format_strawberryfield\Form\IiifSettingsForm::submitForm + $time_processors = explode(",", $time_processors); + $time_processors = array_map('trim', $time_processors); + $time_processors = array_filter($time_processors); + + + if (count($visual_processors)) { + $jmespath_searchresult = StrawberryfieldJsonHelper::searchJson( + static::IIIF_V3_JMESPATH, $jsonArray + ); + $image_hash = $this->cleanImageJmesPathResult($jmespath_searchresult); + unset($jmespath_searchresult); + if (count($image_hash)) { + $results = $this->flavorfromSolrIndex($the_query_string, $visual_processors, array_keys($image_hash), [], [], ($page * $per_page), $per_page, TRUE); + } + } + if (count($time_processors)) { + $jmespath_searchresult = StrawberryfieldJsonHelper::searchJson( + static::IIIF_V3_JMESPATH_VTT, $jsonArray + ); + $vtt_hash = $this->cleanVttJmesPathResult($jmespath_searchresult); + unset($jmespath_searchresult); + // Here we use UUIDs instead + if (count($vtt_hash)) { + $results_time = $this->flavorfromSolrIndex($the_query_string, $time_processors, [], array_keys($vtt_hash), [], ($page * $per_page), $per_page, TRUE); + } + } + - $image_hash = $this->cleanJmesPathResult($jmespath_searchresult); - unset($jmespath_searchresult); - $results = $this->flavorfromSolrIndex($the_query_string, ['ocr'], array_keys($image_hash), [], ($page * $per_page), $per_page); /* Expected structure independent if V2 or V3. result = {array[345]} @@ -287,7 +323,8 @@ function () use ($metadataexposeconfig_entity, $node) { */ $entries = []; $paging_structure = []; - if (count($results['annotations'])) { + // Image/Visual based Annotations + if (count($results['annotations'] ?? [])) { $i = 0; foreach ($results['annotations'] as $hit => $hits_per_file_and_sequence) { foreach ( @@ -307,67 +344,58 @@ function () use ($metadataexposeconfig_entity, $node) { if ($uri) { $canvas = $image_hash[$uri][$sequence_id] ?? []; foreach ($canvas as $canvas_id => $canvas_data) { - if ($canvas_id) { - $canvas_parts = explode("#xywh=", $canvas_id); - if (count($canvas_parts) == 2) { - $canvas_offset = explode(',', $canvas_parts[1]); - $canvas_position = [ - round($annotation['l'] * ($canvas_offset[2] ?? $canvas_data[0]) + $canvas_offset[0]), - round($annotation['t'] * ($canvas_offset[3] ?? $canvas_data[1]) + $canvas_offset[1]), - round(($annotation['r'] - $annotation['l']) * $canvas_offset[2]), - round(($annotation['b'] - $annotation['t']) * $canvas_offset[3]), - ]; - } else { - $canvas_position = [ - round($annotation['l'] * $canvas_data[0]), - round($annotation['t'] * $canvas_data[1]), - round(($annotation['r'] - $annotation['l']) * $canvas_data[0]), - round(($annotation['b'] - $annotation['t']) * $canvas_data[1]), - ]; - } - - /*$canvas_position = [ - $annotation['l'] * 100, - $annotation['t'] * 100, - $annotation['r'] * 100, - $annotation['b'] * 100, - ];*/ - - $canvas_position = "#xywh=" . implode( - ",", $canvas_position - ); - - // V1 - // Generate the entry - if ($version == "v1") { - $entries[] = [ - "@id" => $current_url_clean - . "/annotation/anno-result/$i", - "@type" => "oa:Annotation", - "motivation" => "painting", - "resource" => [ - "@type" => "cnt:ContentAsText", - "chars" => $annotation['snippet'], - ], - "on" => ($canvas_parts[0] ?? $canvas_id) . $canvas_position - ]; - } elseif ($version == "v2") { - $entries[] = [ - "id" => $current_url_clean - . "/annotation/anno-result/$i", - "type" => "Annotation", - "motivation" => "painting", - "body" => [ - "type" => "TextualBody", - "value" => $annotation['snippet'], - "format" => "text/plain", - ], - "target" => $canvas_id . $canvas_position - ]; + if ($canvas_id) { + $canvas_parts = explode("#xywh=", $canvas_id); + if (count($canvas_parts) == 2) { + $canvas_offset = explode(',', $canvas_parts[1]); + $canvas_position = [ + round($annotation['l'] * ($canvas_offset[2] ?? $canvas_data[0]) + $canvas_offset[0]), + round($annotation['t'] * ($canvas_offset[3] ?? $canvas_data[1]) + $canvas_offset[1]), + round(($annotation['r'] - $annotation['l']) * $canvas_offset[2]), + round(($annotation['b'] - $annotation['t']) * $canvas_offset[3]), + ]; + } else { + $canvas_position = [ + round($annotation['l'] * $canvas_data[0]), + round($annotation['t'] * $canvas_data[1]), + round(($annotation['r'] - $annotation['l']) * $canvas_data[0]), + round(($annotation['b'] - $annotation['t']) * $canvas_data[1]), + ]; + } + $canvas_position = "#xywh=" . implode( + ",", $canvas_position + ); + // V1 + // Generate the entry + if ($version == "v1") { + $entries[] = [ + "@id" => $current_url_clean + . "/annotation/anno-result/$i", + "@type" => "oa:Annotation", + "motivation" => "painting", + "resource" => [ + "@type" => "cnt:ContentAsText", + "chars" => $annotation['snippet'], + ], + "on" => ($canvas_parts[0] ?? $canvas_id) . $canvas_position + ]; + } elseif ($version == "v2") { + $entries[] = [ + "id" => $current_url_clean + . "/annotation/anno-result/$i", + "type" => "Annotation", + "motivation" => "painting", + "body" => [ + "type" => "TextualBody", + "value" => $annotation['snippet'], + "format" => "text/plain", + ], + "target" => $canvas_id . $canvas_position + ]; + } } } } - } } } } @@ -430,10 +458,10 @@ function () use ($metadataexposeconfig_entity, $node) { } elseif ($version == "v1") { $iiif_response = [ - "@context" => "http://iiif.io/api/presentation/2/context.json", - "@id" => $current_url_clean, - "@type" => "sc:AnnotationList", - ] + $paging_structure; + "@context" => "http://iiif.io/api/presentation/2/context.json", + "@id" => $current_url_clean, + "@type" => "sc:AnnotationList", + ] + $paging_structure; $iiif_response = $iiif_response + $paging_structure; $iiif_response['resources'] = $entries; } @@ -454,13 +482,13 @@ function () use ($metadataexposeconfig_entity, $node) { } /** - * Cleans the over complex original JMESPATH result to a reversed array. + * Cleans the over complex original JMESPATH result for Images to a reversed array. * * @param array $jmespath_searchresult * * @return array */ - protected function cleanJmesPathResult(array $jmespath_searchresult): array { + protected function cleanImageJmesPathResult(array $jmespath_searchresult): array { $image_hash = []; foreach($jmespath_searchresult as $canvas_order => $entries_percanvas) { foreach (($entries_percanvas['img_canvas_pairs'] ?? []) as $image_canvas_pair) { @@ -475,6 +503,51 @@ protected function cleanJmesPathResult(array $jmespath_searchresult): array { return $image_hash; } + /** + * Cleans the over complex original JMESPATH result for a VTT to a reversed array. + * + * @param array $jmespath_searchresult + * @param bool $targetAnnotation + * If TRUE, we will return the VTT and the annotation itself as the target (allowing multiple VTTs per Canvas) + * If FALSE, we will return the VTT and the Canvas itself as the target (not caring which VTT matched) + * @return array + */ + protected function cleanVttJmesPathResult(array $jmespath_searchresult, $targetAnnotation = TRUE): array { + $vtt_hash = []; + foreach($jmespath_searchresult as $entries_percanvas) { + foreach (($entries_percanvas['vtt_canvas_annotation_triad'] ?? []) as $vtt_canvas_annon_triad) { + $vtt_uuid = NULL; + // VTTs are not IIIF Image API URls... We could use the UUID to load the File entity, Load also the File, compare if it is there, etc + // BUT, we also have the file_uuid in Solr already. + // For http://localhost:8001/do/99161a75-43d8-42ee-8f18-e8d1855640b6/file/5ed0caca-49e8-48d2-9125-dedadaef5b31/download/Train_Departure.vtt + + $path = pathinfo($vtt_canvas_annon_triad[0] ?? '/'); + $parts = explode("/", $path['dirname']); + $parts = array_reverse($parts); + // Might be longer (normally 8), if a subdomain with paths, that is why we reverse that paths + if (count($parts) >= 5 && $parts[0] == "download" && Uuid::isValid($parts[1]) && $parts[2] == "file" && Uuid::isValid($parts[3]) && $parts[4] == "do") { + $vtt_uuid = $parts[1]; + } + if (!$vtt_uuid) { + // just skip if we have no File uuid. + continue; + } + // The $vtt_canvas_annon_triad[1] is the Canvas targeted by the VTT. + // The $vtt_canvas_annon_triad[2] is the AnnotationID containing the VTT. + $sequence = 1 ; + $target = $targetAnnotation ? ($vtt_canvas_annon_triad[2] ?? NULL) : ($vtt_canvas_annon_triad[3] ?? NULL); + if (!$target) { + // just skip if we have no Target. + continue; + } + // We don't use the duration so if not present just give it a second to have a value in this array. + $vtt_hash[$vtt_uuid][$sequence][$target] = [($entries_percanvas["duration"] ?? 1)]; + } + } + unset($jmespath_searchresult); + return $vtt_hash; + } + /** * OCR Search Controller specific to IIIF Content Search Needs @@ -483,6 +556,7 @@ protected function cleanJmesPathResult(array $jmespath_searchresult): array { * @param array $processors * The list of processors. Matching processor to $ocr|true|false is done by the caller. * @param array $file_uris + * @param array $file_uuids * @param array $node_ids * @param int $offset * @param int $limit @@ -492,7 +566,8 @@ protected function cleanJmesPathResult(array $jmespath_searchresult): array { * @throws PluginException * @throws SearchApiException */ - protected function flavorfromSolrIndex(string $term, array $processors, array $file_uris, array $node_ids = [], $offset = 0, $limit = 100, $ocr = true) { + protected function flavorfromSolrIndex(string $term, array $processors, array $file_uris, array $file_uuids, array $node_ids = [], $offset = 0, $limit = 100, $ocr = TRUE): array + { $indexes = StrawberryfieldFlavorDatasource::getValidIndexes(); @@ -512,13 +587,17 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $f $parse_mode = $this->parseModeManager->createInstance('terms'); $query->setParseMode($parse_mode); $query->keys($term); + + $allfields_translated_to_solr = $search_api_index->getServerInstance() + ->getBackend() + ->getSolrFieldNames($query->getIndex()); // @TODO research if we can do a single Query instead of multiple ones? if ($ocr) { if (isset($allfields_translated_to_solr['ocr_text'])) { $query->setFulltextFields(['ocr_text']); } else { - $this->loggerFactory->get('format_strawberryfield')->error('We can not execute a Content Search API query against XML OCR without a field named ocr_text of type Full Text Ocr Highlight'); + $this->getLogger('format_strawberryfield')->error('We can not execute a Content Search API query against XML OCR without a field named ocr_text of type Full Text Ocr Highlight'); $search_result['annotations'] = []; $search_result['total'] = 0; return $search_result; @@ -529,19 +608,19 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $f $query->setFulltextFields(['sbf_plaintext']); } else { - $this->loggerFactory->get('format_strawberryfield')->error('We can not execute a Content Search API query against Plain Extracted Text without a field named sbf_plaintext of type Full Text'); + $this->getLogger('format_strawberryfield')->error('We can not execute a Content Search API query against Plain Extracted Text without a field named sbf_plaintext of type Full Text'); $search_result['annotations'] = []; $search_result['total'] = 0; return $search_result; } } + //@TODO: Should this also be a config as `iiif_content_search_api_parent_node_fields` is for example? + $uuid_uri_field = 'file_uuid'; - $allfields_translated_to_solr = $search_api_index->getServerInstance() - ->getBackend() - ->getSolrFieldNames($query->getIndex()); $parent_conditions = $query->createConditionGroup('OR'); $uri_conditions = $query->createConditionGroup('OR'); + $uuid_conditions = $query->createConditionGroup('OR'); // If Nodes are passed use them as conditionals if (count($node_ids)) { @@ -586,13 +665,19 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $f $fields_to_retrieve['sequence_id'] = $allfields_translated_to_solr['sequence_id']; $query->sort('sequence_id', QueryInterface::SORT_ASC); } - if (isset($allfields_translated_to_solr['file_uuid'])) { - $fields_to_retrieve['file_uuid'] = $allfields_translated_to_solr['file_uuid']; + if (isset($allfields_translated_to_solr[$uuid_uri_field])) { + $fields_to_retrieve[$uuid_uri_field] = $allfields_translated_to_solr[$uuid_uri_field]; + // Sadly we have to add the condition here, what if file_uuid is not defined? + + + + + } else { - $this->loggerFactory->get('format_strawberryfield')->warning('For Content Search API queries, please add a search api field named file_uuid containing the UUID of the file entity that generated the extraction you want to sarch'); + $this->getLogger('format_strawberryfield')->warning('For Content Search API queries, please add a search api field named file_uuid containing the UUID of the file entity that generated the extraction you want to sarch'); } - + $have_file_condition = FALSE; if (count($file_uris)) { //Note here. If we don't have any fields configured the response will contain basically ANYTHING // in the repo. So option 1 is make `iiif_content_search_api_file_uri_fields` required @@ -605,11 +690,21 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $f = $allfields_translated_to_solr[$uri_field]; } if (count($uri_conditions->getConditions())) { + $have_file_condition = TRUE; $query->addConditionGroup($uri_conditions); } } } - else { + if (count($file_uuids)) { + if (isset($allfields_translated_to_solr[$uuid_uri_field])) { + $uuid_conditions->addCondition($uuid_uri_field, $file_uuids, 'IN'); + } + if (count($uuid_conditions->getConditions())) { + $have_file_condition = TRUE; + $query->addConditionGroup($uuid_conditions); + } + } + if (!$have_file_condition) { // in case no files are passed to filter, simply limit all to less? $query->setOption('limit', 10); } @@ -678,14 +773,14 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $f $hit = $highlight[0]['text'] ?? $term; $before_and_after = explode("{$hit}", $region_text ?? $term); - // Check if (int) coordinates >=1 (ALTO) + // Check if (int) coordinates lrx >1 (ALTO) ... assuming nothing is at 1px to the right? // else between 0 and < 1 (MINIOCR) $before_index = $shared_parent_region[$parent_region] -1; $before_index = $before_index > 0 ? $before_index : 0; $after_index = $shared_parent_region[$parent_region]; $after_index = ($after_index < count($before_and_after)) ? $after_index : 1; - if ( ((int) $highlight[0]['lrx']) > 0 ){ + if ( ((int) $highlight[0]['lrx']) > 1 ){ //ALTO so coords need to be relative $left = sprintf('%.3f',((float) $highlight[0]['ulx'] / $page_width)); $top = sprintf('%.3f',((float) $highlight[0]['uly'] / $page_height)); From 996c6f0af935ffcbb7fa51734dc24cc7d2a85374 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Sat, 27 Apr 2024 14:29:22 -0400 Subject: [PATCH 09/34] Include Audio in the JMESPATH selector --- src/Controller/IiifContentSearchController.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Controller/IiifContentSearchController.php b/src/Controller/IiifContentSearchController.php index 9fedf4f5..c5c57240 100644 --- a/src/Controller/IiifContentSearchController.php +++ b/src/Controller/IiifContentSearchController.php @@ -36,7 +36,7 @@ class IiifContentSearchController extends ControllerBase { */ CONST IIIF_V3_JMESPATH = "items[?not_null(type, \"@type\") == 'Canvas'].[{width:width,height:height,img_canvas_pairs:items[?type == 'AnnotationPage'][].items[?motivation == 'painting' && body.type == 'Image'][body.not_null(id, \"@id\"), not_null(target)][]}][]"; - CONST IIIF_V3_JMESPATH_VTT ="items[?not_null(type, \"@type\") == 'Canvas'].[{duration:duration,width:width,height:height,vtt_canvas_annotation_triad:items[?type == 'AnnotationPage' && items[?motivation == 'painting' && body.type == 'Video'] && annotations[?type == 'AnnotationPage' && items[?motivation == 'supplementing' && body.format == 'text/vtt']]][].annotations[].items[?motivation=='supplementing' && body.format == 'text/vtt'][body.not_null(id, \"@id\"), not_null(target),not_null(id, \"@id\")][]}][]"; + CONST IIIF_V3_JMESPATH_VTT ="items[?not_null(type, \"@type\") == 'Canvas'].[{duration:duration,width:width,height:height,vtt_canvas_annotation_triad:items[?type == 'AnnotationPage' && items[?motivation == 'painting' && (body.type == 'Video' || body.type == 'Audio')] && annotations[?type == 'AnnotationPage' && items[?motivation == 'supplementing' && body.format == 'text/vtt']]][].annotations[].items[?motivation=='supplementing' && body.format == 'text/vtt'][body.not_null(id, \"@id\"), not_null(target),not_null(id, \"@id\")][]}][]"; /** * Mime type guesser service. From c52c098bd2aaac7c095adac0c2077cd008b05164 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Sat, 27 Apr 2024 16:43:09 -0400 Subject: [PATCH 10/34] Update Mirador to 3.3.0 (uff.. who knows right) and allow 4.0.0-alpha @alliomeria 4.0.0 alpha-2 at least allows an user to pick a subtitle, does not enable one by default though. --- .../schema/format_strawberryfield.schema.yml | 3 ++ format_strawberryfield.libraries.yml | 33 +++++++++++++++++-- .../StrawberryMiradorFormatter.php | 33 ++++++++++++++++--- 3 files changed, 62 insertions(+), 7 deletions(-) diff --git a/config/schema/format_strawberryfield.schema.yml b/config/schema/format_strawberryfield.schema.yml index af99ce44..1ebb86a0 100644 --- a/config/schema/format_strawberryfield.schema.yml +++ b/config/schema/format_strawberryfield.schema.yml @@ -505,6 +505,9 @@ field.formatter.settings.strawberry_mirador_formatter: custom_js: type: boolean label: 'If custom Mirador with plugin is going to be used instead of Vanilla CDN' + mirador_version: + type: integer + label: 'Version 3 or 4 from CDN. Does not apply if custom is selectoed' mediasource: type: mapping label: 'Sources for IIIF URL' diff --git a/format_strawberryfield.libraries.yml b/format_strawberryfield.libraries.yml index 87a6d329..14a296dc 100644 --- a/format_strawberryfield.libraries.yml +++ b/format_strawberryfield.libraries.yml @@ -182,14 +182,27 @@ pdfs_strawberry: - format_strawberryfield/iiif_formatstrawberryfield_utils mirador_projectmirador: - version: 3.1.1 + version: 3.3.0 + license: + name: Apache + url: //github.com/ProjectMirador/mirador/blob/master/LICENSE + gpl-compatible: true + js: + https://cdn.jsdelivr.net/npm/mirador@3.3.0/dist/mirador.min.js: { external: true, minified: true, preprocess: false} + https://cdn.jsdelivr.net/npm/redux-saga@1.2.3/dist/redux-saga.umd.min.js: { external: true, minified: true, preprocess: false} + +mirador_projectmirador_4: + version: 4.0.0-alpha license: name: Apache url: //github.com/ProjectMirador/mirador/blob/master/LICENSE gpl-compatible: true js: - https://cdn.jsdelivr.net/npm/mirador@3.1.1/dist/mirador.min.js: { external: true, minified: true, preprocess: false} + https://unpkg.com/mirador@4.0.0-alpha.2/dist/mirador.min.js: { external: true, minified: true, preprocess: false} https://cdn.jsdelivr.net/npm/redux-saga@1.2.3/dist/redux-saga.umd.min.js: { external: true, minified: true, preprocess: false} + dependencies: + - format_strawberryfield/mirador_font + - format_strawberryfield/iiif_formatstrawberryfield_utils mirador_font: css: @@ -238,6 +251,22 @@ mirador_custom_strawberry: - format_strawberryfield/mirador_font - format_strawberryfield/iiif_formatstrawberryfield_utils +mirador_strawberry_four: + version: 1.4 + js: + js/mirador_strawberry.js: {minified: false} + css: + component: + css/miradorviewer.css: {} + dependencies: + - core/jquery + - core/drupal + - core/once + - core/drupalSettings + - format_strawberryfield/mirador_projectmirador_4 + - format_strawberryfield/mirador_font + - format_strawberryfield/iiif_formatstrawberryfield_utils + uv_strawberry: version: 1.2 js: diff --git a/src/Plugin/Field/FieldFormatter/StrawberryMiradorFormatter.php b/src/Plugin/Field/FieldFormatter/StrawberryMiradorFormatter.php index 89851036..b1a2ec8c 100644 --- a/src/Plugin/Field/FieldFormatter/StrawberryMiradorFormatter.php +++ b/src/Plugin/Field/FieldFormatter/StrawberryMiradorFormatter.php @@ -98,8 +98,8 @@ public function __construct( public static function create( ContainerInterface $container, array $configuration, - $plugin_id, - $plugin_definition + $plugin_id, + $plugin_definition ) { return new static( $plugin_id, @@ -129,6 +129,7 @@ public static function defaultSettings() { 'metadataexposeentity_source' => NULL, 'manifestnodelist_json_key_source' => 'isrelatedto', 'manifesturl_json_key_source' => 'iiifmanifest', + 'mirador_version' => 3, 'custom_js' => FALSE, 'viewer_overrides' => '', 'max_width' => 720, @@ -215,6 +216,22 @@ public function settingsForm(array $form, FormStateInterface $form_state) { '#type' => 'checkbox', '#title' => t('Use Custom Archipelago Mirador with Plugins'), '#default_value' => $this->getSetting('custom_js') ?? FALSE, + '#attributes' => [ + 'data-formatter-selector' => 'custom-js', + ], + ], + 'mirador_version' => [ + '#type' => 'radios', + '#options' => [3 => 'Mirador 3', 4 => 'Mirador 4'], + '#title' => t('Which Version from CDN'), + '#default_value' => $this->getSetting('mirador_version') ?? 3, + '#states' => [ + [ + 'visible' => [ + ':input[data-formatter-selector="custom-js"]' => ['checked' => FALSE], + ] + ], + ] ], 'viewer_overrides' => [ '#type' => 'textarea', @@ -527,7 +544,7 @@ public function viewElements(FieldItemListInterface $items, $langcode) { // if we could decode it, it is already JSON.. $viewer_overrides = $jsondata["ap:viewerhints"][$this->getPluginId()]; } - // A rendered Manifest + // A rendered Manifest if ($hide_on_embargo) { $embargo_info = $this->embargoResolver->embargoInfo( $item->getEntity()->uuid(), $jsondata @@ -629,8 +646,14 @@ public function viewElements(FieldItemListInterface $items, $langcode) { = 'format_strawberryfield/mirador_custom_strawberry'; } else { - $elements[$delta]['#attached']['library'][] - = 'format_strawberryfield/mirador_strawberry'; + if (($this->getSetting('mirador_version') ?? 3) == 3) { + $elements[$delta]['#attached']['library'][] + = 'format_strawberryfield/mirador_strawberry'; + } + else { + $elements[$delta]['#attached']['library'][] + = 'format_strawberryfield/mirador_strawberry_four'; + } } } } From 19e5da4cb21471e51ca6fd25d80ba751d339bc53 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Sat, 27 Apr 2024 16:44:19 -0400 Subject: [PATCH 11/34] Fixes race conditon where a Mirador WindowID is set, but does not exist yet So we can still dispatch an event, but not from our Window Dom, but from the parent/initialized Archipelago DIV (will it work? who knows ... but probably was not working before anyways hahaha) --- js/mirador_strawberry.js | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/js/mirador_strawberry.js b/js/mirador_strawberry.js index 5f1122db..2a907626 100644 --- a/js/mirador_strawberry.js +++ b/js/mirador_strawberry.js @@ -121,7 +121,7 @@ const effects = ReduxSaga.effects; /* function* is a generator function thus the yield */ - function* formatStrawverryFieldReact(action) { + function* formatStrawberryFieldReact(action) { const state = yield effects.select(Mirador.actions.getState); const newParams = Object.fromEntries(new URLSearchParams(location.search)) @@ -131,7 +131,12 @@ ) { const { windowId } = action let { visibleCanvases, view, canvasId } = action - const el = document.getElementById(windowId); + var el = document.getElementById(windowId); + if (el == null) { + // Means the window has not yet loaded... it is the first set canvas. + // use the first initialized parent container as `el` so we can dispatch an event if needed + el = document.querySelector(".strawberry-mirador-item[data-iiif-infojson][data-once='attache_mirador']"); + } if ( !visibleCanvases && (action.type === ActionTypes.SET_WINDOW_VIEW_TYPE @@ -189,13 +194,15 @@ currentDrupalNodeId = currentDrupalNodeId.filter(n => n); currentDrupalNodeForViews = currentDrupalNodeForViews.filter(n => n); // Check if currentCanvasMetadata has `dr:nid` could be a single value or an array - if (currentDrupalNodeId.length > 0) { + if (currentDrupalNodeId.length > 0 && el) { Drupal.FormatStrawberryfieldIiifUtils.dispatchAdoChange(el, currentDrupalNodeId, state.config.id); } - if (currentDrupalNodeForViews.length > 0) { + if (currentDrupalNodeForViews.length > 0 && el) { Drupal.FormatStrawberryfieldIiifUtils.dispatchAdoViewChange(el, currentDrupalNodeForViews); } - Drupal.FormatStrawberryfieldIiifUtils.dispatchCanvasChange(el, canvasId, manifestUrl, state.config.id); + if (el) { + Drupal.FormatStrawberryfieldIiifUtils.dispatchCanvasChange(el, canvasId, manifestUrl, state.config.id); + } } else { console.log('IIIF Presentation Manifest V2'); @@ -241,11 +248,11 @@ ActionTypes.REMOVE_SEARCH, ActionTypes.SET_WINDOW_VIEW_TYPE, ], - formatStrawverryFieldReact + formatStrawberryFieldReact ) } - const formatStrawverryFieldReactPlugin = { + const formatStrawberryFieldReactPlugin = { component: () => null, saga: rootSaga, }; @@ -338,8 +345,8 @@ console.log('initializing Custom Mirador 3.3.0') } else { - const miradorInstance = Mirador.viewer($options, [formatStrawverryFieldReactPlugin]); - console.log('initializing Mirador 3.1.1') + const miradorInstance = Mirador.viewer($options, [formatStrawberryFieldReactPlugin]); + console.log('initializing Mirador') if (miradorInstance) { // To allow bubling up we need to add this one to the document // Multiple Miradors will replace each other? @@ -348,7 +355,6 @@ document.addEventListener('sbf:ado:change', CaptureAdoMiradorAdoChange.bind(document, miradorInstance, element_id)); } } - // Work around https://github.com/ProjectMirador/mirador/issues/3486 const mirador_window = document.getElementById(element_id); var observer = new MutationObserver(function(mutations) { From 589d585a2aa9eee48d337e0091ad963288e03fd1 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Sat, 27 Apr 2024 16:45:03 -0400 Subject: [PATCH 12/34] Damn annotations. Need to exist inside the canvas, but not at the internal items level. So basically a new JMESPATH selector (wish i had better things to do on a weekend) --- src/Controller/IiifContentSearchController.php | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Controller/IiifContentSearchController.php b/src/Controller/IiifContentSearchController.php index c5c57240..f424398b 100644 --- a/src/Controller/IiifContentSearchController.php +++ b/src/Controller/IiifContentSearchController.php @@ -36,8 +36,7 @@ class IiifContentSearchController extends ControllerBase { */ CONST IIIF_V3_JMESPATH = "items[?not_null(type, \"@type\") == 'Canvas'].[{width:width,height:height,img_canvas_pairs:items[?type == 'AnnotationPage'][].items[?motivation == 'painting' && body.type == 'Image'][body.not_null(id, \"@id\"), not_null(target)][]}][]"; - CONST IIIF_V3_JMESPATH_VTT ="items[?not_null(type, \"@type\") == 'Canvas'].[{duration:duration,width:width,height:height,vtt_canvas_annotation_triad:items[?type == 'AnnotationPage' && items[?motivation == 'painting' && (body.type == 'Video' || body.type == 'Audio')] && annotations[?type == 'AnnotationPage' && items[?motivation == 'supplementing' && body.format == 'text/vtt']]][].annotations[].items[?motivation=='supplementing' && body.format == 'text/vtt'][body.not_null(id, \"@id\"), not_null(target),not_null(id, \"@id\")][]}][]"; - + CONST IIIF_V3_JMESPATH_VTT ="items[?not_null(type, \"@type\") == 'Canvas'].[{duration:duration, width:width, height:height, vtt_canvas_annotation_triad:annotations[].items[?motivation=='supplementing' && body.format == 'text/vtt'][body.not_null(id, \"@id\"), not_null(target),not_null(id, \"@id\")][]}][]"; /** * Mime type guesser service. * From d043074694450e02f51331adc2e2a6a7e9072976 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 29 Apr 2024 17:45:59 -0400 Subject: [PATCH 13/34] More updates. Almost there with the results for time fragments --- .../IiifContentSearchController.php | 92 +++++++++++++++++-- 1 file changed, 82 insertions(+), 10 deletions(-) diff --git a/src/Controller/IiifContentSearchController.php b/src/Controller/IiifContentSearchController.php index f424398b..10ed477e 100644 --- a/src/Controller/IiifContentSearchController.php +++ b/src/Controller/IiifContentSearchController.php @@ -284,7 +284,6 @@ function () use ($metadataexposeconfig_entity, $node) { $time_processors = array_map('trim', $time_processors); $time_processors = array_filter($time_processors); - if (count($visual_processors)) { $jmespath_searchresult = StrawberryfieldJsonHelper::searchJson( static::IIIF_V3_JMESPATH, $jsonArray @@ -307,9 +306,6 @@ function () use ($metadataexposeconfig_entity, $node) { } } - - - /* Expected structure independent if V2 or V3. result = {array[345]} 0 = {array[3]} @@ -398,6 +394,85 @@ function () use ($metadataexposeconfig_entity, $node) { } } } + // Time based Annotations + if (count($results_time['annotations'] ?? [])) { + $i = 0; + foreach ($results_time['annotations'] as $hit => $hits_per_file_and_sequence) { + foreach ( + ($hits_per_file_and_sequence['boxes'] ?? []) as $annotation + ) { + $i++; + // Calculate Canvas and its offset + // PDFs Sequence is correctly detected, but on images it should always be "1" + // For that we will change the response from the main Solr search using our expected ID (splitting) + $uuid_uri_field = 'file_uuid'; + $uuids[] = $hits_per_file_and_sequence['sbf_metadata'][$uuid_uri_field] ?? NULL; + $sequence_id = $hits_per_file_and_sequence['sbf_metadata']['sequence_id'] ?? 1; + $uuids = array_filter($uuids); + $uuid = reset($uuids); + if ($uuid) { + $target = $vtt_hash[$uuid][$sequence_id] ?? []; + foreach ($target as $target_id => $target_data) { + if ($target_id) { + $target_parts = explode("#xywh=", $target_id); + if (count($target_parts) == 2) { + $target_parts = explode(',', $target_parts[1]); + $target_time = [ + round($annotation['l'] * ($canvas_offset[2] ?? $canvas_data[0]) + $canvas_offset[0]), + round($annotation['t'] * ($canvas_offset[3] ?? $canvas_data[1]) + $canvas_offset[1]), + round(($annotation['r'] - $annotation['l']) * $canvas_offset[2]), + round(($annotation['b'] - $annotation['t']) * $canvas_offset[3]), + ]; + } else { + $target_time = [ + round($annotation['t'] * $canvas_data[1]), + round(($annotation['b'] - $annotation['t']) * $canvas_data[1]), + ]; + } + $target_fragment = "#t=" . implode( + ",", $target_time + ); + // V1 + // Generate the entry + if ($version == "v1") { + $entries[] = [ + "@id" => $current_url_clean + . "/annotation/anno-result/$i", + "@type" => "oa:Annotation", + "motivation" => "painting", + "resource" => [ + "@type" => "cnt:ContentAsText", + "chars" => $annotation['snippet'], + ], + "on" => ($target_parts[0] ?? $target_id) . $target_fragment + ]; + } elseif ($version == "v2") { + $entries[] = [ + "id" => $current_url_clean + . "/annotation/anno-result/$i", + "type" => "Annotation", + "motivation" => "painting", + "body" => [ + "type" => "TextualBody", + "value" => $annotation['snippet'], + "format" => "text/plain", + ], + "target" => $target_id . $target_fragment + ]; + } + } + } + } + } + } + } + + + + + + + if (count($entries) == 0) { $results['total'] = 0; } @@ -667,11 +742,6 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $f if (isset($allfields_translated_to_solr[$uuid_uri_field])) { $fields_to_retrieve[$uuid_uri_field] = $allfields_translated_to_solr[$uuid_uri_field]; // Sadly we have to add the condition here, what if file_uuid is not defined? - - - - - } else { $this->getLogger('format_strawberryfield')->warning('For Content Search API queries, please add a search api field named file_uuid containing the UUID of the file entity that generated the extraction you want to sarch'); @@ -758,7 +828,7 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $f foreach ($field[$allfields_translated_to_solr['ocr_text']]['snippets'] as $snippet) { $page_width = (float) $snippet['pages'][0]['width']; $page_height = (float) $snippet['pages'][0]['height']; - + $is_time = str_starts_with($snippet['pages'][0]['id'], 'timesequence_'); $result_snippets_base = [ 'boxes' => $result_snippets_base['boxes'] ?? [], ]; @@ -794,6 +864,7 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $f 'before' => $before_and_after[$before_index] ?? '', 'after' => $before_and_after[$after_index] ?? '', 'hit' => $hit, + 'time' => $is_time, ]; } else { @@ -807,6 +878,7 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $f 'before' => $before_and_after[$before_index] ?? '', 'after' => $before_and_after[$after_index] ?? '', 'hit' => $hit, + 'time' => $is_time ]; } } From c4dfed994c2c170baa86a2e62ee3b08aa5082db0 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 29 Apr 2024 19:14:00 -0400 Subject: [PATCH 14/34] Adds schema variables for Content Search API differentiated queries/processors --- config/schema/format_strawberryfield.schema.yml | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/config/schema/format_strawberryfield.schema.yml b/config/schema/format_strawberryfield.schema.yml index 1ebb86a0..b921e9b7 100644 --- a/config/schema/format_strawberryfield.schema.yml +++ b/config/schema/format_strawberryfield.schema.yml @@ -11,15 +11,24 @@ format_strawberryfield.iiif_settings: iiif_content_search_api_page_count: type: int label: 'The max results to show per page on a iiif_content_search_api_page_count' - iiif_content_search_api_enabled_processors: + iiif_content_search_api_visual_enabled_processors: type: string - label: 'Comma separated list of Strawberry Runner Processors that can be fetched by the Content Search API on query.' + label: 'Comma separated list of Strawberry Runner Processors for Visual Annotations (OCR) that can be fetched by the Content Search API on query.' + iiif_content_search_api_time_enabled_processors: + type: string + label: 'Comma separated list of Strawberry Runner Processors for time Annotations (OCR from time) can be fetched by the Content Search API on query.' + iiif_content_search_api_text_enabled_processors: + type: string + label: 'Comma separated list of Strawberry Runner Processors for plain text annotations (no OCR) that can be fetched by the Content Search API on query.' iiif_content_search_api_active: type: bool label: 'If IIIF Content Search API V1 and V2 are enabled.' iiif_content_search_validate_exposed: type: bool label: 'If IIIF Only explicit definitions in a manifest allow a search against them' + iiif_content_search_validate_exposed: + type: bool + label: 'If IIIF Only explicit definitions in a manifest allow a search against them' iiif_content_search_api_parent_node_fields: type: sequence label: 'Strawberry Flavor Data Source Search API Fields that can be used to connect a Strawberry Flavor to a Parent ADO.' From ec63f0c2fd0c46a26c20ea8978b709d765c70628 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 29 Apr 2024 19:14:42 -0400 Subject: [PATCH 15/34] Adds option so one could return either as target the original Annotation OR a Canvas as target. None of these work with Mirador sadly (at least so far) --- src/Form/IiifSettingsForm.php | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Form/IiifSettingsForm.php b/src/Form/IiifSettingsForm.php index f79406c6..4cc9955a 100644 --- a/src/Form/IiifSettingsForm.php +++ b/src/Form/IiifSettingsForm.php @@ -159,6 +159,12 @@ public function buildForm(array $form, FormStateInterface $form_state) { ) : [], '#required' => FALSE, ]; + $form['iiif_content_search_time_targetannotations'] = [ + '#type' => 'checkbox', + '#title' => $this->t('Target the VTT Supplementing Annotation'), + '#default_value' => $config->get('iiif_content_search_time_targetannotations') ?? FALSE, + '#description' => $this->t('If enabled (aligned with the specs) the target of a hit result will point to the supplementing Annotation containing in its body the VTT file. If not the Canvas containing in its body a Media Resource (less precise but more compatible with Viewers'), + ]; $form['iiif_content_search_api_text_enabled_processors'] = [ '#type' => 'textfield', @@ -281,6 +287,8 @@ public function submitForm(array &$form, FormStateInterface $form_state) { $form_state->getValue('iiif_content_search_api_time_enabled_processors') ?? '') ->set('iiif_content_search_api_text_enabled_processors', $form_state->getValue('iiif_content_search_api_text_enabled_processors') ?? '') + ->set('iiif_content_search_time_targetannotations', + $form_state->getValue('iiif_content_search_time_targetannotations') ?? FALSE) ->save(); parent::submitForm($form, $form_state); } From 011bc4e5cdde0fc083bae48f38836d20516c96b0 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 29 Apr 2024 19:14:58 -0400 Subject: [PATCH 16/34] Robuster interface --- .../IiifContentSearchController.php | 90 +++++++++++-------- 1 file changed, 53 insertions(+), 37 deletions(-) diff --git a/src/Controller/IiifContentSearchController.php b/src/Controller/IiifContentSearchController.php index 10ed477e..9fd986dd 100644 --- a/src/Controller/IiifContentSearchController.php +++ b/src/Controller/IiifContentSearchController.php @@ -294,11 +294,14 @@ function () use ($metadataexposeconfig_entity, $node) { $results = $this->flavorfromSolrIndex($the_query_string, $visual_processors, array_keys($image_hash), [], [], ($page * $per_page), $per_page, TRUE); } } + + $target_annotation = FALSE; if (count($time_processors)) { $jmespath_searchresult = StrawberryfieldJsonHelper::searchJson( static::IIIF_V3_JMESPATH_VTT, $jsonArray ); - $vtt_hash = $this->cleanVttJmesPathResult($jmespath_searchresult); + $target_annotation = $this->iiifConfig->get('iiif_content_search_time_targetannotations') ?? FALSE; + $vtt_hash = $this->cleanVttJmesPathResult($jmespath_searchresult, $target_annotation); unset($jmespath_searchresult); // Here we use UUIDs instead if (count($vtt_hash)) { @@ -399,7 +402,7 @@ function () use ($metadataexposeconfig_entity, $node) { $i = 0; foreach ($results_time['annotations'] as $hit => $hits_per_file_and_sequence) { foreach ( - ($hits_per_file_and_sequence['boxes'] ?? []) as $annotation + ($hits_per_file_and_sequence['timespans'] ?? []) as $annotation ) { $i++; // Calculate Canvas and its offset @@ -414,21 +417,10 @@ function () use ($metadataexposeconfig_entity, $node) { $target = $vtt_hash[$uuid][$sequence_id] ?? []; foreach ($target as $target_id => $target_data) { if ($target_id) { - $target_parts = explode("#xywh=", $target_id); - if (count($target_parts) == 2) { - $target_parts = explode(',', $target_parts[1]); - $target_time = [ - round($annotation['l'] * ($canvas_offset[2] ?? $canvas_data[0]) + $canvas_offset[0]), - round($annotation['t'] * ($canvas_offset[3] ?? $canvas_data[1]) + $canvas_offset[1]), - round(($annotation['r'] - $annotation['l']) * $canvas_offset[2]), - round(($annotation['b'] - $annotation['t']) * $canvas_offset[3]), - ]; - } else { - $target_time = [ - round($annotation['t'] * $canvas_data[1]), - round(($annotation['b'] - $annotation['t']) * $canvas_data[1]), - ]; - } + $target_time = [ + round($annotation['s']), + round($annotation['e']) + ]; $target_fragment = "#t=" . implode( ",", $target_time ); @@ -439,7 +431,7 @@ function () use ($metadataexposeconfig_entity, $node) { "@id" => $current_url_clean . "/annotation/anno-result/$i", "@type" => "oa:Annotation", - "motivation" => "painting", + "motivation" => $target_annotation ? "supplementing" : "painting", "resource" => [ "@type" => "cnt:ContentAsText", "chars" => $annotation['snippet'], @@ -451,7 +443,7 @@ function () use ($metadataexposeconfig_entity, $node) { "id" => $current_url_clean . "/annotation/anno-result/$i", "type" => "Annotation", - "motivation" => "painting", + "motivation" => $target_annotation ? "supplementing" : "painting", "body" => [ "type" => "TextualBody", "value" => $annotation['snippet'], @@ -476,7 +468,9 @@ function () use ($metadataexposeconfig_entity, $node) { if (count($entries) == 0) { $results['total'] = 0; } - if ($results['total'] > $this->iiifConfig->get('iiif_content_search_api_results_per_page')) { + $total = ($results['total'] ?? 0) + ($results_time['total'] ?? 0); + + if ($total > $this->iiifConfig->get('iiif_content_search_api_results_per_page')) { $max_page = ceil($results['total']/$this->iiifConfig->get('iiif_content_search_api_results_per_page')) - 1; if ($version == "v1") { $paging_structure = [ @@ -487,7 +481,7 @@ function () use ($metadataexposeconfig_entity, $node) { "last" => $current_url_clean_no_page.'/'.$max_page .'?='.urlencode($the_query_string), ] ]; - if ($results['total'] > (($page+1) * $this->iiifConfig->get('iiif_content_search_api_results_per_page'))) { + if ($total > (($page+1) * $this->iiifConfig->get('iiif_content_search_api_results_per_page'))) { $paging_structure["next"] = $current_url_clean_no_page.'/'.($page + 1).'?='.urlencode($the_query_string); $paging_structure["startIndex"] = $page * $this->iiifConfig->get('iiif_content_search_api_results_per_page'); } @@ -511,7 +505,7 @@ function () use ($metadataexposeconfig_entity, $node) { ] ] ]; - if ($results['total'] > (($page+1) * $this->iiifConfig->get('iiif_content_search_api_results_per_page'))) { + if ($total > (($page+1) * $this->iiifConfig->get('iiif_content_search_api_results_per_page'))) { $paging_structure["next"] = [ "id" => $current_url_clean_no_page.'/'.($page + 1).'?='.urlencode($the_query_string), "type" => "AnnotationPage", @@ -609,7 +603,7 @@ protected function cleanVttJmesPathResult(array $jmespath_searchresult, $targetA // The $vtt_canvas_annon_triad[1] is the Canvas targeted by the VTT. // The $vtt_canvas_annon_triad[2] is the AnnotationID containing the VTT. $sequence = 1 ; - $target = $targetAnnotation ? ($vtt_canvas_annon_triad[2] ?? NULL) : ($vtt_canvas_annon_triad[3] ?? NULL); + $target = $targetAnnotation ? ($vtt_canvas_annon_triad[2] ?? NULL) : ($vtt_canvas_annon_triad[1] ?? NULL); if (!$target) { // just skip if we have no Target. continue; @@ -829,9 +823,16 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $f $page_width = (float) $snippet['pages'][0]['width']; $page_height = (float) $snippet['pages'][0]['height']; $is_time = str_starts_with($snippet['pages'][0]['id'], 'timesequence_'); - $result_snippets_base = [ - 'boxes' => $result_snippets_base['boxes'] ?? [], - ]; + if ($is_time) { + $result_snippets_base = [ + 'timespans' => $result_snippets_base['timespans'] ?? [], + ]; + } + else { + $result_snippets_base = [ + 'boxes' => $result_snippets_base['boxes'] ?? [], + ]; + } $shared_parent_region = array_fill_keys(array_keys($snippet['regions']), 0); foreach ($snippet['highlights'] as $key => $highlight) { @@ -869,17 +870,32 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $f } else { //MINIOCR coords already relative - $result_snippets_base['boxes'][] = [ - 'l' => $highlight[0]['ulx'], - 't' => $highlight[0]['uly'], - 'r' => $highlight[0]['lrx'], - 'b' => $highlight[0]['lry'], - 'snippet' => $region_text, - 'before' => $before_and_after[$before_index] ?? '', - 'after' => $before_and_after[$after_index] ?? '', - 'hit' => $hit, - 'time' => $is_time - ]; + // Deal with time here + if (!$is_time) { + $result_snippets_base['boxes'][] = [ + 'l' => $highlight[0]['ulx'], + 't' => $highlight[0]['uly'], + 'r' => $highlight[0]['lrx'], + 'b' => $highlight[0]['lry'], + 'snippet' => $region_text, + 'before' => $before_and_after[$before_index] ?? '', + 'after' => $before_and_after[$after_index] ?? '', + 'hit' => $hit, + 'time' => $is_time + ]; + } + else { + // It is about time! + $result_snippets_base['timespans'][] = [ + 's' => ($highlight[0]['uly'] * $page_height) / StrawberryfieldFlavorDatasource::PIXELS_PER_SECOND, + 'e' => (($highlight[0]['uly'] + $highlight[0]['lry']) * $page_height) / StrawberryfieldFlavorDatasource::PIXELS_PER_SECOND, + 'snippet' => $region_text, + 'before' => $before_and_after[$before_index] ?? '', + 'after' => $before_and_after[$after_index] ?? '', + 'hit' => $hit, + 'time' => $is_time + ]; + } } } } From c521d6dfc118936a55c4f312ff5422ae65fd6a31 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 29 Apr 2024 19:25:29 -0400 Subject: [PATCH 17/34] Wrong Schema --- config/schema/format_strawberryfield.schema.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/schema/format_strawberryfield.schema.yml b/config/schema/format_strawberryfield.schema.yml index b921e9b7..5a6120f3 100644 --- a/config/schema/format_strawberryfield.schema.yml +++ b/config/schema/format_strawberryfield.schema.yml @@ -26,9 +26,9 @@ format_strawberryfield.iiif_settings: iiif_content_search_validate_exposed: type: bool label: 'If IIIF Only explicit definitions in a manifest allow a search against them' - iiif_content_search_validate_exposed: + iiif_content_search_time_targetannotations: type: bool - label: 'If IIIF Only explicit definitions in a manifest allow a search against them' + label: 'If IIIF Content Search results for Time based media target the VTT annotation itself or the parent Canvas' iiif_content_search_api_parent_node_fields: type: sequence label: 'Strawberry Flavor Data Source Search API Fields that can be used to connect a Strawberry Flavor to a Parent ADO.' From d7d6434559e06a0ce7767f87921607a3335c091a Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Tue, 30 Apr 2024 08:37:14 -0400 Subject: [PATCH 18/34] Reduce the size of the H4/headings on search results/mirador 4 --- css/miradorviewer.css | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/css/miradorviewer.css b/css/miradorviewer.css index d1d00c35..d59cab18 100644 --- a/css/miradorviewer.css +++ b/css/miradorviewer.css @@ -1,3 +1,7 @@ .MiradorViewer { position:relative; } + +.MiradorViewer h4 { + font-size: 1.2rem; +} From bcd2833a9a53b0432da6e9a0bb973fbc5da56138 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Tue, 30 Apr 2024 10:44:30 -0400 Subject: [PATCH 19/34] This kinda adds option for before and after .. but i have not yet implemented the extra "hits" structure for V1 or "Annotations" for V2 that allow the before and after to be used. @alliomeria will explain during our call --- src/Controller/IiifContentSearchController.php | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/Controller/IiifContentSearchController.php b/src/Controller/IiifContentSearchController.php index 9fd986dd..54e9b621 100644 --- a/src/Controller/IiifContentSearchController.php +++ b/src/Controller/IiifContentSearchController.php @@ -842,7 +842,7 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $f $region_text = $snippet['regions'][$parent_region]['text'] ?? $term; $hit = $highlight[0]['text'] ?? $term; - $before_and_after = explode("{$hit}", $region_text ?? $term); + $before_and_after = explode("{$hit}", strip_tags($region_text ?? $term)); // Check if (int) coordinates lrx >1 (ALTO) ... assuming nothing is at 1px to the right? // else between 0 and < 1 (MINIOCR) $before_index = $shared_parent_region[$parent_region] -1; @@ -885,11 +885,17 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $f ]; } else { + // IN this case, because on now text spans into other regions, we use 'text' instead of + // $region_text like in a normal HOCR // It is about time! + // Before and after. We will try to split the original text by the math + // If we end with more than 2 pieces, we can't be sure where it was found .. + // so we set them '' ? + $before_and_after = explode($highlight[0]['text'],strip_tags($region_text)); $result_snippets_base['timespans'][] = [ 's' => ($highlight[0]['uly'] * $page_height) / StrawberryfieldFlavorDatasource::PIXELS_PER_SECOND, - 'e' => (($highlight[0]['uly'] + $highlight[0]['lry']) * $page_height) / StrawberryfieldFlavorDatasource::PIXELS_PER_SECOND, - 'snippet' => $region_text, + 'e' => ($highlight[0]['lry'] * $page_height) / StrawberryfieldFlavorDatasource::PIXELS_PER_SECOND, + 'snippet' => $highlight[0]['text'], 'before' => $before_and_after[$before_index] ?? '', 'after' => $before_and_after[$after_index] ?? '', 'hit' => $hit, From 6f7b0cea4c08eb3f9d28cb73b1d4e6da9d7c6bd9 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Tue, 30 Apr 2024 12:28:41 -0400 Subject: [PATCH 20/34] Gosh. Don't accumulate file UUIDs. BC a VTT is a single sequence A single file_uuid per hit. --- src/Controller/IiifContentSearchController.php | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Controller/IiifContentSearchController.php b/src/Controller/IiifContentSearchController.php index 54e9b621..e3151bef 100644 --- a/src/Controller/IiifContentSearchController.php +++ b/src/Controller/IiifContentSearchController.php @@ -409,10 +409,9 @@ function () use ($metadataexposeconfig_entity, $node) { // PDFs Sequence is correctly detected, but on images it should always be "1" // For that we will change the response from the main Solr search using our expected ID (splitting) $uuid_uri_field = 'file_uuid'; - $uuids[] = $hits_per_file_and_sequence['sbf_metadata'][$uuid_uri_field] ?? NULL; + // Different than normal OCR. Single UUID per file. + $uuid = $hits_per_file_and_sequence['sbf_metadata'][$uuid_uri_field] ?? NULL; $sequence_id = $hits_per_file_and_sequence['sbf_metadata']['sequence_id'] ?? 1; - $uuids = array_filter($uuids); - $uuid = reset($uuids); if ($uuid) { $target = $vtt_hash[$uuid][$sequence_id] ?? []; foreach ($target as $target_id => $target_data) { @@ -436,7 +435,7 @@ function () use ($metadataexposeconfig_entity, $node) { "@type" => "cnt:ContentAsText", "chars" => $annotation['snippet'], ], - "on" => ($target_parts[0] ?? $target_id) . $target_fragment + "on" => ($target_id) . $target_fragment ]; } elseif ($version == "v2") { $entries[] = [ From caf2ce96836a291da54aab1c418d11236303f22a Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Tue, 14 May 2024 21:39:32 -0400 Subject: [PATCH 21/34] don't round miliseconds so much W3C specs allow a . see https://www.w3.org/TR/media-frags/#mf-advanced --- src/Controller/IiifContentSearchController.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Controller/IiifContentSearchController.php b/src/Controller/IiifContentSearchController.php index e3151bef..06fc588a 100644 --- a/src/Controller/IiifContentSearchController.php +++ b/src/Controller/IiifContentSearchController.php @@ -417,8 +417,8 @@ function () use ($metadataexposeconfig_entity, $node) { foreach ($target as $target_id => $target_data) { if ($target_id) { $target_time = [ - round($annotation['s']), - round($annotation['e']) + round($annotation['s'],2), + round($annotation['e'],2) ]; $target_fragment = "#t=" . implode( ",", $target_time From fb971290553eab27a0c53d789bba3c8692e31211 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Tue, 14 May 2024 21:40:44 -0400 Subject: [PATCH 22/34] Haha! Mirador 4 hacked (more like redux-saga-ed) with time fragments @alliomeria you will have a great presentation. I'm not sure about OR (me) though! --- js/mirador_strawberry.js | 282 ++++++++++++++++++++++++--------------- 1 file changed, 172 insertions(+), 110 deletions(-) diff --git a/js/mirador_strawberry.js b/js/mirador_strawberry.js index 2a907626..ccb07b9c 100644 --- a/js/mirador_strawberry.js +++ b/js/mirador_strawberry.js @@ -124,6 +124,67 @@ function* formatStrawberryFieldReact(action) { const state = yield effects.select(Mirador.actions.getState); + + if (action.type === ActionTypes.SELECT_ANNOTATION) { + const { windowId } = action + const searches = yield effects.select(Mirador.selectors.getSearchForWindow, { windowId }); + const current_canvas = yield effects.select(Mirador.selectors.getCurrentCanvas, { windowId }); + let vtt_url = null; + let canvas_id_for_vtt = null; + let canvas_time_for_media = 0; + for (const sideWindow in searches) { + for(const url in searches[sideWindow].data) { + searches[sideWindow].data[url].json.resources.forEach((annotation) => { + if ((annotation['@id'] == action.annotationId)) { + const on = annotation.on.split("#t="); + if (on.length == 2) { + for (const canvas in state.annotations) { + for (const annotation_page in state.annotations[canvas]) { + const matches = state.annotations[canvas][annotation_page].json.items.filter(item => { + return (item['id'] === on[0] && item['body'].format == 'text/vtt'); + }); + if (Array.isArray(matches)) { + vtt_url = matches[0].body.id; + canvas_id_for_vtt = matches[0].target; + canvas_time_for_media = on[1].split(",", 1); + canvas_time_for_media = canvas_time_for_media[0]; + } + } + } + // I should check too if this is "supplementing" so we can decide if we just jump to a time (canvas) + // or load the VTT first and then jump to the canvas targeted by the annotation + // Now the hard part. I need to find this annotation + } + } + } + ); + } + } + if (canvas_id_for_vtt != current_canvas?.id && canvas_id_for_vtt != null) { + // take will wait/ call will run async + let visibleCanvasesForAnnotation = (yield effects.take( Mirador.actions.setCanvas(windowId, canvas_id_for_vtt))).payload.visibleCanvases + } + if (canvas_id_for_vtt != null) { + let MediaWindow = document.getElementById(windowId); + let MediaElement = MediaWindow.querySelector("video, audio"); + if (MediaElement) { + MediaElement.currentTime = canvas_time_for_media; + console.log(`Jumping to time ${canvas_time_for_media}`); + let tracks = MediaElement.querySelectorAll('track'); + for (const track of tracks) { + if (track.src === vtt_url) { + track.selected = true; + track.track.mode = "showing"; + } + else { + track.selected = false; + track.track.mode = "disabled"; + } + } + } + } + } + const newParams = Object.fromEntries(new URLSearchParams(location.search)) if ( action.type === ActionTypes.SET_CANVAS || @@ -247,6 +308,7 @@ ActionTypes.RECEIVE_SEARCH, ActionTypes.REMOVE_SEARCH, ActionTypes.SET_WINDOW_VIEW_TYPE, + ActionTypes.SELECT_ANNOTATION, ], formatStrawberryFieldReact ) @@ -259,126 +321,126 @@ const elementsToAttach = once('attache_mirador', '.strawberry-mirador-item[data-iiif-infojson]', context); $(elementsToAttach).each(function (index, value) { - // Get the node uuid for this element - var element_id = $(this).attr("id"); - // Check if we got some data passed via Drupal settings. - if (typeof(drupalSettings.format_strawberryfield.mirador[element_id]) != 'undefined') { - $(this).height(drupalSettings.format_strawberryfield.mirador[element_id]['height']); - if (drupalSettings.format_strawberryfield.mirador[element_id]['width'] != '100%') { - $(this).width(drupalSettings.format_strawberryfield.mirador[element_id]['width']); - } - // Defines our basic options for Mirador IIIF. - var $options = { - id: element_id, - windows: [{ - manifestId: drupalSettings.format_strawberryfield.mirador[element_id]['manifesturl'], - thumbnailNavigationPosition: 'far-bottom', - }] - }; + // Get the node uuid for this element + var element_id = $(this).attr("id"); + // Check if we got some data passed via Drupal settings. + if (typeof(drupalSettings.format_strawberryfield.mirador[element_id]) != 'undefined') { + $(this).height(drupalSettings.format_strawberryfield.mirador[element_id]['height']); + if (drupalSettings.format_strawberryfield.mirador[element_id]['width'] != '100%') { + $(this).width(drupalSettings.format_strawberryfield.mirador[element_id]['width']); + } + // Defines our basic options for Mirador IIIF. + var $options = { + id: element_id, + windows: [{ + manifestId: drupalSettings.format_strawberryfield.mirador[element_id]['manifesturl'], + thumbnailNavigationPosition: 'far-bottom', + }] + }; - if (drupalSettings.format_strawberryfield.mirador[element_id]['custom_js'] == true) { - $options.window = { - workspaceControlPanel: { - enabled: false - }, - allowClose: false, - imageToolsEnabled: true, - imageToolsOpen: true, - views: [ - { key: 'single', behaviors: [null, 'individuals'] }, - { key: 'book', behaviors: [null, 'paged'] }, - { key: 'scroll', behaviors: ['continuous'] }, - { key: 'gallery' }, - ], - }; - $options.windows[0].workspaceControlPanel = { + if (drupalSettings.format_strawberryfield.mirador[element_id]['custom_js'] == true) { + $options.window = { + workspaceControlPanel: { enabled: false - }; - $options.windows[0].workspace = { - isWorkspaceAddVisible: false, - allowNewWindows: true, - }; - } + }, + allowClose: false, + imageToolsEnabled: true, + imageToolsOpen: true, + views: [ + { key: 'single', behaviors: [null, 'individuals'] }, + { key: 'book', behaviors: [null, 'paged'] }, + { key: 'scroll', behaviors: ['continuous'] }, + { key: 'gallery' }, + ], + }; + $options.windows[0].workspaceControlPanel = { + enabled: false + }; + $options.windows[0].workspace = { + isWorkspaceAddVisible: false, + allowNewWindows: true, + }; + } - var $firstmanifest = [drupalSettings.format_strawberryfield.mirador[element_id]['manifesturl']]; - var $allmanifests = $firstmanifest.concat(drupalSettings.format_strawberryfield.mirador[element_id]['manifestother']); - var $secondmanifest = drupalSettings.format_strawberryfield.mirador[element_id]['manifestother'].find(x=>x!==undefined); + var $firstmanifest = [drupalSettings.format_strawberryfield.mirador[element_id]['manifesturl']]; + var $allmanifests = $firstmanifest.concat(drupalSettings.format_strawberryfield.mirador[element_id]['manifestother']); + var $secondmanifest = drupalSettings.format_strawberryfield.mirador[element_id]['manifestother'].find(x=>x!==undefined); - if (Array.isArray($allmanifests) && $allmanifests.length && typeof($secondmanifest) != 'undefined') { - var $secondwindow = new Object(); - $secondwindow.manifestId = $secondmanifest; - $secondwindow.thumbnailNavigationPosition = 'far-bottom'; - $options.windows.push($secondwindow); - var $manifests = new Object(); - $allmanifests.forEach(manifestURL => { - // TODO Provider should be passed by metadata at - // \Drupal\format_strawberryfield\Plugin\Field\FieldFormatter\StrawberryMiradorFormatter::viewElements - // Deal with this for Beta3 - $manifests[manifestURL] = new Object({'provider':'See Metadata'}); - }) - $options.manifests = $manifests; - } + if (Array.isArray($allmanifests) && $allmanifests.length && typeof($secondmanifest) != 'undefined') { + var $secondwindow = new Object(); + $secondwindow.manifestId = $secondmanifest; + $secondwindow.thumbnailNavigationPosition = 'far-bottom'; + $options.windows.push($secondwindow); + var $manifests = new Object(); + $allmanifests.forEach(manifestURL => { + // TODO Provider should be passed by metadata at + // \Drupal\format_strawberryfield\Plugin\Field\FieldFormatter\StrawberryMiradorFormatter::viewElements + // Deal with this for Beta3 + $manifests[manifestURL] = new Object({'provider':'See Metadata'}); + }) + $options.manifests = $manifests; + } - // Allow last minute overrides. These are more complex bc we have windows as an array and window too. - // Allow a last minute override, exclude main element manifest - if (typeof drupalSettings.format_strawberryfield.mirador[element_id]['viewer_overrides'] == 'object' && - !Array.isArray(drupalSettings.format_strawberryfield.mirador[element_id]['viewer_overrides']) && - drupalSettings.format_strawberryfield.mirador[element_id]['viewer_overrides'] !== null) { - let viewer_override = drupalSettings.format_strawberryfield.mirador[element_id]['viewer_overrides']; - if (typeof viewer_override?.windows !== 'undefined') { - if (Array.isArray(viewer_override.windows) && viewer_override.windows.length > 0) { - if (viewer_override.windows[0].manifestId !== 'undefined') { - delete viewer_override.windows[0].manifestId; - } + // Allow last minute overrides. These are more complex bc we have windows as an array and window too. + // Allow a last minute override, exclude main element manifest + if (typeof drupalSettings.format_strawberryfield.mirador[element_id]['viewer_overrides'] == 'object' && + !Array.isArray(drupalSettings.format_strawberryfield.mirador[element_id]['viewer_overrides']) && + drupalSettings.format_strawberryfield.mirador[element_id]['viewer_overrides'] !== null) { + let viewer_override = drupalSettings.format_strawberryfield.mirador[element_id]['viewer_overrides']; + if (typeof viewer_override?.windows !== 'undefined') { + if (Array.isArray(viewer_override.windows) && viewer_override.windows.length > 0) { + if (viewer_override.windows[0].manifestId !== 'undefined') { + delete viewer_override.windows[0].manifestId; } } - $options = { - ...$options, - ...viewer_override, - }; } + $options = { + ...$options, + ...viewer_override, + }; + } - //@TODO add an extra Manifests key with every other one so people can select the others. - if (drupalSettings.format_strawberryfield.mirador[element_id]['custom_js'] == true) { - const miradorInstance = renderMirador($options); - console.log('initializing Custom Mirador 3.3.0') - } - else { - const miradorInstance = Mirador.viewer($options, [formatStrawberryFieldReactPlugin]); - console.log('initializing Mirador') - if (miradorInstance) { - // To allow bubling up we need to add this one to the document - // Multiple Miradors will replace each other? - // @TODO check on that diego.. - document.addEventListener('sbf:canvas:change', CaptureAdoMiradorCanvasChange.bind(document, miradorInstance, element_id)); - document.addEventListener('sbf:ado:change', CaptureAdoMiradorAdoChange.bind(document, miradorInstance, element_id)); - } + //@TODO add an extra Manifests key with every other one so people can select the others. + if (drupalSettings.format_strawberryfield.mirador[element_id]['custom_js'] == true) { + const miradorInstance = renderMirador($options); + console.log('initializing Custom Mirador 3.3.0') + } + else { + const miradorInstance = Mirador.viewer($options, [formatStrawberryFieldReactPlugin]); + console.log('initializing Mirador') + if (miradorInstance) { + // To allow bubling up we need to add this one to the document + // Multiple Miradors will replace each other? + // @TODO check on that diego.. + document.addEventListener('sbf:canvas:change', CaptureAdoMiradorCanvasChange.bind(document, miradorInstance, element_id)); + document.addEventListener('sbf:ado:change', CaptureAdoMiradorAdoChange.bind(document, miradorInstance, element_id)); } - // Work around https://github.com/ProjectMirador/mirador/issues/3486 - const mirador_window = document.getElementById(element_id); - var observer = new MutationObserver(function(mutations) { - let mirador_videos = document.querySelectorAll(".mirador-viewer video source"); - if (mirador_videos.length) { - mutations.forEach(function (mutation) { - if ((mutation.target.localName == "video") && (mutation.addedNodes.length > 0) && (typeof(mutation.target.lastChild.src) != "undefined" )) { - mutation.target.src = mutation.target.lastChild.getAttribute('src'); - } - }); - } - let mirador_audios = document.querySelectorAll(".mirador-viewer audio source"); - if (mirador_audios.length) { - mutations.forEach(function (mutation) { - if ((mutation.target.localName == "audio") && (mutation.addedNodes.length > 0) && (typeof(mutation.target.lastChild.src) != "undefined" )) { - mutation.target.src = mutation.target.lastChild.getAttribute('src'); - } - }); - } - }); - observer.observe(mirador_window, { - childList: true, - subtree: true, - }); } - })}} + // Work around https://github.com/ProjectMirador/mirador/issues/3486 + const mirador_window = document.getElementById(element_id); + var observer = new MutationObserver(function(mutations) { + let mirador_videos = document.querySelectorAll(".mirador-viewer video source"); + if (mirador_videos.length) { + mutations.forEach(function (mutation) { + if ((mutation.target.localName == "video") && (mutation.addedNodes.length > 0) && (typeof(mutation.target.lastChild.src) != "undefined" )) { + mutation.target.src = mutation.target.lastChild.getAttribute('src'); + } + }); + } + let mirador_audios = document.querySelectorAll(".mirador-viewer audio source"); + if (mirador_audios.length) { + mutations.forEach(function (mutation) { + if ((mutation.target.localName == "audio") && (mutation.addedNodes.length > 0) && (typeof(mutation.target.lastChild.src) != "undefined" )) { + mutation.target.src = mutation.target.lastChild.getAttribute('src'); + } + }); + } + }); + observer.observe(mirador_window, { + childList: true, + subtree: true, + }); + } + })}} })(jQuery, Drupal, once, drupalSettings, window.Mirador, ReduxSaga); From bc4a29391fffdf388abdeb91ce5cafdf80710eef Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Wed, 15 May 2024 20:47:46 -0400 Subject: [PATCH 23/34] Fix to this very complex filter. This one is a clockwork --- .../src/Plugin/views/filter/StrawberryADOfilter.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/format_strawberryfield_views/src/Plugin/views/filter/StrawberryADOfilter.php b/modules/format_strawberryfield_views/src/Plugin/views/filter/StrawberryADOfilter.php index 999a29ac..c9af4e13 100644 --- a/modules/format_strawberryfield_views/src/Plugin/views/filter/StrawberryADOfilter.php +++ b/modules/format_strawberryfield_views/src/Plugin/views/filter/StrawberryADOfilter.php @@ -658,7 +658,7 @@ public function validateExposed(&$form, FormStateInterface $form_state) { } $node_uuids_or_ids = array_filter($node_uuids_or_ids); if ($node_uuids_or_ids) { - $this->validated_exposed_input = $uids; + $this->validated_exposed_input = $node_uuids_or_ids; } } @@ -765,7 +765,7 @@ protected function getEntityRelationsForFields($fields, $cached = TRUE) { } } $cacheability = new CacheableMetadata(); - $cacheability->addCacheableDependency($index); + $cacheability->addCacheableDependency($this->getIndex()); $field_data = []; foreach ($fields as $field_id) { $field_data[$field_id] = $this->calculateEntityRelationsForField( From 3c0da34a529a4f35c9d0c4b1cb3f07a956ef02ae Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Sun, 19 May 2024 20:55:52 -0400 Subject: [PATCH 24/34] Adds Webannotation controller for OCR driven annotations + Annotorious on single images Not ready. Lot's to do still. This is :chestnut: --- css/popper.css | 42 + format_strawberryfield.libraries.yml | 28 +- format_strawberryfield.module | 3 + format_strawberryfield.routing.yml | 17 + js/iiif-annotations_strawberry.js | 264 +++ .../js/sbf-views-ajax-dynamic.js | 4 +- ...ormatStrawberryfieldViewAjaxController.php | 3 + ...ewsExposedFormModalBlockAjaxController.php | 1 - .../IiifContentSearchController.php | 4 +- src/Controller/WebAnnotationController.php | 1416 ++++++++++------- 10 files changed, 1223 insertions(+), 559 deletions(-) create mode 100644 css/popper.css create mode 100644 js/iiif-annotations_strawberry.js diff --git a/css/popper.css b/css/popper.css new file mode 100644 index 00000000..c582cfdf --- /dev/null +++ b/css/popper.css @@ -0,0 +1,42 @@ +.popper-background { + background: #333; + color: white; + font-weight: bold; + padding: 4px 8px; + font-size: 13px; + border-radius: 4px; +} + +.popper-arrow, +.popper-arrow::before { + position: absolute; + width: 8px; + height: 8px; + background: inherit; +} + +.popper-arrow { + visibility: hidden; +} + +.popper-arrow::before { + visibility: visible; + content: ''; + transform: rotate(45deg); +} + +.popper-background[data-popper-placement^='top'] >.popper-arrow { + bottom: -4px; +} + +.popper-background[data-popper-placement^='bottom'] > .popper-arrow { + top: -4px; +} + +.popper-background[data-popper-placement^='left'] > .popper-arrow { + right: -4px; +} + +.popper-background[data-popper-placement^='right'] > .popper-arrow { + left: -4px; +} diff --git a/format_strawberryfield.libraries.yml b/format_strawberryfield.libraries.yml index 14a296dc..6ef2c329 100644 --- a/format_strawberryfield.libraries.yml +++ b/format_strawberryfield.libraries.yml @@ -21,6 +21,14 @@ jmespath_strawberry: dependencies: - core/drupal +popper: + js: + https://unpkg.com/@popperjs/core@2 : { external: true, minified: true, preprocess: false} + css: + component: + css/popper.css: { } + + iiif_openseadragon: remote: http://openseadragon.github.io version: 2.4.2 @@ -316,7 +324,6 @@ leaflet_core: component: https://cdn.jsdelivr.net/npm/leaflet@1.9.4/dist/leaflet.css: { external: true} - leaftleft_iiif_bound: version: 1.0 js: @@ -371,17 +378,30 @@ universalviewer: component: https://cdn.jsdelivr.net/npm/universalviewer@4.0.1/dist/uv.css: { external: true, preprocess: false } +iiif_flavor_annotations_strawberry: + version: 1.0 + js: + js/iiif-annotations_strawberry.js: {minified: false} + dependencies: + - core/jquery + - core/once + - core/drupal + - core/drupalSettings + - format_strawberryfield/annotorious + - format_strawberryfield/iiif_formatstrawberryfield_utils + - format_strawberryfield/popper + annotorious: - version: 2.5.10 + version: 2.7.12 license: name: BSD-3 Clause url: https://github.com/recogito/annotorious/blob/master/LICENSE gpl-compatible: true js: - https://cdn.jsdelivr.net/npm/@recogito/annotorious@2.5.10/dist/annotorious.min.js: { external: true, minified: true, preprocess: false} + https://cdn.jsdelivr.net/npm/@recogito/annotorious@2.7.12/dist/annotorious.min.js: { external: true, minified: true, preprocess: false} css: component: - https://cdn.jsdelivr.net/npm/@recogito/annotorious@2.5.10/dist/annotorious.min.css: { external: true } + https://cdn.jsdelivr.net/npm/@recogito/annotorious@2.7.12/dist/annotorious.min.css: { external: true } annotoriousopenseadragon: version: 2.5.16 diff --git a/format_strawberryfield.module b/format_strawberryfield.module index f12fb4f5..da7349b1 100644 --- a/format_strawberryfield.module +++ b/format_strawberryfield.module @@ -28,6 +28,9 @@ function format_strawberryfield_page_attachments(array &$page) { /* if (\Drupal::service('router.admin_context')->isAdminRoute()) { return; }*/ + if (\Drupal::currentUser()->hasPermission('view strawberryfield webannotation')) { + $page['#attached']['library'][] = 'format_strawberryfield/iiif_flavor_annotations_strawberry'; + } $page['#attached']['library'][] = 'format_strawberryfield/lazyload_strawberry'; $page['#attached']['library'][] = 'core/jquery'; diff --git a/format_strawberryfield.routing.yml b/format_strawberryfield.routing.yml index b9560f57..42c8cf48 100644 --- a/format_strawberryfield.routing.yml +++ b/format_strawberryfield.routing.yml @@ -275,6 +275,23 @@ format_strawberryfield.deletetmp_webannotations: _entity_access: 'node.update' _permission: 'add strawberryfield webannotation' +# Persist in a SBF any generated Webannotations. +format_strawberryfield.get_webannotations_fromflavor: + path: '/do/{node}/webannon/readsbf' + methods: [GET] + defaults: + _controller: '\Drupal\format_strawberryfield\Controller\WebAnnotationController::readFromFlavors' + options: + parameters: + node: + type: 'entity:node' + resource_type: + type: 'ado' + requirements: + _format: 'json' + _entity_access: 'node.view' + _permission: 'view strawberryfield webannotation' + # Display settings for each ADO format_strawberryfield.display_settings: path: '/node/{node}/display-settings/{bundle}/{view_mode_name}' diff --git a/js/iiif-annotations_strawberry.js b/js/iiif-annotations_strawberry.js new file mode 100644 index 00000000..0f9942f2 --- /dev/null +++ b/js/iiif-annotations_strawberry.js @@ -0,0 +1,264 @@ +(function ($, Drupal, once, Annotorious) { + + 'use strict'; + + const create_UUID = function() { + var dt = new Date().getTime(); + var uuid = 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function(c) { + var r = (dt + Math.random()*16)%16 | 0; + dt = Math.floor(dt/16); + return (c=='x' ? r :(r&0x3|0x8)).toString(16); + }); + return uuid; + } + + var ThreeWaySwitchElement = function(id, opencv_enabled) { + // 3. Triggers callbacks on user action + var setOpenCV = function(evt) { + // annotorious will be here already. + $(evt.target.parentElement).find('> button').each(function () { + $(this).removeClass('active'); + }); + + if (annotorious[evt.target.getAttribute('data-annotorious-id')]._env.hasOwnProperty('openCV')) { + if (annotorious[evt.target.getAttribute('data-annotorious-id')]._env.openCV == evt.target.name) { + annotorious[evt.target.getAttribute('data-annotorious-id')]._env.openCV = false; + } else { + annotorious[evt.target.getAttribute('data-annotorious-id')]._env.openCV = evt.target.name; + $(evt.target).addClass('active'); + } + } + else { + annotorious[evt.target.getAttribute('data-annotorious-id')]._env.openCV = evt.target.name; + $(evt.target).addClass('active'); + } + } + + + const container = document.createElement('div'); + container.style = "display:inline-flex"; + const toolbar = document.createElement('div'); + toolbar.setAttribute('id', id+ '-annon-toolbar'); + container.appendChild(toolbar); + if (opencv_enabled) { + const input1 = document.createElement('button'); + input1.setAttribute("name","face"); + input1.setAttribute("data-annotorious-id",id); + const input2 = input1.cloneNode(true); + const input3 = input1.cloneNode(true); + input2.setAttribute("name","contour"); + input3.setAttribute("name","contour_adapt"); + input1.setAttribute("value","OpenCV Face Detect"); + input1.setAttribute("id",id + '_face'); + input2.setAttribute("value","OpenCV Countour"); + input2.setAttribute("id",id + '_countour'); + input3.setAttribute("value","OpenCV Countour 2"); + input3.setAttribute("id", id + '_countour_adapt'); + + input1.classList.add('a9s-toolbar-btn','opencv-face'); + input2.classList.add('a9s-toolbar-btn','opencv-contour-light'); + input3.classList.add('a9s-toolbar-btn','opencv-contour-avg'); + input1.addEventListener('click', setOpenCV); + input2.addEventListener('click', setOpenCV); + input3.addEventListener('click', setOpenCV); + container.appendChild(input1); + container.appendChild(input2); + container.appendChild(input3); + } + + return container; + } + + var showMap = function(evt) { + + /* i need to transform xywh=pixel:217.31248474121094,240.13888549804688,2412.823989868164,1761.0184631347656 + into a valid IIIF Image URL. + @TODO make this an extra argument of L.ImageOverlay.iiifBounded and let that function deal with it? + "type": "SvgSelector", type can be an SvgSelector or a Fragment. Fragment allows me to fetch the portion of the image directly + but the SvgSelector needs to be decompositioned in parts and we need to get the max.x,max,y, min.x and min.y to call The IIIF API Image endpoint + "value": "<\/polygon><\/svg>" + Also all the IIIF target parsing could be a separate reusable function! + Note we are not using the "type" here not bc i'm lazy but bc + we are moving data around in Dom Documents dataset properties. So we parse the string + */ + evt.target.disabled = true; + evt.target.className ='r6o-btn outline'; + const IIIFragment = evt.target.dataset.bound.split("="); + if (IIIFragment.length == 0) { + return; + } + let iiif_region = null; + let clip_path = []; + let iiif_geoextension_sourcecoords = []; + // For our not fancy mesh deforming reality this is always so + iiif_geoextension_sourcecoords.push([0,0]); + let clip_path_string = null; + if (IIIFragment[0] === "xywh") { + const IIIFragmentCoords = IIIFragment[1].split(":"); + // @TODO what if using %percentage here? + const IIIFragmentCoordsIndividual = IIIFragmentCoords[1].split(","); + const iiif_coord_lx = Math.round(IIIFragmentCoordsIndividual[0]); + const iiif_coord_ly = Math.round(IIIFragmentCoordsIndividual[1]); + const iiif_coord_rx = Math.round(IIIFragmentCoordsIndividual[2]); + const iiif_coord_ry = Math.round(IIIFragmentCoordsIndividual[3]); + iiif_region = iiif_coord_lx + "," + iiif_coord_ly + "," + iiif_coord_rx + "," + iiif_coord_ry; + iiif_geoextension_sourcecoords.push([Math.floor(iiif_coord_rx - iiif_coord_lx),0]); + iiif_geoextension_sourcecoords.push([Math.floor(iiif_coord_rx - iiif_coord_lx), Math.floor(iiif_coord_ry - iiif_coord_ly)]); + iiif_geoextension_sourcecoords.push([0, Math.floor(iiif_coord_ry - iiif_coord_ly)]); + } + else if (IIIFragment[0] == "OpenStreetMap contributors' + } + const $minzoom = 1; + const $maxzoom = 15; + Leaflet.tileLayer($tilemap.url, + { + attribution: $tilemap.attribution, + maxZoom: $maxzoom, + minZoom: $minzoom + }).addTo(map); + + let allmarkers = []; + allmarkers.push(new Leaflet.marker(new Leaflet.LatLng(41,-70), {draggable:'true'})); + allmarkers.push(new Leaflet.marker(new Leaflet.LatLng(41,-66), {draggable:'true'})); + allmarkers.push(new Leaflet.marker(new Leaflet.LatLng(39,-66), {draggable:'true'})); + allmarkers.push(new Leaflet.marker(new Leaflet.LatLng(39,-70), {draggable:'true'})); + } + + + + var annotorious = []; + var viewers = []; + + Drupal.behaviors.format_strawberryfield_annotations_initiate = { + attach: function (context, settings) { + var annotorious_annotations = []; + var groupssettings = {}; + // Only attach to images that have an ID and a not empty data-sbf-annotations-nodeuuid porperty + const elementsToAttach = once('attache_annotations', 'img[data-sbf-annotations-nodeuuid][id]:not([data-sbf-annotations-nodeuuid=""])', context); + $(elementsToAttach).each(function (index, value) { + // Get the node uuid for this element + let element_id = $(this).attr("id"); + let node_uuid = $(this).data("sbf-annotations-nodeuuid"); + let file_uuid = $(this).data("sbf-annotations-fileuuid"); + let processors = $(this).data("sbf-annotations-processors"); + if (typeof processors !== "undefined") { + groupssettings[element_id] = { + "webannotations" : false, + "nodeuuid" : node_uuid, + "file_uuid" : file_uuid, + "processors" : processors + } + } + }); + $.each(groupssettings, function (element_id, groupssetting) { + function loadFirstAnnotationOfGroup(element_id) { + jQuery.ajax({ + url: '/do/' + groupssetting.nodeuuid + '/webannon/readsbf', + type: "GET", + dataType: 'json', + element_id: element_id, + data: { + 'target_resource_uuid': groupssetting.file_uuid, + 'processors': groupssetting.processors, + }, + success: function (pagedata) { + annotorious[this.element_id].setAnnotations(pagedata); + annotorious_annotations[this.element_id] = [pagedata]; + }, + error: function (xhr, ajaxOptions, thrownError) { + console.log(xhr.status); + } + }); + } + + console.log("Attaching W3C Annotations from Flavors"); + var $readonly = true; + let $widgets = [ + ]; + const $anonconfig = { + "readOnly":$readonly, + "widgets": $widgets, + "image" : document.getElementById(element_id), + } + + annotorious[element_id] = Annotorious.init($anonconfig); + annotorious_annotations[element_id] = []; + loadFirstAnnotationOfGroup(element_id); + let toggle = ThreeWaySwitchElement(element_id, false); + $('#toolbar-' + element_id).prepend(toggle); + annotorious[element_id].on('createSelection', async function(selection) { + if ($readonly) { return; }; + // Extract the image snippet, recording + // - image snippet (as canvas element) + // - x/y coordinate of the snippet top-left (image coordinate space) + // - kx/ky scale factors between canvas element physical and logical dimensions + // Polygon coordinates, in the snippet element's logical coordinate space + }); + annotorious[element_id].on('clickAnnotation', function(annotation, element) { + console.log(element); + console.log(annotation); + // + }); + }); + } + }; +})(jQuery, Drupal, once, window.Annotorious); diff --git a/modules/format_strawberryfield_views/js/sbf-views-ajax-dynamic.js b/modules/format_strawberryfield_views/js/sbf-views-ajax-dynamic.js index 3b3b0913..10200cc7 100644 --- a/modules/format_strawberryfield_views/js/sbf-views-ajax-dynamic.js +++ b/modules/format_strawberryfield_views/js/sbf-views-ajax-dynamic.js @@ -42,8 +42,6 @@ this.pagerAjax = Drupal.ajax(selfSettings); }; - - function loadViewOnClickEvent(e) { // If using the load even we can't relay on the target anymore because // it is bound to the document/window. @@ -101,7 +99,7 @@ ajaxObject.execute(); }; - Drupal.behaviors.sbf_views_ajax_interactions = { + Drupal.behaviors.sbf_views_ajax_dynamic = { attach: function (context, settings) { // the data attributes one can use // [data-sbf-view-id="machine_name_of_a_view"] diff --git a/modules/format_strawberryfield_views/src/Controller/FormatStrawberryfieldViewAjaxController.php b/modules/format_strawberryfield_views/src/Controller/FormatStrawberryfieldViewAjaxController.php index 52187309..e8a33f55 100644 --- a/modules/format_strawberryfield_views/src/Controller/FormatStrawberryfieldViewAjaxController.php +++ b/modules/format_strawberryfield_views/src/Controller/FormatStrawberryfieldViewAjaxController.php @@ -233,6 +233,8 @@ public function ajaxView(Request $request) { $origin_destination .= '?' . $query; unset($used_query_parameters['op']); if ($target_url) { + //Remove views%2Fajax from the URL set to the browser. makes no sense to allow that to be bookmarked. + unset($used_query_parameters['/views/ajax']); $target_url->setOption('query', $used_query_parameters); } } @@ -247,6 +249,7 @@ public function ajaxView(Request $request) { $view->dom_id = $dom_id; $context = new RenderContext(); + $preview = $this->renderer->executeInRenderContext($context, function () use ($view, $display_id, $args) { return $view->preview($display_id, $args); }); diff --git a/modules/format_strawberryfield_views/src/Controller/ViewsExposedFormModalBlockAjaxController.php b/modules/format_strawberryfield_views/src/Controller/ViewsExposedFormModalBlockAjaxController.php index b9782175..190741d1 100644 --- a/modules/format_strawberryfield_views/src/Controller/ViewsExposedFormModalBlockAjaxController.php +++ b/modules/format_strawberryfield_views/src/Controller/ViewsExposedFormModalBlockAjaxController.php @@ -155,5 +155,4 @@ public function ajaxExposedFormBlockView(Request $request) { } return $response; } - } diff --git a/src/Controller/IiifContentSearchController.php b/src/Controller/IiifContentSearchController.php index 06fc588a..45b59d80 100644 --- a/src/Controller/IiifContentSearchController.php +++ b/src/Controller/IiifContentSearchController.php @@ -617,7 +617,7 @@ protected function cleanVttJmesPathResult(array $jmespath_searchresult, $targetA /** - * OCR Search Controller specific to IIIF Content Search Needs + * OCR/Annnotation Search Controller specific to IIIF Content Search Needs * * @param string $term * @param array $processors @@ -737,7 +737,7 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $f // Sadly we have to add the condition here, what if file_uuid is not defined? } else { - $this->getLogger('format_strawberryfield')->warning('For Content Search API queries, please add a search api field named file_uuid containing the UUID of the file entity that generated the extraction you want to sarch'); + $this->getLogger('format_strawberryfield')->warning('For Content Search API queries, please add a search api field named file_uuid containing the UUID of the file entity that generated the extraction you want to search'); } $have_file_condition = FALSE; if (count($file_uris)) { diff --git a/src/Controller/WebAnnotationController.php b/src/Controller/WebAnnotationController.php index f148fca1..8e059656 100644 --- a/src/Controller/WebAnnotationController.php +++ b/src/Controller/WebAnnotationController.php @@ -2,11 +2,15 @@ namespace Drupal\format_strawberryfield\Controller; +use Drupal\Component\Plugin\Exception\PluginException; use Drupal\Core\Controller\ControllerBase; use Drupal\Core\Entity\ContentEntityInterface; use Drupal\Core\Entity\EntityTypeManagerInterface; use Drupal\Core\Render\RendererInterface; +use Drupal\search_api\Query\QueryInterface; +use Drupal\search_api\SearchApiException; use Drupal\strawberryfield\Plugin\Field\FieldType\StrawberryFieldItem; +use Drupal\strawberryfield\Plugin\search_api\datasource\StrawberryfieldFlavorDatasource; use Drupal\strawberryfield\StrawberryfieldUtilityService; use Symfony\Component\DependencyInjection\ContainerInterface; use Symfony\Component\Mime\MimeTypeGuesserInterface; @@ -18,6 +22,7 @@ use Symfony\Component\HttpKernel\Exception\BadRequestHttpException; use Symfony\Component\HttpKernel\Exception\MethodNotAllowedHttpException; use Drupal\Core\TempStore\PrivateTempStoreFactory; +use Drupal\search_api\ParseMode\ParseModePluginManager; use Drupal\Core\Ajax\AjaxResponse; use Drupal\Core\Ajax\RemoveCommand; use Drupal\Core\Ajax\ReplaceCommand; @@ -28,591 +33,904 @@ class WebAnnotationController extends ControllerBase { - /** - * Symfony\Component\HttpFoundation\RequestStack definition. - * - * @var \Symfony\Component\HttpFoundation\RequestStack - */ - protected $requestStack; - - /** - * The Strawberry Field Utility Service. - * - * @var \Drupal\strawberryfield\StrawberryfieldUtilityService - */ - protected $strawberryfieldUtility; - - - /** - * The Drupal Renderer. - * - * @var \Drupal\Core\Render\RendererInterface - */ - protected $renderer; - - /** - * The MIME type guesser. - * - * @var \Symfony\Component\Mime\MimeTypeGuesserInterface - */ - protected $mimeTypeGuesser; - - /** - * The tempstore. - * - * @var \Drupal\Core\TempStore\SharedTempStore - */ - protected $tempStore; - - /** - * WebAnnotationController constructor. - * - * @param \Symfony\Component\HttpFoundation\RequestStack $request_stack - * The Symfony Request Stack. - * @param \Drupal\strawberryfield\StrawberryfieldUtilityService $strawberryfield_utility_service - * The SBF Utility Service. - * @param \Drupal\Core\Entity\EntityTypeManagerInterface $entitytype_manager - * The Entity Type Manager. - * @param \Drupal\Core\Render\RendererInterface $renderer - * The Drupal Renderer Service. - * @param \Symfony\Component\Mime\MimeTypeGuesserInterface $mime_type_guesser - * The Drupal Mime type guesser Service. - * @param \Drupal\Core\TempStore\PrivateTempStoreFactory $temp_store_factory - * The tempstore factory. - */ - public function __construct( - RequestStack $request_stack, - StrawberryfieldUtilityService $strawberryfield_utility_service, - EntityTypeManagerInterface $entitytype_manager, - RendererInterface $renderer, - MimeTypeGuesserInterface $mime_type_guesser, - PrivateTempStoreFactory $temp_store_factory - - ) { - $this->requestStack = $request_stack; - $this->strawberryfieldUtility = $strawberryfield_utility_service; - $this->entityTypeManager = $entitytype_manager; - $this->renderer = $renderer; - $this->mimeTypeGuesser = $mime_type_guesser; - $this->tempStore = $temp_store_factory->get('webannotation'); - - } - - /** - * {@inheritdoc} - */ - public static function create(ContainerInterface $container) { - return new static( - $container->get('request_stack'), - $container->get('strawberryfield.utility'), - $container->get('entity_type.manager'), - $container->get('renderer'), - $container->get('file.mime_type.guesser'), - $container->get('tempstore.private') - ); - } - - /** - * Persist in temp Storage Webannotation Controller (POST). - * - * @param \Symfony\Component\HttpFoundation\Request - * The Full HTTPD Resquest - * @param \Drupal\Core\Entity\ContentEntityInterface $node - * A Node as argument - * - * @return \Symfony\Component\HttpFoundation\JsonResponse - * A simple JSON response. - */ - public function persist(Request $request, - ContentEntityInterface $node - ) { - if ($sbf_fields = $this->strawberryfieldUtility->bearsStrawberryfield( - $node - )) { - foreach ($sbf_fields as $field_name) { - // by default only persist in the primary SBF - // @TODO make which SBF will carry the load configurable? - // Also. This is setting the whole list everytime - // Should we deal with add/remove/update/edit independently? - // This means more server logic - // but less traffic? - /* @var $field StrawberryFieldItem */ - $field = $node->get($field_name); - // Symfony6 deprecated getting arrays via get()... like c'mom - // we have to use all()[ - $everything = $this->requestStack->getCurrentRequest()->request->all(); - $annotations = $everything['data'] ?? NULL; - $target = $everything['target_resource'] ?? NULL; - $keystoreid = $everything['keystoreid'] ?? NULL; - $data = [ - 'success' => true - ]; - try { - $existingannotations = $this->tempStore->get($keystoreid); - $existingannotations = is_array($existingannotations) ? $existingannotations : []; - $existingannotations[$target] = $annotations; - $this->tempStore->set($keystoreid, $existingannotations); - } - catch (\Drupal\Core\TempStore\TempStoreException $exception) { - $data = [ - 'success' => false - ]; - } - break; - } + /** + * Symfony\Component\HttpFoundation\RequestStack definition. + * + * @var \Symfony\Component\HttpFoundation\RequestStack + */ + protected $requestStack; + + /** + * The Strawberry Field Utility Service. + * + * @var \Drupal\strawberryfield\StrawberryfieldUtilityService + */ + protected $strawberryfieldUtility; + + + /** + * The Drupal Renderer. + * + * @var \Drupal\Core\Render\RendererInterface + */ + protected $renderer; + + /** + * The MIME type guesser. + * + * @var \Symfony\Component\Mime\MimeTypeGuesserInterface + */ + protected $mimeTypeGuesser; + + /** + * The tempstore. + * + * @var \Drupal\Core\TempStore\SharedTempStore + */ + protected $tempStore; + + /** + * The parse mode manager. + * + * @var \Drupal\search_api\ParseMode\ParseModePluginManager + */ + protected $parseModeManager; + + /** + * WebAnnotationController constructor. + * + * @param \Symfony\Component\HttpFoundation\RequestStack $request_stack + * The Symfony Request Stack. + * @param \Drupal\strawberryfield\StrawberryfieldUtilityService $strawberryfield_utility_service + * The SBF Utility Service. + * @param \Drupal\Core\Entity\EntityTypeManagerInterface $entitytype_manager + * The Entity Type Manager. + * @param \Drupal\Core\Render\RendererInterface $renderer + * The Drupal Renderer Service. + * @param \Symfony\Component\Mime\MimeTypeGuesserInterface $mime_type_guesser + * The Drupal Mime type guesser Service. + * @param \Drupal\Core\TempStore\PrivateTempStoreFactory $temp_store_factory + * The tempstore factory. + */ + public function __construct( + RequestStack $request_stack, + StrawberryfieldUtilityService $strawberryfield_utility_service, + EntityTypeManagerInterface $entitytype_manager, + RendererInterface $renderer, + MimeTypeGuesserInterface $mime_type_guesser, + PrivateTempStoreFactory $temp_store_factory, + ParseModePluginManager $parse_mode_manager + ) { + $this->requestStack = $request_stack; + $this->strawberryfieldUtility = $strawberryfield_utility_service; + $this->entityTypeManager = $entitytype_manager; + $this->renderer = $renderer; + $this->mimeTypeGuesser = $mime_type_guesser; + $this->tempStore = $temp_store_factory->get('webannotation'); + $this->parseModeManager = $parse_mode_manager; } - else { - throw new BadRequestHttpException( - "This Content can not bear Web Annotations!" - ); + + /** + * {@inheritdoc} + */ + public static function create(ContainerInterface $container) { + return new static( + $container->get('request_stack'), + $container->get('strawberryfield.utility'), + $container->get('entity_type.manager'), + $container->get('renderer'), + $container->get('file.mime_type.guesser'), + $container->get('tempstore.private'), + $container->get('plugin.manager.search_api.parse_mode'), + $container->get('config.factory') + ); } - return new JsonResponse($data); - } - - /** - * Updates an existing WebAnnotation Method (PUSH). - * - * @param \Symfony\Component\HttpFoundation\Request - * The Full HTTPD Resquest - * @param \Drupal\Core\Entity\ContentEntityInterface $node - * A Node as argument - * - * @return \Symfony\Component\HttpFoundation\JsonResponse - * A simple JSON response. - */ - public function putTemp(Request $request, - ContentEntityInterface $node - ) { - if ($sbf_fields = $this->strawberryfieldUtility->bearsStrawberryfield( - $node - )) { - - // We are getting which field originate the annotations from AJAX. - // Symfony6 deprecated getting arrays via get()... like c'mom - // we have to use all()[ - $everything = $this->requestStack->getCurrentRequest()->request->all(); - $annotation = $everything['data'] ?? NULL; - $target = $everything['target_resource'] ?? NULL; - $keystoreid = $everything['keystoreid'] ?? NULL; - $data = [ - 'success' => true - ]; - - try { - $persisted = FALSE; - if (isset($annotation['id'])) { - $existingannotations = $this->tempStore->get($keystoreid); - $existingannotations = is_array($existingannotations) ? $existingannotations : []; - if (isset($existingannotations[$target])) { - foreach ($existingannotations[$target] as $key => $existingannotation) { - if (($existingannotation['id'] == $annotation['id'])) { - $existingannotations[$target][$key] = $annotation; - $persisted = TRUE; + /** + * Persist in temp Storage Webannotation Controller (POST). + * + * @param \Symfony\Component\HttpFoundation\Request + * The Full HTTPD Resquest + * @param \Drupal\Core\Entity\ContentEntityInterface $node + * A Node as argument + * + * @return \Symfony\Component\HttpFoundation\JsonResponse + * A simple JSON response. + */ + public function persist(Request $request, + ContentEntityInterface $node + ) { + if ($sbf_fields = $this->strawberryfieldUtility->bearsStrawberryfield( + $node + )) { + foreach ($sbf_fields as $field_name) { + // by default only persist in the primary SBF + // @TODO make which SBF will carry the load configurable? + // Also. This is setting the whole list everytime + // Should we deal with add/remove/update/edit independently? + // This means more server logic + // but less traffic? + /* @var $field StrawberryFieldItem */ + $field = $node->get($field_name); + // Symfony6 deprecated getting arrays via get()... like c'mom + // we have to use all()[ + $everything = $this->requestStack->getCurrentRequest()->request->all(); + $annotations = $everything['data'] ?? NULL; + $target = $everything['target_resource'] ?? NULL; + $keystoreid = $everything['keystoreid'] ?? NULL; + $data = [ + 'success' => true + ]; + try { + $existingannotations = $this->tempStore->get($keystoreid); + $existingannotations = is_array($existingannotations) ? $existingannotations : []; + $existingannotations[$target] = $annotations; + $this->tempStore->set($keystoreid, $existingannotations); + } + catch (\Drupal\Core\TempStore\TempStoreException $exception) { + $data = [ + 'success' => false + ]; + } break; - } - } - if ($persisted == FALSE) { - throw new MethodNotAllowedHttpException(['PUT'], - "The Annotation has no unique id!" - ); - } // means it was new - } - $this->tempStore->set($keystoreid, $existingannotations); + } } else { - throw new BadRequestHttpException( - "The Annotation has no unique id!" - ); + throw new BadRequestHttpException( + "This Content can not bear Web Annotations!" + ); } - } - catch (\Drupal\Core\TempStore\TempStoreException $exception) { - $data = [ - 'success' => false - ]; - } - } - else { - throw new BadRequestHttpException( - "This Content can not bear Web Annotations!" - ); + + return new JsonResponse($data); } - return new JsonResponse($data); - } - - - - /** - * Persist temp Controller Method (POST). - * - * @param \Symfony\Component\HttpFoundation\Request - * The Full HTTPD Resquest - * @param \Drupal\Core\Entity\ContentEntityInterface $node - * A Node as argument - * - * @return \Symfony\Component\HttpFoundation\JsonResponse - * A simple JSON response. - */ - public function postTemp(Request $request, - ContentEntityInterface $node - ) { - if ($sbf_fields = $this->strawberryfieldUtility->bearsStrawberryfield( - $node - )) { - - // We are getting which field originate the annotations from AJAX. - // Symfony6 deprecated getting arrays via get()... like c'mom - // we have to use all()[ - $everything = $this->requestStack->getCurrentRequest()->request->all(); - $annotation = $everything['data'] ?? NULL; - $target = $everything['target_resource'] ?? NULL; - $keystoreid = $everything['keystoreid'] ?? NULL; - $data = [ - 'success' => true - ]; - try { - $persisted = FALSE; - if (isset($annotation['id'])) { - $existingannotations = $this->tempStore->get($keystoreid); - $existingannotations = is_array($existingannotations) ? $existingannotations : []; - if (isset($existingannotations[$target])) { - foreach ($existingannotations[$target] as $key => &$existingannotation) { - if (($existingannotation['id'] == $annotation['id'])) { - throw new MethodNotAllowedHttpException(['POST'], - "The ID is already present, to update use PUT method" - ); - } - } - } + /** + * Updates an existing WebAnnotation Method (PUSH). + * + * @param \Symfony\Component\HttpFoundation\Request + * The Full HTTPD Resquest + * @param \Drupal\Core\Entity\ContentEntityInterface $node + * A Node as argument + * + * @return \Symfony\Component\HttpFoundation\JsonResponse + * A simple JSON response. + */ + public function putTemp(Request $request, + ContentEntityInterface $node + ) { + if ($sbf_fields = $this->strawberryfieldUtility->bearsStrawberryfield( + $node + )) { + + // We are getting which field originate the annotations from AJAX. + // Symfony6 deprecated getting arrays via get()... like c'mom + // we have to use all()[ + $everything = $this->requestStack->getCurrentRequest()->request->all(); + $annotation = $everything['data'] ?? NULL; + $target = $everything['target_resource'] ?? NULL; + $keystoreid = $everything['keystoreid'] ?? NULL; + $data = [ + 'success' => true + ]; - $existingannotations[$target][] = $annotation; - $this->tempStore->set($keystoreid, $existingannotations); + try { + $persisted = FALSE; + if (isset($annotation['id'])) { + $existingannotations = $this->tempStore->get($keystoreid); + $existingannotations = is_array($existingannotations) ? $existingannotations : []; + if (isset($existingannotations[$target])) { + foreach ($existingannotations[$target] as $key => $existingannotation) { + if (($existingannotation['id'] == $annotation['id'])) { + $existingannotations[$target][$key] = $annotation; + $persisted = TRUE; + break; + } + } + if ($persisted == FALSE) { + throw new MethodNotAllowedHttpException(['PUT'], + "The Annotation has no unique id!" + ); + } // means it was new + } + $this->tempStore->set($keystoreid, $existingannotations); + } + else { + throw new BadRequestHttpException( + "The Annotation has no unique id!" + ); + } + } + catch (\Drupal\Core\TempStore\TempStoreException $exception) { + $data = [ + 'success' => false + ]; + } } else { - throw new BadRequestHttpException( - "The Annotation has no unique id!" - ); + throw new BadRequestHttpException( + "This Content can not bear Web Annotations!" + ); } - } - catch (\Drupal\Core\TempStore\TempStoreException $exception) { - $data = [ - 'success' => false - ]; - } - } - else { - throw new BadRequestHttpException( - "This Content can not bear Web Annotations!" - ); + + return new JsonResponse($data); } - return new JsonResponse($data); - } - - /** - * Delete temp Controller Method (POST). - * - * @param \Symfony\Component\HttpFoundation\Request - * The Full HTTPD Resquest - * @param \Drupal\Core\Entity\ContentEntityInterface $node - * A Node as argument - * @param string $keystoreid - * The keystore id to delete - * - * @return AjaxResponse - * A cacheable response. - */ - public function deleteKeyStore(Request $request, ContentEntityInterface $node) { - if ($sbf_fields = $this->strawberryfieldUtility->bearsStrawberryfield( - $node - )) { - $response = new AjaxResponse(); - foreach ($sbf_fields as $field_name) { - /* @var $field \Drupal\Core\Field\FieldItemInterface */ - $field = $node->get($field_name); - /** @var $field \Drupal\Core\Field\FieldItemList */ - foreach ($field->getIterator() as $delta => $itemfield) { - $keystoreid = static::getTempStoreKeyName( - $field_name, - $delta, - $node->uuid() - ); - try { - $this->tempStore->delete(trim($keystoreid)); - } catch (\Drupal\Core\TempStore\TempStoreException $exception) { - $response->addCommand( - new ReplaceCommand( - '#edit-webannotations > div', - 'Something went awfully wrong and we could not discard your Annotation. Please try again.' - ) + + + /** + * Persist temp Controller Method (POST). + * + * @param \Symfony\Component\HttpFoundation\Request + * The Full HTTPD Resquest + * @param \Drupal\Core\Entity\ContentEntityInterface $node + * A Node as argument + * + * @return \Symfony\Component\HttpFoundation\JsonResponse + * A simple JSON response. + */ + public function postTemp(Request $request, + ContentEntityInterface $node + ) { + if ($sbf_fields = $this->strawberryfieldUtility->bearsStrawberryfield( + $node + )) { + + // We are getting which field originate the annotations from AJAX. + // Symfony6 deprecated getting arrays via get()... like c'mom + // we have to use all()[ + $everything = $this->requestStack->getCurrentRequest()->request->all(); + $annotation = $everything['data'] ?? NULL; + $target = $everything['target_resource'] ?? NULL; + $keystoreid = $everything['keystoreid'] ?? NULL; + $data = [ + 'success' => true + ]; + try { + $persisted = FALSE; + if (isset($annotation['id'])) { + $existingannotations = $this->tempStore->get($keystoreid); + $existingannotations = is_array($existingannotations) ? $existingannotations : []; + if (isset($existingannotations[$target])) { + foreach ($existingannotations[$target] as $key => &$existingannotation) { + if (($existingannotation['id'] == $annotation['id'])) { + throw new MethodNotAllowedHttpException(['POST'], + "The ID is already present, to update use PUT method" + ); + } + } + } + + $existingannotations[$target][] = $annotation; + $this->tempStore->set($keystoreid, $existingannotations); + } + else { + throw new BadRequestHttpException( + "The Annotation has no unique id!" + ); + } + } + catch (\Drupal\Core\TempStore\TempStoreException $exception) { + $data = [ + 'success' => false + ]; + } + } + else { + throw new BadRequestHttpException( + "This Content can not bear Web Annotations!" ); - return $response; - } } - } - } - else { - throw new BadRequestHttpException( - "This Content can not bear Web Annotations!" - ); + + return new JsonResponse($data); } - $response->addCommand(new RemoveCommand('#edit-webannotations')); - return $response; - } - - /** - * Read existing WebAnnotations Controller Method (GET). - * - * @param \Symfony\Component\HttpFoundation\Request - * The Full HTTPD Resquest - * @param \Drupal\Core\Entity\ContentEntityInterface $node - * A Node as argument - * - * @return \Drupal\Core\Cache\CacheableJsonResponse|\Drupal\Core\Cache\CacheableResponse - * A cacheable response. - */ - public function read(Request $request, - ContentEntityInterface $node - ) { - - // WE do not want cache here - // But starting to think Anonymous users should not use the tempStore at all. - $build = [ - '#cache' => [ - 'max-age' => 0, - ], - ]; - - - $return = []; - // GET Argument ( - $target = $this->requestStack->getCurrentRequest()->query->get('target_resource'); - - if (($sbf_fields = $this->strawberryfieldUtility->bearsStrawberryfield( - $node - )) && !empty(trim($target))) { - - // We are getting which field originate the annotations from AJAX. - // This time Ajax - $keystoreid = $this->requestStack->getCurrentRequest()->query->get('keystoreid'); - - $data = [ - 'success' => true - ]; - - try { - // See \Drupal\format_strawberryfield\Plugin\Field\FieldFormatter\StrawberryMediaFormatter::viewElements - // It would have set initial values so we do not need to read/iterate everytime - $existingannotations = $this->tempStore->get($keystoreid); - if ($existingannotations == null) { - foreach ($sbf_fields as $field_name) { - /* @var $field \Drupal\Core\Field\FieldItemInterface */ - $field = $node->get($field_name); - /** @var $field \Drupal\Core\Field\FieldItemList */ - foreach ($field->getIterator() as $delta => $itemfield) { - $potentialkeystoreid = static::getTempStoreKeyName( - $field_name, - $delta, - $node->uuid() - ); - if ($potentialkeystoreid == $keystoreid) { - $existingannotations = static::primeKeyStore($itemfield, $keystoreid); - break 2; - } - } - } + /** + * Delete temp Controller Method (POST). + * + * @param \Symfony\Component\HttpFoundation\Request + * The Full HTTPD Resquest + * @param \Drupal\Core\Entity\ContentEntityInterface $node + * A Node as argument + * @param string $keystoreid + * The keystore id to delete + * + * @return AjaxResponse + * A cacheable response. + */ + public function deleteKeyStore(Request $request, ContentEntityInterface $node) { + if ($sbf_fields = $this->strawberryfieldUtility->bearsStrawberryfield( + $node + )) { + $response = new AjaxResponse(); + foreach ($sbf_fields as $field_name) { + /* @var $field \Drupal\Core\Field\FieldItemInterface */ + $field = $node->get($field_name); + /** @var $field \Drupal\Core\Field\FieldItemList */ + foreach ($field->getIterator() as $delta => $itemfield) { + $keystoreid = static::getTempStoreKeyName( + $field_name, + $delta, + $node->uuid() + ); + try { + $this->tempStore->delete(trim($keystoreid)); + } catch (\Drupal\Core\TempStore\TempStoreException $exception) { + $response->addCommand( + new ReplaceCommand( + '#edit-webannotations > div', + 'Something went awfully wrong and we could not discard your Annotation. Please try again.' + ) + ); + return $response; + } + } + } } - $return = isset($existingannotations[$target]) && is_array($existingannotations[$target]) ? $existingannotations[$target] : []; - } - catch (\Drupal\Core\TempStore\TempStoreException $exception) { - throw new ServiceUnavailableHttpException( - "Temporary Storage for WebAnnotations is not working. Contact your admin." - ); - } - } - else { - throw new BadRequestHttpException( - "Wrong request" - ); + else { + throw new BadRequestHttpException( + "This Content can not bear Web Annotations!" + ); + } + + $response->addCommand(new RemoveCommand('#edit-webannotations')); + return $response; } - $response = new CacheableJsonResponse($return); - $response->addCacheableDependency($build); - $response->addCacheableDependency($node); - - return $response; - } - - /** - * Persist temp Controller Method (POST). - * - * @param \Symfony\Component\HttpFoundation\Request - * The Full HTTPD Resquest - * @param \Drupal\Core\Entity\ContentEntityInterface $node - * A Node as argument - * - * @return \Symfony\Component\HttpFoundation\JsonResponse; - * A cacheable response. - */ - public function deleteTemp(Request $request, - ContentEntityInterface $node - ) { - if ($sbf_fields = $this->strawberryfieldUtility->bearsStrawberryfield( - $node - )) { - - // We are getting which field originate the annotations from AJAX. - $everything = $this->requestStack->getCurrentRequest()->request->all(); - $annotation = $everything['data'] ?? NULL; - $target = $everything['target_resource'] ?? NULL; - $keystoreid = $everything['keystoreid'] ?? NULL; - - $data = [ - 'success' => true - ]; - try { - if (isset($annotation['id'])) { - $existingannotations = $this->tempStore->get($keystoreid); - $existingannotations = is_array($existingannotations) ? $existingannotations : []; - if (isset($existingannotations[$target])) { - foreach ($existingannotations[$target] as $key => $existingannotation) { - if (($existingannotation['id'] == $annotation['id'])) { - unset($existingannotations[$target][$key]); - break; - } + + /** + * Read existing WebAnnotations Controller Method (GET). + * + * @param \Symfony\Component\HttpFoundation\Request + * The Full HTTPD Resquest + * @param \Drupal\Core\Entity\ContentEntityInterface $node + * A Node as argument + * + * @return \Drupal\Core\Cache\CacheableJsonResponse|\Drupal\Core\Cache\CacheableResponse + * A cacheable response. + */ + public function read(Request $request, + ContentEntityInterface $node + ) { + + // WE do not want cache here + // But starting to think Anonymous users should not use the tempStore at all. + $build = [ + '#cache' => [ + 'max-age' => 0, + ], + ]; + + + $return = []; + // GET Argument ( + $target = $this->requestStack->getCurrentRequest()->query->get('target_resource'); + + if (($sbf_fields = $this->strawberryfieldUtility->bearsStrawberryfield( + $node + )) && !empty(trim($target))) { + + // We are getting which field originate the annotations from AJAX. + // This time Ajax + $keystoreid = $this->requestStack->getCurrentRequest()->query->get('keystoreid'); + + $data = [ + 'success' => true + ]; + + try { + // See \Drupal\format_strawberryfield\Plugin\Field\FieldFormatter\StrawberryMediaFormatter::viewElements + // It would have set initial values, so we do not need to read/iterate everytime + $existingannotations = $this->tempStore->get($keystoreid); + if ($existingannotations == null) { + foreach ($sbf_fields as $field_name) { + /* @var $field \Drupal\Core\Field\FieldItemInterface */ + $field = $node->get($field_name); + /** @var $field \Drupal\Core\Field\FieldItemList */ + foreach ($field->getIterator() as $delta => $itemfield) { + $potentialkeystoreid = static::getTempStoreKeyName( + $field_name, + $delta, + $node->uuid() + ); + if ($potentialkeystoreid == $keystoreid) { + $existingannotations = static::primeKeyStore($itemfield, $keystoreid); + break 2; + } + } + } + } + $return = isset($existingannotations[$target]) && is_array($existingannotations[$target]) ? $existingannotations[$target] : []; } - // Make sure we reorder them so they stay as indexed arrays - if (empty(!$existingannotations[$target])) { - $existingannotations[$target] = array_values($existingannotations[$target]); - } else { - //If empty totally remove - unset($existingannotations[$target]); + catch (\Drupal\Core\TempStore\TempStoreException $exception) { + throw new ServiceUnavailableHttpException( + "Temporary Storage for WebAnnotations is not working. Contact your admin." + ); } - } - $this->tempStore->set($keystoreid, $existingannotations); } else { - throw new BadRequestHttpException( - "The Annotation has no unique id!" - ); + throw new BadRequestHttpException( + "Wrong request" + ); } - } - catch (\Drupal\Core\TempStore\TempStoreException $exception) { - $data = [ - 'success' => false + $response = new CacheableJsonResponse($return); + $response->addCacheableDependency($build); + $response->addCacheableDependency($node); + + return $response; + } + + /** + * Read existing WebAnnotations From Solr/Flavors Controller Method (GET). + * + * @param \Symfony\Component\HttpFoundation\Request + * The Full HTTPD Resquest + * @param \Drupal\Core\Entity\ContentEntityInterface $node + * A Node as argument + * + * @return \Drupal\Core\Cache\CacheableJsonResponse|\Drupal\Core\Cache\CacheableResponse + * A cacheable response. + */ + public function readFromFlavors(Request $request, + ContentEntityInterface $node + ) { + + // WE do not want cache here + // But starting to think Anonymous users should not use the tempStore at all. + $build = [ + '#cache' => [ + 'max-age' => -1, + ], ]; - } + + $return = []; + $existingannotations = []; + // GET Argument ( + $target = $this->requestStack->getCurrentRequest()->query->get('target_resource_uuid'); + // Processors need to exist. + // We might want to (eventually) decide if we want OCR (normal page level) to be fetched + // as individual annotations or not at all. + $processors = $this->requestStack->getCurrentRequest()->query->get('processors', NULL); + if ($processors) { + if (($sbf_fields = $this->strawberryfieldUtility->bearsStrawberryfield( + $node + )) && !empty(trim($target))) { + } + $data = [ + 'success' => true + ]; + try { + // See \Drupal\format_strawberryfield\Plugin\Field\FieldFormatter\StrawberryMediaFormatter::viewElements + // It would have set initial values, so we do not need to read/iterate everytime + $file = $this->entityTypeManager()->getStorage('file')->loadByProperties(['uuid' => $target]); + if (count($file) == 0) { + throw new BadRequestHttpException( + "Wrong request" + ); + } + + + $existingannotations[$target] = $this->flavorfromSolrIndex([$processors ?? 'ml_yolov8'], [$file->uri],[$target] , [$node->uuid()]); + + $return = isset($existingannotations[$target]) && is_array($existingannotations[$target]) ? $existingannotations[$target] : []; + } + catch (\Exception $exception) { + throw new ServiceUnavailableHttpException( + "Annotation from StrawberryFlavor fetching failed. Contact your admin." + ); + } + } + else { + throw new BadRequestHttpException( + "Wrong request" + ); + } + $response = new CacheableJsonResponse($return); + $response->addCacheableDependency($build); + $response->addCacheableDependency($node); + + return $response; } - else { - throw new BadRequestHttpException( - "This Content can not bear Web Annotations!" - ); + + + + + /** + * Persist temp Controller Method (POST). + * + * @param \Symfony\Component\HttpFoundation\Request + * The Full HTTPD Resquest + * @param \Drupal\Core\Entity\ContentEntityInterface $node + * A Node as argument + * + * @return \Symfony\Component\HttpFoundation\JsonResponse; + * A cacheable response. + */ + public function deleteTemp(Request $request, + ContentEntityInterface $node + ) { + if ($sbf_fields = $this->strawberryfieldUtility->bearsStrawberryfield( + $node + )) { + + // We are getting which field originate the annotations from AJAX. + $everything = $this->requestStack->getCurrentRequest()->request->all(); + $annotation = $everything['data'] ?? NULL; + $target = $everything['target_resource'] ?? NULL; + $keystoreid = $everything['keystoreid'] ?? NULL; + + $data = [ + 'success' => true + ]; + try { + if (isset($annotation['id'])) { + $existingannotations = $this->tempStore->get($keystoreid); + $existingannotations = is_array($existingannotations) ? $existingannotations : []; + if (isset($existingannotations[$target])) { + foreach ($existingannotations[$target] as $key => $existingannotation) { + if (($existingannotation['id'] == $annotation['id'])) { + unset($existingannotations[$target][$key]); + break; + } + } + // Make sure we reorder them so they stay as indexed arrays + if (empty(!$existingannotations[$target])) { + $existingannotations[$target] = array_values($existingannotations[$target]); + } else { + //If empty totally remove + unset($existingannotations[$target]); + } + } + $this->tempStore->set($keystoreid, $existingannotations); + } + else { + throw new BadRequestHttpException( + "The Annotation has no unique id!" + ); + } + } + catch (\Drupal\Core\TempStore\TempStoreException $exception) { + $data = [ + 'success' => false + ]; + } + } + else { + throw new BadRequestHttpException( + "This Content can not bear Web Annotations!" + ); + } + + return new JsonResponse($data); } - return new JsonResponse($data); - } - - /* - * Delete temp Controller Method (POST). - * - - */ - public static function deleteKeyStoreAjaxCallback(array &$form, FormStateInterface $form_state) { - // Ok, this has a lot of Static loading of services instead of Injected dependency - // But AJAX callbacks are sadly static! Gosh, sorry good coding practices avatars - $response = new AjaxResponse(); - if ($form_state->get('hadAnnotations')) { - $node = $form_state->getFormObject()->getEntity(); - $tempstore = \Drupal::service('tempstore.private')->get( - 'webannotation' - ); - if ($sbf_fields = \Drupal::service('strawberryfield.utility') - ->bearsStrawberryfield($node)) { - - foreach ($sbf_fields as $field_name) { - /* @var $field \Drupal\Core\Field\FieldItemInterface */ - $field = $node->get($field_name); - /** @var $field \Drupal\Core\Field\FieldItemList */ - foreach ($field->getIterator() as $delta => $itemfield) { - $keystoreid = static::getTempStoreKeyName( - $field_name, - $delta, - $node->uuid() + /* + * Delete temp Controller Method (POST). + * + + */ + public static function deleteKeyStoreAjaxCallback(array &$form, FormStateInterface $form_state) { + // Ok, this has a lot of Static loading of services instead of Injected dependency + // But AJAX callbacks are sadly static! Gosh, sorry good coding practices avatars + $response = new AjaxResponse(); + if ($form_state->get('hadAnnotations')) { + $node = $form_state->getFormObject()->getEntity(); + $tempstore = \Drupal::service('tempstore.private')->get( + 'webannotation' ); - try { - $tempstore->delete($keystoreid); - // Do NOT SET stored settings back. - // BECAUSE WE HAVE NOT RELOADED OUR NODE from storage yet OK? - } catch (\Drupal\Core\TempStore\TempStoreException $exception) { - $response->addCommand( - new ReplaceCommand( - '#edit-webannotations > div', - 'So Sorry Something went awfully wrong and we could not discard your Annotation. Please try again or reload.' - ) - ); - return $response; - } - } + if ($sbf_fields = \Drupal::service('strawberryfield.utility') + ->bearsStrawberryfield($node)) { + + foreach ($sbf_fields as $field_name) { + /* @var $field \Drupal\Core\Field\FieldItemInterface */ + $field = $node->get($field_name); + /** @var $field \Drupal\Core\Field\FieldItemList */ + foreach ($field->getIterator() as $delta => $itemfield) { + $keystoreid = static::getTempStoreKeyName( + $field_name, + $delta, + $node->uuid() + ); + try { + $tempstore->delete($keystoreid); + // Do NOT SET stored settings back. + // BECAUSE WE HAVE NOT RELOADED OUR NODE from storage yet OK? + } catch (\Drupal\Core\TempStore\TempStoreException $exception) { + $response->addCommand( + new ReplaceCommand( + '#edit-webannotations > div', + 'So Sorry Something went awfully wrong and we could not discard your Annotation. Please try again or reload.' + ) + ); + return $response; + } + } + } + } + else { + return $response; + } + // Needed so all is restored from storage + $node = node::load($node->id()); + $destination_url = Url::fromRoute('entity.node.edit_form', ['node' => $node->id()]); + $redirect_command = new RedirectCommand($destination_url->toString()); + $response->addCommand(new RemoveCommand('#edit-annotations')); + $response->addCommand($redirect_command); } - } - else { return $response; - } - // Needed so all is restored from storage - $node = node::load($node->id()); - $destination_url = Url::fromRoute('entity.node.edit_form', ['node' => $node->id()]); - $redirect_command = new RedirectCommand($destination_url->toString()); - $response->addCommand(new RemoveCommand('#edit-annotations')); - $response->addCommand($redirect_command); } - return $response; - } - - - - - - /** - * Gives us a key name used by the webforms and widgets. - * - * @param $fieldname - * @param int $delta - * @param string $entity_uuid - * - * @return string - */ - public static function getTempStoreKeyName($fieldname, $delta = 0, $entity_uuid = '0') { - $unique_seed = array_merge( - [$fieldname], - [$delta], - [$entity_uuid] - ); - return sha1(implode('-', $unique_seed)); - } - - /** - * Primes the Web Annotation KeyStore with saved values. - * - * @param \Drupal\strawberryfield\Plugin\Field\FieldType\StrawberryFieldItem $itemfield - * @param null $keystoreid - */ - public static function primeKeyStore(StrawberryFieldItem $itemfield, $keystoreid = NULL) { - if ($keystoreid == NULL && strlen(trim($keystoreid)) == 0) { - return NULL; + + + + + + /** + * Gives us a key name used by the webforms and widgets. + * + * @param $fieldname + * @param int $delta + * @param string $entity_uuid + * + * @return string + */ + public static function getTempStoreKeyName($fieldname, $delta = 0, $entity_uuid = '0') { + $unique_seed = array_merge( + [$fieldname], + [$delta], + [$entity_uuid] + ); + return sha1(implode('-', $unique_seed)); } - $jsondata = $itemfield->provideDecoded(TRUE); - $tempstore = \Drupal::service('tempstore.private')->get( - 'webannotation' - ); - if (!empty($jsondata['ap:annotationCollection']) && is_array($jsondata['ap:annotationCollection'])) { - $tempstore->set($keystoreid, $jsondata['ap:annotationCollection']); - return $jsondata['ap:annotationCollection']; + + /** + * Primes the Web Annotation KeyStore with saved values. + * + * @param \Drupal\strawberryfield\Plugin\Field\FieldType\StrawberryFieldItem $itemfield + * @param null $keystoreid + */ + public static function primeKeyStore(StrawberryFieldItem $itemfield, $keystoreid = NULL) { + if ($keystoreid == NULL && strlen(trim($keystoreid)) == 0) { + return NULL; + } + $jsondata = $itemfield->provideDecoded(TRUE); + $tempstore = \Drupal::service('tempstore.private')->get( + 'webannotation' + ); + if (!empty($jsondata['ap:annotationCollection']) && is_array($jsondata['ap:annotationCollection'])) { + $tempstore->set($keystoreid, $jsondata['ap:annotationCollection']); + return $jsondata['ap:annotationCollection']; + } + else { + $tempstore->set($keystoreid, []); + return []; + } } - else { - $tempstore->set($keystoreid, []); - return []; + + /** + * Fetchs OCR from the backend and converts them to Annotations + * + * Very similar to \Drupal\strawberryfield\Controller\StrawberryfieldFlavorDatasourceSearchController::originalocrfromSolrIndex + * but with more moving parts (and checks) + * + * @param array $processors + * @param array $file_uris + * @param array $file_uuids + * @param array $node_ids + * @param $offset + * @param $limit + * @param $ocr + * @return array + * @throws PluginException + * @throws SearchApiException + */ + protected function flavorfromSolrIndex(array $processors, array $file_uris, array $file_uuids, array $node_ids = [], $offset = 0, $limit = 100, $ocr = FALSE): array { + + $indexes = StrawberryfieldFlavorDatasource::getValidIndexes(); + + /* @var \Drupal\search_api\IndexInterface[] $indexes */ + + $result_snippets = []; + $search_result = []; + + foreach ($indexes as $search_api_index) { + + // Create the query. + $query = $search_api_index->query([ + 'limit' => $limit, + 'offset' => $offset, + ]); + + $parse_mode = $this->parseModeManager->createInstance('direct'); + $query->setParseMode($parse_mode); + // No key set here, this is a filters query only + $allfields_translated_to_solr = $search_api_index->getServerInstance() + ->getBackend() + ->getSolrFieldNames($query->getIndex()); + // @TODO research if we can do a single Query instead of multiple ones? + if ($ocr) { + if (isset($allfields_translated_to_solr['ocr_text'])) { + $query->setFulltextFields(['ocr_text']); + } + else { + $this->getLogger('format_strawberryfield')->error('We can not execute a Content Search API query against XML OCR without a field named ocr_text of type Full Text Ocr Highlight'); + $search_result['annotations'] = []; + $search_result['total'] = 0; + return $search_result; + } + } + else { + if (isset($allfields_translated_to_solr['sbf_plaintext'])) { + $query->setFulltextFields(['sbf_plaintext']); + } + else { + $this->getLogger('format_strawberryfield')->error('We can not execute a Content Search API query against Plain Extracted Text without a field named sbf_plaintext of type Full Text'); + $search_result['annotations'] = []; + $search_result['total'] = 0; + return $search_result; + } + } + //@TODO: Should this also be a config as `iiif_content_search_api_parent_node_fields` is for example? + $uuid_uri_field = 'file_uuid'; + + + $parent_conditions = $query->createConditionGroup('OR'); + $uri_conditions = $query->createConditionGroup('OR'); + $uuid_conditions = $query->createConditionGroup('OR'); + + // If Nodes are passed use them as conditionals + if (count($node_ids)) { + if (count($parent_conditions->getConditions())) { + $query->addConditionGroup($parent_conditions); + } + } + + $query->addCondition('search_api_datasource', 'strawberryfield_flavor_datasource') + ->addCondition('processor_id', $processors, 'IN'); + + if (isset($allfields_translated_to_solr['ocr_text']) && $ocr) { + // Will be used by \Drupal\strawberryfield\EventSubscriber\SearchApiSolrEventSubscriber::preQuery + $query->setOption('ocr_highlight', 'off'); + // We are already checking if the Node can be viewed. Custom Data Sources can not depend on Solr node access policies. + $query->setOption('search_api_bypass_access', TRUE); + } + if (isset($allfields_translated_to_solr['sbf_plaintext']) && !$ocr) { + // Will be used by \Drupal\strawberryfield\EventSubscriber\SearchApiSolrEventSubscriber::preQuery + $query->setOption('sbf_highlight_fields', 'off'); + // We are already checking if the Node can be viewed. Custom Datasources can not depend on Solr node access policies. + $query->setOption('search_api_bypass_access', TRUE); + } + + $fields_to_retrieve['id'] = 'id'; + if (isset($allfields_translated_to_solr['parent_sequence_id'])) { + $fields_to_retrieve['parent_sequence_id'] = $allfields_translated_to_solr['parent_sequence_id']; + } + if (isset($allfields_translated_to_solr['uuid'])) { + $fields_to_retrieve['uuid'] = $allfields_translated_to_solr['uuid']; + } + if (isset($allfields_translated_to_solr['sequence_id'])) { + $fields_to_retrieve['sequence_id'] = $allfields_translated_to_solr['sequence_id']; + $query->sort('sequence_id', QueryInterface::SORT_ASC); + } + if (isset($allfields_translated_to_solr[$uuid_uri_field])) { + $fields_to_retrieve[$uuid_uri_field] = $allfields_translated_to_solr[$uuid_uri_field]; + // Sadly we have to add the condition here, what if file_uuid is not defined? + } + else { + $this->getLogger('format_strawberryfield')->warning('For Content Search API queries/WebAnnotations from Strawberryflavors, please add a search api field named file_uuid containing the UUID of the file entity that generated the extraction you want to search'); + } + if (isset($allfields_translated_to_solr['fulltext'])) { + $fields_to_retrieve['fulltext'] = $allfields_translated_to_solr['fulltext']; + } + else { + $this->getLogger('format_strawberryfield')->warning('For WebAnnotations from Strawberryflavors using OCR, please add a search api field named fulltext containing the complete OCR as XML'); + $search_result['annotations'] = []; + $search_result['total'] = 0; + return $search_result; + } + + + + + $have_file_condition = FALSE; + if (count($file_uris)) { + //Note here. If we don't have any fields configured the response will contain basically ANYTHING + // in the repo. So option 1 is make `iiif_content_search_api_file_uri_fields` required + // bail out if empty? Or, we can add a short limit... that works too for now + // April 2024, to enable in the future postprocessor that generate SBF but not from files (e.g WARC)> + $iiifConfig = $this->config('format_strawberryfield.iiif_settings'); + foreach ($iiifConfig->get('iiif_content_search_api_file_uri_fields') ?? [] as $uri_field) { + if (isset($allfields_translated_to_solr[$uri_field])) { + $uri_conditions->addCondition($uri_field, $file_uris, 'IN'); + $fields_to_retrieve[$uri_field] + = $allfields_translated_to_solr[$uri_field]; + } + if (count($uri_conditions->getConditions())) { + $have_file_condition = TRUE; + $query->addConditionGroup($uri_conditions); + } + } + } + if (count($file_uuids)) { + if (isset($allfields_translated_to_solr[$uuid_uri_field])) { + $uuid_conditions->addCondition($uuid_uri_field, $file_uuids, 'IN'); + } + if (count($uuid_conditions->getConditions())) { + $have_file_condition = TRUE; + $query->addConditionGroup($uuid_conditions); + } + } + if (!$have_file_condition) { + // in case no files are passed to filter, simply limit all to less? + $query->setOption('limit', 10); + } + // This might/not/be/respected. (API v/s reality) + $query->setOption('search_api_retrieved_field_values', array_values($fields_to_retrieve)); + $query->setProcessingLevel(QueryInterface::PROCESSING_FULL); + $results = $query->execute(); + unset($fields_to_retrieve['id']); + unset($fields_to_retrieve['parent_sequence_id']); + $annotations = []; + if ($results->getResultCount() >= 1) { + foreach ($results as $result) { + $real_id = $result->getId(); + $real_sequence = 1; + $real_id_part = explode(":", $real_id); + if (isset($real_id_part[1]) && is_scalar($real_id_part[1])) { + $real_sequence = $real_id_part[1]; + } + $extradata_from_item = $result->getAllExtraData() ?? []; + if (isset($extradata_from_item['search_api_solr_document'][$allfields_translated_to_solr['fulltext']])) { + $annotations = $this->miniOCRtoAnnon($extradata_from_item['search_api_solr_document'][$allfields_translated_to_solr['fulltext']][0], $real_id_part[3] , $real_sequence); + } + } + } + } + return $annotations; } - } + protected function miniOCRtoAnnon(string $miniocr, $file_uuid, $sequence_id):array { + $internalErrors = libxml_use_internal_errors(TRUE); + libxml_clear_errors(); + libxml_use_internal_errors($internalErrors); + $annotations = []; + $miniocr_xml = simplexml_load_string($miniocr); + if (!$miniocr_xml) { + return []; + } + $i = 0; + foreach ($miniocr_xml->children() as $p) { + foreach ($p->children() as $b) { + foreach ($b->children() as $l) { + foreach ($l->children() as $word) { + $text = (string)$word; + if (strlen(trim($text)) > 0) { + $i++; + $wcoos = explode(" ", $word['x']); + $left = (float)$wcoos[0] * 100; + $top = (float)$wcoos[1] * 100; + $width = (float)$wcoos[2] * 100; + $height = (float)$wcoos[3] * 100; + $text = (string)$word; + $annotations[] = [ + "@context" => "http://www.w3.org/ns/anno.jsonld", + "id" => $file_uuid . '_' . $sequence_id .'_' .$i, + "type" => "Annotation", + "body" => [ + "type" => "TextualBody", + "value" => $text + ], + "target" => [ + "selector" => [ + "type" => "FragmentSelector", + "conformsTo" => "http://www.w3.org/TR/media-frags/", + "value" => "xywh=percent:{$left},{$top},{$width},{$height}" + ] + ] + ]; + } + } + } + } + } + // If not miniOCR then bail. @TODO. In the future generate also for AltoXML + return $annotations; + } } From 5e7c7524ec9d4d0b878f27f9262bea8e780fb2ad Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 20 May 2024 21:18:37 -0400 Subject: [PATCH 25/34] Adds Pako so we can zip/deflate/inflate things via JS --- format_strawberryfield.libraries.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/format_strawberryfield.libraries.yml b/format_strawberryfield.libraries.yml index 6ef2c329..a3145ef1 100644 --- a/format_strawberryfield.libraries.yml +++ b/format_strawberryfield.libraries.yml @@ -2,6 +2,7 @@ iiif_formatstrawberryfield_utils: version: 1.0 js: js/iiif-archipelago-interactions_utils.js: {minified: false} + https://cdn.jsdelivr.net/npm/pako@2.1.0/dist/pako.min.js: { external: true, minified: true, preprocess: false} dependencies: - core/jquery - core/drupal From 50a019552038c6b6eb26e0a56d081f1d4c7c59a8 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 20 May 2024 21:19:07 -0400 Subject: [PATCH 26/34] Dispatch image changes ... new little thing for Annotation based Views --- js/iiif-archipelago-interactions_utils.js | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/js/iiif-archipelago-interactions_utils.js b/js/iiif-archipelago-interactions_utils.js index 152d3e03..efe09d05 100644 --- a/js/iiif-archipelago-interactions_utils.js +++ b/js/iiif-archipelago-interactions_utils.js @@ -48,6 +48,23 @@ return this; }, + dispatchImageViewChange: function(el, encodedImageAnnotation){ + // We don't need the caller here. + // we will use the element itself to fetch who called. + /* el being a dom document via const el = document.getElementById(element_id);*/ + /* nodeid being the ADO Node ID */ + let encodedImageAnnotationOne = ''; + if (Array.isArray(encodedImageAnnotation)) { + encodedImageAnnotationOne = encodedImageAnnotation[0]; + } + else { + encodedImageAnnotationOne = encodedImageAnnotation + } + const event = new CustomEvent('sbf:ado:view:change', { bubbles: true, detail: {image_annotation: encodedImageAnnotationOne} }); + el.dispatchEvent(event); + return this; + }, + dispatchCanvasChange: function(el, canvasid, manifestid, caller_id){ /* el being a dom document via const el = document.getElementById(element_id);*/ From 094317597195f9c8ac800b10ae59a20c6549bc5e Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 20 May 2024 21:19:26 -0400 Subject: [PATCH 27/34] The controller was not right. Add the proper data. Needs cleanup --- src/Controller/WebAnnotationController.php | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Controller/WebAnnotationController.php b/src/Controller/WebAnnotationController.php index 8e059656..b2c017fc 100644 --- a/src/Controller/WebAnnotationController.php +++ b/src/Controller/WebAnnotationController.php @@ -506,10 +506,9 @@ public function readFromFlavors(Request $request, "Wrong request" ); } - - - $existingannotations[$target] = $this->flavorfromSolrIndex([$processors ?? 'ml_yolov8'], [$file->uri],[$target] , [$node->uuid()]); - + $file = reset($file); + error_log($file->getFileUri()); + $existingannotations[$target] = $this->flavorfromSolrIndex([$processors ?? 'ml_yolov8'], [$file->getFileUri()],[$target] , [$node->uuid()]); $return = isset($existingannotations[$target]) && is_array($existingannotations[$target]) ? $existingannotations[$target] : []; } catch (\Exception $exception) { From 49ffdc27463bf9aab51017b1444695f03d77f695 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Tue, 21 May 2024 11:14:21 -0400 Subject: [PATCH 28/34] Just a quick commit for Ajax/Exposed/triggered events But i will have to remove tons of code afterwards (means this commit basically does not work because of a Drupal bug that leads to all Drupal.views stored globally being deleted when two ajax views are present in the same page and one uses the Pager of one... damn) --- js/iiif-annotations_strawberry.js | 80 ++++++++++++++++++- .../js/sbf-views-ajax-interactions.js | 19 ++++- 2 files changed, 92 insertions(+), 7 deletions(-) diff --git a/js/iiif-annotations_strawberry.js b/js/iiif-annotations_strawberry.js index 0f9942f2..87e520da 100644 --- a/js/iiif-annotations_strawberry.js +++ b/js/iiif-annotations_strawberry.js @@ -12,6 +12,64 @@ return uuid; } + var DrupalViewsWidget = function(args) { + + // 1. Find a current color setting in the annotation, if any + var currentColorBody = args.annotation ? + args.annotation.bodies.find(function(b) { + return b.purpose == 'highlighting'; + }) : null; + + // 2. Keep the value in a variable + var currentColorValue = currentColorBody ? currentColorBody.value : null; + + // 3. Triggers callbacks on user action + var addTag = function(evt) { + if (currentColorBody) { + args.onUpdateBody(currentColorBody, { + type: 'TextualBody', + purpose: 'highlighting', + value: evt.target.dataset.tag + }); + } else { + args.onAppendBody({ + type: 'TextualBody', + purpose: 'highlighting', + value: evt.target.dataset.tag + }); + } + } + + var createButton = function(value) { + var button = document.createElement('button'); + + if (value == currentColorValue) + button.className = 'selected'; + + button.dataset.tag = value; + button.style.backgroundColor = value; + button.addEventListener('click', addTag); + return button; + } + + var container = document.createElement('div'); + container.className = 'colorselector-widget'; + var button1 = createButton('RED'); + var button2 = createButton('GREEN'); + var button3 = createButton('BLUE'); + + container.appendChild(button1); + container.appendChild(button2); + container.appendChild(button3); + + return container; + } + + + + + + var ThreeWaySwitchElement = function(id, opencv_enabled) { // 3. Triggers callbacks on user action var setOpenCV = function(evt) { @@ -190,6 +248,15 @@ Drupal.behaviors.format_strawberryfield_annotations_initiate = { attach: function (context, settings) { + var disableUrlClickWhenVisible = function(event) { + // this is the bound annotorious instance. + // If there is a Link around the image we won't be able to use any of the tools + // if we don't prevent the default. + // Sadly there is no getVisible() method. But we can check the annotationlayer if any + if (event.currentTarget.querySelector("svg.a9s-annotationlayer")?.style?.display !== "none") { + event.preventDefault(); + } + } var annotorious_annotations = []; var groupssettings = {}; // Only attach to images that have an ID and a not empty data-sbf-annotations-nodeuuid porperty @@ -238,9 +305,11 @@ "readOnly":$readonly, "widgets": $widgets, "image" : document.getElementById(element_id), + "editorDisabled": true } annotorious[element_id] = Annotorious.init($anonconfig); + document.getElementById(element_id).closest("a").addEventListener("click", disableUrlClickWhenVisible.bind(annotorious[element_id]), false); annotorious_annotations[element_id] = []; loadFirstAnnotationOfGroup(element_id); let toggle = ThreeWaySwitchElement(element_id, false); @@ -254,9 +323,14 @@ // Polygon coordinates, in the snippet element's logical coordinate space }); annotorious[element_id].on('clickAnnotation', function(annotation, element) { - console.log(element); - console.log(annotation); - // + + const image_data = { + "fileuuid": groupssetting.file_uuid, + "nodeuuid": groupssetting.nodeuuid, + "fragment": annotation.target.selector.value, + "textualbody": annotation.body?.value + } + Drupal.FormatStrawberryfieldIiifUtils.dispatchImageViewChange(element, btoa(pako.gzip(JSON.stringify(image_data)))); }); }); } diff --git a/modules/format_strawberryfield_views/js/sbf-views-ajax-interactions.js b/modules/format_strawberryfield_views/js/sbf-views-ajax-interactions.js index af269798..769a6770 100644 --- a/modules/format_strawberryfield_views/js/sbf-views-ajax-interactions.js +++ b/modules/format_strawberryfield_views/js/sbf-views-ajax-interactions.js @@ -2,6 +2,7 @@ function CaptureAdoViewChange(e) { let nodeid = null; + let image_annotation = null; if (Array.isArray(e.detail.nodeid)) { nodeid = e.detail.nodeid.join("+"); console.log(nodeid) @@ -9,7 +10,13 @@ else if (typeof e.detail.nodeid !== 'object') { nodeid = e.detail.nodeid; } - if (nodeid) { + // Will an already base64 encoded GZIPPed structure + if (typeof e.detail.image_annotation == 'string') { + image_annotation = e.detail.image_annotation + } + + + if (nodeid || image_annotation) { if (typeof drupalSettings['sbf_ajax_interactions'] === 'object') { for (const property in drupalSettings['sbf_ajax_interactions']) { if (typeof Drupal.views.instances["views_dom_id:" + property] !== "undefined") { @@ -21,9 +28,14 @@ // Has the same order as the arguments passed. But if there are many and currently only one is assigned we // assume the one currently assigned is the first? if (view_instance?.settings?.view_args !== null) { - view_instance.settings.view_args = nodeid; - view_instance.$view.trigger("RefreshView"); + if (nodeid) { + view_instance.settings.view_args = nodeid; + } + if (image_annotation) { + view_instance.settings.view_args = image_annotation; + } } + view_instance.$view.trigger("RefreshView"); } } } @@ -42,7 +54,6 @@ // If the document already has this eventlistener then it won't be added again! Nice. } } - // END jQuery })(jQuery, Drupal, drupalSettings); From 7ce6d4f1bece67cba98ac5ed97024cc8dc7916f3 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Tue, 21 May 2024 22:58:55 -0400 Subject: [PATCH 29/34] Cool. Overrides big error on Ajax/Views Drupal + annotations full working --- css/popper.css | 5 + js/iiif-annotations_strawberry.js | 509 ++++++------------ .../js/sbf-views-ajax-dynamic.js | 22 + 3 files changed, 206 insertions(+), 330 deletions(-) diff --git a/css/popper.css b/css/popper.css index c582cfdf..b29383b7 100644 --- a/css/popper.css +++ b/css/popper.css @@ -5,6 +5,11 @@ padding: 4px 8px; font-size: 13px; border-radius: 4px; + display:none; +} + +.popper-background[data-show] { + display: block; } .popper-arrow, diff --git a/js/iiif-annotations_strawberry.js b/js/iiif-annotations_strawberry.js index 87e520da..67609d1c 100644 --- a/js/iiif-annotations_strawberry.js +++ b/js/iiif-annotations_strawberry.js @@ -1,338 +1,187 @@ (function ($, Drupal, once, Annotorious) { - 'use strict'; - - const create_UUID = function() { - var dt = new Date().getTime(); - var uuid = 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function(c) { - var r = (dt + Math.random()*16)%16 | 0; - dt = Math.floor(dt/16); - return (c=='x' ? r :(r&0x3|0x8)).toString(16); - }); - return uuid; - } - - var DrupalViewsWidget = function(args) { - - // 1. Find a current color setting in the annotation, if any - var currentColorBody = args.annotation ? - args.annotation.bodies.find(function(b) { - return b.purpose == 'highlighting'; - }) : null; - - // 2. Keep the value in a variable - var currentColorValue = currentColorBody ? currentColorBody.value : null; - - // 3. Triggers callbacks on user action - var addTag = function(evt) { - if (currentColorBody) { - args.onUpdateBody(currentColorBody, { - type: 'TextualBody', - purpose: 'highlighting', - value: evt.target.dataset.tag - }); - } else { - args.onAppendBody({ - type: 'TextualBody', - purpose: 'highlighting', - value: evt.target.dataset.tag + 'use strict'; + + const create_UUID = function() { + var dt = new Date().getTime(); + var uuid = 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function(c) { + var r = (dt + Math.random()*16)%16 | 0; + dt = Math.floor(dt/16); + return (c=='x' ? r :(r&0x3|0x8)).toString(16); }); - } - } - - var createButton = function(value) { - var button = document.createElement('button'); - - if (value == currentColorValue) - button.className = 'selected'; - - button.dataset.tag = value; - button.style.backgroundColor = value; - button.addEventListener('click', addTag); - return button; - } - - var container = document.createElement('div'); - container.className = 'colorselector-widget'; - var button1 = createButton('RED'); - var button2 = createButton('GREEN'); - var button3 = createButton('BLUE'); - - container.appendChild(button1); - container.appendChild(button2); - container.appendChild(button3); - - return container; - } - - - - - - - var ThreeWaySwitchElement = function(id, opencv_enabled) { - // 3. Triggers callbacks on user action - var setOpenCV = function(evt) { - // annotorious will be here already. - $(evt.target.parentElement).find('> button').each(function () { - $(this).removeClass('active'); - }); - - if (annotorious[evt.target.getAttribute('data-annotorious-id')]._env.hasOwnProperty('openCV')) { - if (annotorious[evt.target.getAttribute('data-annotorious-id')]._env.openCV == evt.target.name) { - annotorious[evt.target.getAttribute('data-annotorious-id')]._env.openCV = false; - } else { - annotorious[evt.target.getAttribute('data-annotorious-id')]._env.openCV = evt.target.name; - $(evt.target).addClass('active'); - } - } - else { - annotorious[evt.target.getAttribute('data-annotorious-id')]._env.openCV = evt.target.name; - $(evt.target).addClass('active'); - } - } - - - const container = document.createElement('div'); - container.style = "display:inline-flex"; - const toolbar = document.createElement('div'); - toolbar.setAttribute('id', id+ '-annon-toolbar'); - container.appendChild(toolbar); - if (opencv_enabled) { - const input1 = document.createElement('button'); - input1.setAttribute("name","face"); - input1.setAttribute("data-annotorious-id",id); - const input2 = input1.cloneNode(true); - const input3 = input1.cloneNode(true); - input2.setAttribute("name","contour"); - input3.setAttribute("name","contour_adapt"); - input1.setAttribute("value","OpenCV Face Detect"); - input1.setAttribute("id",id + '_face'); - input2.setAttribute("value","OpenCV Countour"); - input2.setAttribute("id",id + '_countour'); - input3.setAttribute("value","OpenCV Countour 2"); - input3.setAttribute("id", id + '_countour_adapt'); - - input1.classList.add('a9s-toolbar-btn','opencv-face'); - input2.classList.add('a9s-toolbar-btn','opencv-contour-light'); - input3.classList.add('a9s-toolbar-btn','opencv-contour-avg'); - input1.addEventListener('click', setOpenCV); - input2.addEventListener('click', setOpenCV); - input3.addEventListener('click', setOpenCV); - container.appendChild(input1); - container.appendChild(input2); - container.appendChild(input3); + return uuid; } - return container; - } - - var showMap = function(evt) { - - /* i need to transform xywh=pixel:217.31248474121094,240.13888549804688,2412.823989868164,1761.0184631347656 - into a valid IIIF Image URL. - @TODO make this an extra argument of L.ImageOverlay.iiifBounded and let that function deal with it? - "type": "SvgSelector", type can be an SvgSelector or a Fragment. Fragment allows me to fetch the portion of the image directly - but the SvgSelector needs to be decompositioned in parts and we need to get the max.x,max,y, min.x and min.y to call The IIIF API Image endpoint - "value": "<\/polygon><\/svg>" - Also all the IIIF target parsing could be a separate reusable function! - Note we are not using the "type" here not bc i'm lazy but bc - we are moving data around in Dom Documents dataset properties. So we parse the string - */ - evt.target.disabled = true; - evt.target.className ='r6o-btn outline'; - const IIIFragment = evt.target.dataset.bound.split("="); - if (IIIFragment.length == 0) { - return; - } - let iiif_region = null; - let clip_path = []; - let iiif_geoextension_sourcecoords = []; - // For our not fancy mesh deforming reality this is always so - iiif_geoextension_sourcecoords.push([0,0]); - let clip_path_string = null; - if (IIIFragment[0] === "xywh") { - const IIIFragmentCoords = IIIFragment[1].split(":"); - // @TODO what if using %percentage here? - const IIIFragmentCoordsIndividual = IIIFragmentCoords[1].split(","); - const iiif_coord_lx = Math.round(IIIFragmentCoordsIndividual[0]); - const iiif_coord_ly = Math.round(IIIFragmentCoordsIndividual[1]); - const iiif_coord_rx = Math.round(IIIFragmentCoordsIndividual[2]); - const iiif_coord_ry = Math.round(IIIFragmentCoordsIndividual[3]); - iiif_region = iiif_coord_lx + "," + iiif_coord_ly + "," + iiif_coord_rx + "," + iiif_coord_ry; - iiif_geoextension_sourcecoords.push([Math.floor(iiif_coord_rx - iiif_coord_lx),0]); - iiif_geoextension_sourcecoords.push([Math.floor(iiif_coord_rx - iiif_coord_lx), Math.floor(iiif_coord_ry - iiif_coord_ly)]); - iiif_geoextension_sourcecoords.push([0, Math.floor(iiif_coord_ry - iiif_coord_ly)]); - } - else if (IIIFragment[0] == "OpenStreetMap contributors' - } - const $minzoom = 1; - const $maxzoom = 15; - Leaflet.tileLayer($tilemap.url, - { - attribution: $tilemap.attribution, - maxZoom: $maxzoom, - minZoom: $minzoom - }).addTo(map); - - let allmarkers = []; - allmarkers.push(new Leaflet.marker(new Leaflet.LatLng(41,-70), {draggable:'true'})); - allmarkers.push(new Leaflet.marker(new Leaflet.LatLng(41,-66), {draggable:'true'})); - allmarkers.push(new Leaflet.marker(new Leaflet.LatLng(39,-66), {draggable:'true'})); - allmarkers.push(new Leaflet.marker(new Leaflet.LatLng(39,-70), {draggable:'true'})); - } - - - - var annotorious = []; - var viewers = []; - - Drupal.behaviors.format_strawberryfield_annotations_initiate = { - attach: function (context, settings) { - var disableUrlClickWhenVisible = function(event) { - // this is the bound annotorious instance. - // If there is a Link around the image we won't be able to use any of the tools - // if we don't prevent the default. - // Sadly there is no getVisible() method. But we can check the annotationlayer if any - if (event.currentTarget.querySelector("svg.a9s-annotationlayer")?.style?.display !== "none") { - event.preventDefault(); - } - } - var annotorious_annotations = []; - var groupssettings = {}; - // Only attach to images that have an ID and a not empty data-sbf-annotations-nodeuuid porperty - const elementsToAttach = once('attache_annotations', 'img[data-sbf-annotations-nodeuuid][id]:not([data-sbf-annotations-nodeuuid=""])', context); - $(elementsToAttach).each(function (index, value) { - // Get the node uuid for this element - let element_id = $(this).attr("id"); - let node_uuid = $(this).data("sbf-annotations-nodeuuid"); - let file_uuid = $(this).data("sbf-annotations-fileuuid"); - let processors = $(this).data("sbf-annotations-processors"); - if (typeof processors !== "undefined") { - groupssettings[element_id] = { - "webannotations" : false, - "nodeuuid" : node_uuid, - "file_uuid" : file_uuid, - "processors" : processors - } - } - }); - $.each(groupssettings, function (element_id, groupssetting) { - function loadFirstAnnotationOfGroup(element_id) { - jQuery.ajax({ - url: '/do/' + groupssetting.nodeuuid + '/webannon/readsbf', - type: "GET", - dataType: 'json', - element_id: element_id, - data: { - 'target_resource_uuid': groupssetting.file_uuid, - 'processors': groupssetting.processors, - }, - success: function (pagedata) { - annotorious[this.element_id].setAnnotations(pagedata); - annotorious_annotations[this.element_id] = [pagedata]; - }, - error: function (xhr, ajaxOptions, thrownError) { - console.log(xhr.status); + var annotorious = []; + var viewers = []; + + Drupal.behaviors.format_strawberryfield_annotations_initiate = { + attach: function (context, settings) { + var disableUrlClickWhenVisible = function (event) { + // Bound to groupsetting (this) + event.preventDefault(); + // If there is a Link around the image we will reuse as direct link + // if we don't prevent the default. + // Sadly there is no getVisible() method. But we can check the annotationlayer if any + // if (event.currentTarget.querySelector("svg.a9s-annotationlayer")?.style?.display !== "none") { + const image_data = { + "fileuuid": this.file_uuid, + "nodeuuid": this.nodeuuid, + "fragment": "xywh=percent:0,0,100,100", + "textualbody": "whole image" + } + Drupal.FormatStrawberryfieldIiifUtils.dispatchImageViewChange(event.target, btoa(pako.gzip(JSON.stringify(image_data)))); + } + var annotorious_annotations = []; + var groupssettings = {}; + // Only attach to images that have an ID and a not empty data-sbf-annotations-nodeuuid porperty + const elementsToAttach = once('attache_annotations', 'img[data-sbf-annotations-nodeuuid][id]:not([data-sbf-annotations-nodeuuid=""])', context); + $(elementsToAttach).each(function (index, value) { + // Get the node uuid for this element + let element_id = $(this).attr("id"); + let node_uuid = $(this).data("sbf-annotations-nodeuuid"); + let file_uuid = $(this).data("sbf-annotations-fileuuid"); + let processors = $(this).data("sbf-annotations-processors"); + if (typeof processors !== "undefined") { + groupssettings[element_id] = { + "webannotations": false, + "nodeuuid": node_uuid, + "file_uuid": file_uuid, + "processors": processors + } + } + }); + var PopperInstance = {}; + if (context && Object.keys(groupssettings).length !== 0) { + if (!document.getElementById("sbf-annotations-popup")) { + const popup = document.createElement('div'); + popup.setAttribute("id", "sbf-annotations-popup"); + popup.setAttribute("role", "tooltip"); + popup.classList.add('popper-background'); + const popup_text = document.createElement('span'); + popup.appendChild(popup_text); + const arrow = document.createElement('div'); + arrow.classList.add('popper-arrow'); + arrow.setAttribute('data-popper-arrow', ''); + popup.appendChild(arrow); + if (context !== document) { + context.closest('div').appendChild(popup); + } + else { + document.getElementById('main').appendChild(popup); + } + } + // Why again? because it might have been created by a previous Ajax call. So we query it. + // But if created in this pass then we are also OK and fetch it in the same const. + const popup = document.getElementById("sbf-annotations-popup") + + function generateGetBoundingClientRect(x = 0, y = 0) { + return () => ({ + width: 0, + height: 0, + top: y, + right: x, + bottom: y, + left: x, + }); + } + const virtualElement = { + getBoundingClientRect: generateGetBoundingClientRect(), + }; + const PopperInstance = Popper.createPopper(virtualElement, popup); + $.each(groupssettings, function (element_id, groupssetting) { + function loadFirstAnnotationOfGroup(element_id) { + jQuery.ajax({ + url: '/do/' + groupssetting.nodeuuid + '/webannon/readsbf', + type: "GET", + dataType: 'json', + element_id: element_id, + data: { + 'target_resource_uuid': groupssetting.file_uuid, + 'processors': groupssetting.processors, + }, + success: function (pagedata) { + annotorious[this.element_id].setAnnotations(pagedata); + annotorious_annotations[this.element_id] = [pagedata]; + }, + error: function (xhr, ajaxOptions, thrownError) { + console.log(xhr.status); + } + }); + } + + console.log("Attaching W3C Annotations from Flavors"); + var $readonly = true; + let $widgets = []; + const $anonconfig = { + "readOnly": $readonly, + "widgets": $widgets, + "image": document.getElementById(element_id), + "editorDisabled": true, + "disableSelect": true, + } + + annotorious[element_id] = Annotorious.init($anonconfig); + document.getElementById(element_id).closest("a").addEventListener("click", disableUrlClickWhenVisible.bind(groupssetting), false); + annotorious_annotations[element_id] = []; + loadFirstAnnotationOfGroup(element_id); + // let toggle = ThreeWaySwitchElement(element_id, false); + // $('#toolbar-' + element_id).prepend(toggle); + annotorious[element_id].on('createSelection', async function (selection) { + if ($readonly) { + return; + } + ; + // Extract the image snippet, recording + // - image snippet (as canvas element) + // - x/y coordinate of the snippet top-left (image coordinate space) + // - kx/ky scale factors between canvas element physical and logical dimensions + // Polygon coordinates, in the snippet element's logical coordinate space + }); + annotorious[element_id].on('clickAnnotation', function (annotation, element) { + + const image_data = { + "fileuuid": groupssetting.file_uuid, + "nodeuuid": groupssetting.nodeuuid, + "fragment": annotation.target.selector.value, + "textualbody": annotation.body?.value + } + Drupal.FormatStrawberryfieldIiifUtils.dispatchImageViewChange(element, btoa(pako.gzip(JSON.stringify(image_data)))); + }); + annotorious[element_id].on('mouseEnterAnnotation', function (annotation, element) { + // element is a so we need to use getBBox. + popup.setAttribute('data-show', ''); + PopperInstance.setOptions((options) => ({ + ...options, + modifiers: [ + ...options.modifiers, + { name: 'eventListeners', enabled: true }, + ], + })); + const svg = element.closest("svg"); + if (svg) { + const p = svg.createSVGPoint() + p.x = element.getBBox().x+ (element.getBBox().width/2); + p.y = element.getBBox().y + element.getBBox().height; + const transformed = p.matrixTransform(svg.getScreenCTM()); + popup.querySelector('span').innerText = annotation.body?.value; + virtualElement.getBoundingClientRect = generateGetBoundingClientRect(transformed.x, transformed.y); + PopperInstance.update(); + } + }); + annotorious[element_id].on('mouseLeaveAnnotation', function (annotation, element) { + + PopperInstance.setOptions((options) => ({ + ...options, + modifiers: [ + ...options.modifiers, + { name: 'eventListeners', enabled: false }, + ], + })); + popup.removeAttribute('data-show'); + }); + }); } - }); - } - - console.log("Attaching W3C Annotations from Flavors"); - var $readonly = true; - let $widgets = [ - ]; - const $anonconfig = { - "readOnly":$readonly, - "widgets": $widgets, - "image" : document.getElementById(element_id), - "editorDisabled": true } - - annotorious[element_id] = Annotorious.init($anonconfig); - document.getElementById(element_id).closest("a").addEventListener("click", disableUrlClickWhenVisible.bind(annotorious[element_id]), false); - annotorious_annotations[element_id] = []; - loadFirstAnnotationOfGroup(element_id); - let toggle = ThreeWaySwitchElement(element_id, false); - $('#toolbar-' + element_id).prepend(toggle); - annotorious[element_id].on('createSelection', async function(selection) { - if ($readonly) { return; }; - // Extract the image snippet, recording - // - image snippet (as canvas element) - // - x/y coordinate of the snippet top-left (image coordinate space) - // - kx/ky scale factors between canvas element physical and logical dimensions - // Polygon coordinates, in the snippet element's logical coordinate space - }); - annotorious[element_id].on('clickAnnotation', function(annotation, element) { - - const image_data = { - "fileuuid": groupssetting.file_uuid, - "nodeuuid": groupssetting.nodeuuid, - "fragment": annotation.target.selector.value, - "textualbody": annotation.body?.value - } - Drupal.FormatStrawberryfieldIiifUtils.dispatchImageViewChange(element, btoa(pako.gzip(JSON.stringify(image_data)))); - }); - }); - } - }; + }; })(jQuery, Drupal, once, window.Annotorious); diff --git a/modules/format_strawberryfield_views/js/sbf-views-ajax-dynamic.js b/modules/format_strawberryfield_views/js/sbf-views-ajax-dynamic.js index 10200cc7..afd7519c 100644 --- a/modules/format_strawberryfield_views/js/sbf-views-ajax-dynamic.js +++ b/modules/format_strawberryfield_views/js/sbf-views-ajax-dynamic.js @@ -129,5 +129,27 @@ }); } } + + /* Overrides core/modules/views/js/ajax_view.js detach method bc it is buggy/unleads before needed + * see patch for Drupal 11. https://www.drupal.org/files/issues/2023-10-20/3132456-17.patch + * */ + Drupal.behaviors.ViewsAjaxView.detach = (context, settings, trigger) => { + if (trigger === 'unload') { + if (settings && settings.views && settings.views.ajaxViews) { + const { + views: { ajaxViews }, + } = settings; + Object.keys(ajaxViews || {}).forEach((i) => { + const selector = `.js-view-dom-id-${ajaxViews[i].view_dom_id}`; + $(selector, context).ajaxComplete(() => { + if ($(selector, context).length) { + delete Drupal.views.instances[i]; + delete settings.views.ajaxViews[i]; + } + }); + }); + } + } + }; })(jQuery, Drupal, drupalSettings); From 5147a2e76731dc05db3ae3a2718431b71a957f87 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Wed, 29 May 2024 09:57:34 -0400 Subject: [PATCH 30/34] Make sure link around image exists before trying to intercept it --- js/iiif-annotations_strawberry.js | 5 ++++- src/Controller/WebAnnotationController.php | 6 +++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/js/iiif-annotations_strawberry.js b/js/iiif-annotations_strawberry.js index 67609d1c..425fce1d 100644 --- a/js/iiif-annotations_strawberry.js +++ b/js/iiif-annotations_strawberry.js @@ -122,7 +122,10 @@ } annotorious[element_id] = Annotorious.init($anonconfig); - document.getElementById(element_id).closest("a").addEventListener("click", disableUrlClickWhenVisible.bind(groupssetting), false); + const wrapping_link = document.getElementById(element_id).closest("a") + if (wrapping_link) { + wrapping_link.addEventListener("click", disableUrlClickWhenVisible.bind(groupssetting), false); + } annotorious_annotations[element_id] = []; loadFirstAnnotationOfGroup(element_id); // let toggle = ThreeWaySwitchElement(element_id, false); diff --git a/src/Controller/WebAnnotationController.php b/src/Controller/WebAnnotationController.php index b2c017fc..a6ae0263 100644 --- a/src/Controller/WebAnnotationController.php +++ b/src/Controller/WebAnnotationController.php @@ -481,9 +481,7 @@ public function readFromFlavors(Request $request, ], ]; - $return = []; $existingannotations = []; - // GET Argument ( $target = $this->requestStack->getCurrentRequest()->query->get('target_resource_uuid'); // Processors need to exist. // We might want to (eventually) decide if we want OCR (normal page level) to be fetched @@ -507,7 +505,7 @@ public function readFromFlavors(Request $request, ); } $file = reset($file); - error_log($file->getFileUri()); + // This allows really for multiple targets. Also, we need more caching here. $existingannotations[$target] = $this->flavorfromSolrIndex([$processors ?? 'ml_yolov8'], [$file->getFileUri()],[$target] , [$node->uuid()]); $return = isset($existingannotations[$target]) && is_array($existingannotations[$target]) ? $existingannotations[$target] : []; } @@ -886,6 +884,8 @@ protected function flavorfromSolrIndex(array $processors, array $file_uris, arra } protected function miniOCRtoAnnon(string $miniocr, $file_uuid, $sequence_id):array { + // To avoid memory crazy ness should we set a limit here? + // As today, archipelago generates OCR per page, so should not be too large. $internalErrors = libxml_use_internal_errors(TRUE); libxml_clear_errors(); libxml_use_internal_errors($internalErrors); From 7e89605b1d9f09a9da613f9cf4c5b9553a5c3e6c Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 17 Jun 2024 11:18:59 -0400 Subject: [PATCH 31/34] First pass on Plain Text search So far so good. But the Actual Search needs adapting for non OCR so we return a proper annotation --- .../IiifContentSearchController.php | 79 ++++++++++++++++--- src/Form/IiifSettingsForm.php | 2 +- 2 files changed, 70 insertions(+), 11 deletions(-) diff --git a/src/Controller/IiifContentSearchController.php b/src/Controller/IiifContentSearchController.php index 45b59d80..766ebf44 100644 --- a/src/Controller/IiifContentSearchController.php +++ b/src/Controller/IiifContentSearchController.php @@ -37,6 +37,10 @@ class IiifContentSearchController extends ControllerBase { CONST IIIF_V3_JMESPATH = "items[?not_null(type, \"@type\") == 'Canvas'].[{width:width,height:height,img_canvas_pairs:items[?type == 'AnnotationPage'][].items[?motivation == 'painting' && body.type == 'Image'][body.not_null(id, \"@id\"), not_null(target)][]}][]"; CONST IIIF_V3_JMESPATH_VTT ="items[?not_null(type, \"@type\") == 'Canvas'].[{duration:duration, width:width, height:height, vtt_canvas_annotation_triad:annotations[].items[?motivation=='supplementing' && body.format == 'text/vtt'][body.not_null(id, \"@id\"), not_null(target),not_null(id, \"@id\")][]}][]"; + + CONST IIIF_V3_JMESPATH_TEXT ="items[?not_null(type, \"@type\") == 'Canvas'].[{width:width, height:height, text_canvas_annotation_triad:annotations[].items[?motivation=='supplementing' && body.format == 'text/plain'][body.not_null(id, \"@id\"), not_null(target),not_null(id, \"@id\")][]}][]"; + + /** * Mime type guesser service. * @@ -270,7 +274,10 @@ function () use ($metadataexposeconfig_entity, $node) { $image_hash = []; $vtt_hash = []; - $text_hash = []; + $results = []; + $results_time = []; + $results_text = []; + // Get the Visual X/Y Processors, split, clean; $visual_processors = $this->iiifConfig->get('iiif_content_search_api_visual_enabled_processors') ?? 'ocr'; //@TODO we could do this also on saving? see \Drupal\format_strawberryfield\Form\IiifSettingsForm::submitForm @@ -284,6 +291,12 @@ function () use ($metadataexposeconfig_entity, $node) { $time_processors = array_map('trim', $time_processors); $time_processors = array_filter($time_processors); + $text_processors = $this->iiifConfig->get('iiif_content_search_api_text_enabled_processors') ?? ''; + $text_processors = explode(",", $text_processors); + $text_processors = array_map('trim', $text_processors); + $text_processors = array_filter($text_processors); + + if (count($visual_processors)) { $jmespath_searchresult = StrawberryfieldJsonHelper::searchJson( static::IIIF_V3_JMESPATH, $jsonArray @@ -309,6 +322,17 @@ function () use ($metadataexposeconfig_entity, $node) { } } + if (count($text_processors)) { + $jmespath_searchresult = StrawberryfieldJsonHelper::searchJson( + static::IIIF_V3_JMESPATH_TEXT, $jsonArray + ); + $image_hash = $this->cleanTextJmesPathResult($jmespath_searchresult); + unset($jmespath_searchresult); + if (count($image_hash)) { + $results_text = $this->flavorfromSolrIndex($the_query_string, $text_processors, array_keys($image_hash), [], [], ($page * $per_page), $per_page, FALSE); + } + } + /* Expected structure independent if V2 or V3. result = {array[345]} 0 = {array[3]} @@ -457,25 +481,19 @@ function () use ($metadataexposeconfig_entity, $node) { } } } - - - - - - - + if (count($entries) == 0) { $results['total'] = 0; } $total = ($results['total'] ?? 0) + ($results_time['total'] ?? 0); if ($total > $this->iiifConfig->get('iiif_content_search_api_results_per_page')) { - $max_page = ceil($results['total']/$this->iiifConfig->get('iiif_content_search_api_results_per_page')) - 1; + $max_page = ceil($total/$this->iiifConfig->get('iiif_content_search_api_results_per_page')) - 1; if ($version == "v1") { $paging_structure = [ "within" => [ "@type" => "sc:Layer", - "total" => $results['total'], + "total" => $total, "first" => $current_url_clean_no_page.'/0?='.urlencode($the_query_string), "last" => $current_url_clean_no_page.'/'.$max_page .'?='.urlencode($the_query_string), ] @@ -615,6 +633,47 @@ protected function cleanVttJmesPathResult(array $jmespath_searchresult, $targetA return $vtt_hash; } + /** + * Cleans the over complex original JMESPATH result for a VTT to a reversed array. + * + * @param array $jmespath_searchresult + * @param bool $targetAnnotation + * If TRUE, we will return the VTT and the annotation itself as the target (allowing multiple VTTs per Canvas) + * If FALSE, we will return the VTT and the Canvas itself as the target (not caring which VTT matched) + * @return array + */ + protected function cleanTextJmesPathResult(array $jmespath_searchresult, $targetAnnotation = TRUE): array { + $text_hash = []; + foreach($jmespath_searchresult as $entries_percanvas) { + foreach (($entries_percanvas['text_canvas_annotation_triad'] ?? []) as $text_canvas_annon_triad) { + $vtt_uuid = NULL; + $path = pathinfo($text_canvas_annon_triad[0] ?? '/'); + $parts = explode("/", $path['dirname']); + $parts = array_reverse($parts); + // Might be longer (normally 8), if a subdomain with paths, that is why we reverse that paths + if (count($parts) >= 5 && $parts[0] == "download" && Uuid::isValid($parts[1]) && $parts[2] == "file" && Uuid::isValid($parts[3]) && $parts[4] == "do") { + $text_uuid = $parts[1]; + } + if (!$text_uuid) { + // just skip if we have no File uuid. + continue; + } + // The $text_canvas_annon_triad[1] is the Canvas targeted by the Text. + // The $text_canvas_annon_triad[2] is the AnnotationID containing the Text. + $sequence = 1 ; + $target = $targetAnnotation ? ($text_canvas_annon_triad[2] ?? NULL) : ($text_canvas_annon_triad[1] ?? NULL); + if (!$target) { + // just skip if we have no Target. + continue; + } + // We don't use the duration so if not present just give it a second to have a value in this array. + $text_hash[$text_uuid][$sequence][$target] = [1]; + } + } + unset($jmespath_searchresult); + return $text_hash; + } + /** * OCR/Annnotation Search Controller specific to IIIF Content Search Needs diff --git a/src/Form/IiifSettingsForm.php b/src/Form/IiifSettingsForm.php index 4cc9955a..6d140927 100644 --- a/src/Form/IiifSettingsForm.php +++ b/src/Form/IiifSettingsForm.php @@ -172,7 +172,7 @@ public function buildForm(array $form, FormStateInterface $form_state) { 'Strawberry Runner processors that should be searched against plain text extractions.' ), '#description' => $this->t( - 'e.g Strawberry Flavor Data might have been generated by the "text" strawberry runners processor. These will not have coordinates but will match IIIF Annotations with motivation supplementing and target the whole canvas. A comma separated list of processors (machine names) that generated time based transcripts encoded as miniOCR.' + 'e.g Strawberry Flavor Data might have been generated by the "text" strawberry runners processor. These will not have coordinates but will match IIIF Annotations with motivation supplementing and target the whole canvas. A comma separated list of processors (machine names) that generated pure text extractions without miniOCR.' ), '#default_value' => !empty($config->get('iiif_content_search_api_text_enabled_processors')) ? $config->get( 'iiif_content_search_api_text_enabled_processors' From 0de131ade7cb1bb3e8bcda7341df8f05e7809a23 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 17 Jun 2024 14:22:25 -0400 Subject: [PATCH 32/34] Fully functional Plaintext Content Search API response @alliomeria need to share the IIIF Manifest that drives this. Very similar to VTT --- .../IiifContentSearchController.php | 189 ++++++++++++------ 1 file changed, 129 insertions(+), 60 deletions(-) diff --git a/src/Controller/IiifContentSearchController.php b/src/Controller/IiifContentSearchController.php index 766ebf44..e0092370 100644 --- a/src/Controller/IiifContentSearchController.php +++ b/src/Controller/IiifContentSearchController.php @@ -13,6 +13,7 @@ use Drupal\format_strawberryfield\Tools\IiifHelper; use Drupal\search_api\Query\QueryInterface; use Drupal\search_api\SearchApiException; +use Drupal\search_api_solr\Utility\Utility as UtilityAlias; use Drupal\strawberryfield\Plugin\search_api\datasource\StrawberryfieldFlavorDatasource; use Drupal\strawberryfield\Tools\StrawberryfieldJsonHelper; use Ramsey\Uuid\Uuid; @@ -323,13 +324,14 @@ function () use ($metadataexposeconfig_entity, $node) { } if (count($text_processors)) { - $jmespath_searchresult = StrawberryfieldJsonHelper::searchJson( - static::IIIF_V3_JMESPATH_TEXT, $jsonArray - ); - $image_hash = $this->cleanTextJmesPathResult($jmespath_searchresult); - unset($jmespath_searchresult); + $jmespath_searchresult = StrawberryfieldJsonHelper::searchJson( + static::IIIF_V3_JMESPATH_TEXT, $jsonArray + ); + // Mirador does not know how to target a Text Annotation that is Suplemental. So target the Canvas + $text_hash = $this->cleanTextJmesPathResult($jmespath_searchresult, FALSE); + unset($jmespath_searchresult); if (count($image_hash)) { - $results_text = $this->flavorfromSolrIndex($the_query_string, $text_processors, array_keys($image_hash), [], [], ($page * $per_page), $per_page, FALSE); + $results_text = $this->flavorfromSolrIndex($the_query_string, $text_processors, [], array_keys($text_hash), [], ($page * $per_page), $per_page, FALSE); } } @@ -345,6 +347,7 @@ function () use ($metadataexposeconfig_entity, $node) { */ $entries = []; $paging_structure = []; + $uuid_uri_field = 'file_uuid'; // Image/Visual based Annotations if (count($results['annotations'] ?? [])) { $i = 0; @@ -432,8 +435,7 @@ function () use ($metadataexposeconfig_entity, $node) { // Calculate Canvas and its offset // PDFs Sequence is correctly detected, but on images it should always be "1" // For that we will change the response from the main Solr search using our expected ID (splitting) - $uuid_uri_field = 'file_uuid'; - // Different than normal OCR. Single UUID per file. + // Different from normal OCR. Single UUID per file. $uuid = $hits_per_file_and_sequence['sbf_metadata'][$uuid_uri_field] ?? NULL; $sequence_id = $hits_per_file_and_sequence['sbf_metadata']['sequence_id'] ?? 1; if ($uuid) { @@ -441,8 +443,8 @@ function () use ($metadataexposeconfig_entity, $node) { foreach ($target as $target_id => $target_data) { if ($target_id) { $target_time = [ - round($annotation['s'],2), - round($annotation['e'],2) + round($annotation['s'],2), + round($annotation['e'],2) ]; $target_fragment = "#t=" . implode( ",", $target_time @@ -481,11 +483,63 @@ function () use ($metadataexposeconfig_entity, $node) { } } } - + // Plain Text Annotations + if (count($results_text['annotations'] ?? [])) { + $i = 0; + foreach ($results_text['annotations'] as $hit => $hits_per_file_and_sequence) { + foreach ( + ($hits_per_file_and_sequence['boxes'] ?? []) as $annotation + ) { + $i++; + // Calculate Canvas and its offset + // PDFs Sequence is correctly detected, but on images it should always be "1" + // For that we will change the response from the main Solr search using our expected ID (splitting) + // Different from normal OCR. Single UUID per file. + $uuid = $hits_per_file_and_sequence['sbf_metadata'][$uuid_uri_field] ?? NULL; + $sequence_id = $hits_per_file_and_sequence['sbf_metadata']['sequence_id'] ?? 1; + if ($uuid) { + $target = $text_hash[$uuid][$sequence_id] ?? []; + foreach ($target as $target_id => $target_data) { + if ($target_id) { + // V1 + // Generate the entry + if ($version == "v1") { + $entries[] = [ + "@id" => $current_url_clean + . "/annotation/anno-result/$i", + "@type" => "oa:Annotation", + "motivation" => $target_annotation ? "supplementing" : "painting", + "resource" => [ + "@type" => "cnt:ContentAsHTML", + "chars" => $annotation['snippet'], + ], + "on" => ($target_id).'#' + ]; + } elseif ($version == "v2") { + $entries[] = [ + "id" => $current_url_clean + . "/annotation/anno-result/$i", + "type" => "Annotation", + "motivation" => $target_annotation ? "supplementing" : "painting", + "body" => [ + "type" => "TextualBody", + "value" => $annotation['snippet'], + "format" => "text/html", + ], + "target" => $target_id.'#' + ]; + } + } + } + } + } + } + } + if (count($entries) == 0) { $results['total'] = 0; } - $total = ($results['total'] ?? 0) + ($results_time['total'] ?? 0); + $total = ($results['total'] ?? 0) + ($results_time['total'] ?? 0) + ($results_text['total'] ?? 0); if ($total > $this->iiifConfig->get('iiif_content_search_api_results_per_page')) { $max_page = ceil($total/$this->iiifConfig->get('iiif_content_search_api_results_per_page')) - 1; @@ -638,15 +692,15 @@ protected function cleanVttJmesPathResult(array $jmespath_searchresult, $targetA * * @param array $jmespath_searchresult * @param bool $targetAnnotation - * If TRUE, we will return the VTT and the annotation itself as the target (allowing multiple VTTs per Canvas) - * If FALSE, we will return the VTT and the Canvas itself as the target (not caring which VTT matched) + * If TRUE, we will return the Text and the annotation itself as the target (allowing multiple Texts per Canvas) + * If FALSE, we will return the Text and the Canvas itself as the target (not caring which Text matched) * @return array */ protected function cleanTextJmesPathResult(array $jmespath_searchresult, $targetAnnotation = TRUE): array { $text_hash = []; foreach($jmespath_searchresult as $entries_percanvas) { foreach (($entries_percanvas['text_canvas_annotation_triad'] ?? []) as $text_canvas_annon_triad) { - $vtt_uuid = NULL; + $text_uuid = NULL; $path = pathinfo($text_canvas_annon_triad[0] ?? '/'); $parts = explode("/", $path['dirname']); $parts = array_reverse($parts); @@ -721,19 +775,16 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $f if ($ocr) { if (isset($allfields_translated_to_solr['ocr_text'])) { $query->setFulltextFields(['ocr_text']); - } - else { + } else { $this->getLogger('format_strawberryfield')->error('We can not execute a Content Search API query against XML OCR without a field named ocr_text of type Full Text Ocr Highlight'); $search_result['annotations'] = []; $search_result['total'] = 0; return $search_result; } - } - else { + } else { if (isset($allfields_translated_to_solr['sbf_plaintext'])) { $query->setFulltextFields(['sbf_plaintext']); - } - else { + } else { $this->getLogger('format_strawberryfield')->error('We can not execute a Content Search API query against Plain Extracted Text without a field named sbf_plaintext of type Full Text'); $search_result['annotations'] = []; $search_result['total'] = 0; @@ -794,8 +845,7 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $f if (isset($allfields_translated_to_solr[$uuid_uri_field])) { $fields_to_retrieve[$uuid_uri_field] = $allfields_translated_to_solr[$uuid_uri_field]; // Sadly we have to add the condition here, what if file_uuid is not defined? - } - else { + } else { $this->getLogger('format_strawberryfield')->warning('For Content Search API queries, please add a search api field named file_uuid containing the UUID of the file entity that generated the extraction you want to search'); } $have_file_condition = FALSE; @@ -854,39 +904,39 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $f $region_text = $term; $page_number_by_id = []; if ($results->getResultCount() >= 1) { + // This applies to all searches with hits. + foreach ($results as $result) { + $real_id = $result->getId(); + $real_sequence = NULL; + $real_id_part = explode(":", $real_id); + if (isset($real_id_part[1]) && is_scalar($real_id_part[1])) { + $real_sequence = $real_id_part[1]; + } + $extradata_from_item = $result->getAllExtraData() ?? []; + + foreach ($fields_to_retrieve as $machine_name => $field) { + $filedata_by_id[$extradata_from_item['search_api_solr_document']['id']][$machine_name] = $extradata_from_item['search_api_solr_document'][$field] ?? NULL; + } + if ($real_sequence) { + $filedata_by_id[$extradata_from_item['search_api_solr_document']['id']]['sequence_id'] = $real_sequence; + } + } if (isset($extradata['search_api_solr_response']['ocrHighlighting']) && count( $extradata['search_api_solr_response']['ocrHighlighting'] ) > 0) { - foreach ($results as $result) { - $real_id = $result->getId(); - $real_sequence = NULL; - $real_id_part = explode(":", $real_id); - if (isset($real_id_part[1]) && is_scalar($real_id_part[1])) { - $real_sequence = $real_id_part[1]; - } - $extradata_from_item = $result->getAllExtraData() ?? []; - - foreach($fields_to_retrieve as $machine_name => $field) { - $filedata_by_id[$extradata_from_item['search_api_solr_document']['id']][$machine_name] = $extradata_from_item['search_api_solr_document'][$field] ?? NULL; - } - if ($real_sequence) { - $filedata_by_id[$extradata_from_item['search_api_solr_document']['id']]['sequence_id'] = $real_sequence; - } - } foreach ($extradata['search_api_solr_response']['ocrHighlighting'] as $sol_doc_id => $field) { $result_snippets_base = []; if (isset($field[$allfields_translated_to_solr['ocr_text']]['snippets']) && is_array($field[$allfields_translated_to_solr['ocr_text']]['snippets'])) { foreach ($field[$allfields_translated_to_solr['ocr_text']]['snippets'] as $snippet) { - $page_width = (float) $snippet['pages'][0]['width']; - $page_height = (float) $snippet['pages'][0]['height']; + $page_width = (float)$snippet['pages'][0]['width']; + $page_height = (float)$snippet['pages'][0]['height']; $is_time = str_starts_with($snippet['pages'][0]['id'], 'timesequence_'); if ($is_time) { $result_snippets_base = [ 'timespans' => $result_snippets_base['timespans'] ?? [], ]; - } - else { + } else { $result_snippets_base = [ 'boxes' => $result_snippets_base['boxes'] ?? [], ]; @@ -900,33 +950,32 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $f $region_text = $snippet['regions'][$parent_region]['text'] ?? $term; $hit = $highlight[0]['text'] ?? $term; - $before_and_after = explode("{$hit}", strip_tags($region_text ?? $term)); + $before_and_after = explode("{$hit}", strip_tags($region_text ?? $term)); // Check if (int) coordinates lrx >1 (ALTO) ... assuming nothing is at 1px to the right? // else between 0 and < 1 (MINIOCR) - $before_index = $shared_parent_region[$parent_region] -1; + $before_index = $shared_parent_region[$parent_region] - 1; $before_index = $before_index > 0 ? $before_index : 0; $after_index = $shared_parent_region[$parent_region]; $after_index = ($after_index < count($before_and_after)) ? $after_index : 1; - if ( ((int) $highlight[0]['lrx']) > 1 ){ + if (((int)$highlight[0]['lrx']) > 1) { //ALTO so coords need to be relative - $left = sprintf('%.3f',((float) $highlight[0]['ulx'] / $page_width)); - $top = sprintf('%.3f',((float) $highlight[0]['uly'] / $page_height)); - $right = sprintf('%.3f',((float) $highlight[0]['lrx'] / $page_width)); - $bottom = sprintf('%.3f',((float) $highlight[0]['lry'] / $page_height)); + $left = sprintf('%.3f', ((float)$highlight[0]['ulx'] / $page_width)); + $top = sprintf('%.3f', ((float)$highlight[0]['uly'] / $page_height)); + $right = sprintf('%.3f', ((float)$highlight[0]['lrx'] / $page_width)); + $bottom = sprintf('%.3f', ((float)$highlight[0]['lry'] / $page_height)); $result_snippets_base['boxes'][] = [ 'l' => $left, 't' => $top, 'r' => $right, 'b' => $bottom, 'snippet' => $region_text, - 'before' => $before_and_after[$before_index] ?? '', - 'after' => $before_and_after[$after_index] ?? '', + 'before' => $before_and_after[$before_index] ?? '', + 'after' => $before_and_after[$after_index] ?? '', 'hit' => $hit, 'time' => $is_time, ]; - } - else { + } else { //MINIOCR coords already relative // Deal with time here if (!$is_time) { @@ -941,15 +990,14 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $f 'hit' => $hit, 'time' => $is_time ]; - } - else { + } else { // IN this case, because on now text spans into other regions, we use 'text' instead of // $region_text like in a normal HOCR // It is about time! // Before and after. We will try to split the original text by the math // If we end with more than 2 pieces, we can't be sure where it was found .. // so we set them '' ? - $before_and_after = explode($highlight[0]['text'],strip_tags($region_text)); + $before_and_after = explode($highlight[0]['text'], strip_tags($region_text)); $result_snippets_base['timespans'][] = [ 's' => ($highlight[0]['uly'] * $page_height) / StrawberryfieldFlavorDatasource::PIXELS_PER_SECOND, 'e' => ($highlight[0]['lry'] * $page_height) / StrawberryfieldFlavorDatasource::PIXELS_PER_SECOND, @@ -963,7 +1011,7 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $f } } } - foreach($fields_to_retrieve as $machine_name => $machine_name_field) { + foreach ($fields_to_retrieve as $machine_name => $machine_name_field) { $result_snippets_base['sbf_metadata'][$machine_name] = $filedata_by_id[$sol_doc_id][$machine_name]; } } @@ -971,9 +1019,30 @@ protected function flavorfromSolrIndex(string $term, array $processors, array $f } } elseif (isset($extradata['search_api_solr_response'])) { - // if no ocr hl was passed we won't have $extradata['search_api_solr_response']['ocrHighlighting'], so we process - // the other. These results won't have coordinates. + if (isset($extradata['search_api_solr_response']['highlighting']) && count( + $extradata['search_api_solr_response']['highlighting'] + ) > 0) { + $result_snippets_base = []; + foreach ($extradata['search_api_solr_response']['highlighting'] as $sol_doc_id => $field) { + $result_snippets_base = [ + 'boxes' => $result_snippets_base['boxes'] ?? [], + ]; + foreach ($field[$allfields_translated_to_solr['sbf_plaintext']] as $snippet) { + $result_snippets_base['boxes'][] = [ + 'snippet' => UtilityAlias::formatHighlighting($snippet, '', ''), + 'hit' => implode(' ', UtilityAlias::getHighlightedKeys($snippet)), + 'time' => FALSE, + ]; + } + foreach ($fields_to_retrieve as $machine_name => $machine_name_field) { + $result_snippets_base['sbf_metadata'][$machine_name] = $filedata_by_id[$sol_doc_id][$machine_name]; + } + $result_snippets[] = $result_snippets_base; + } + } } + // if no ocr hl was passed we won't have $extradata['search_api_solr_response']['ocrHighlighting'], so we process + // the other. These results won't have coordinates. } } $search_result['annotations'] = $result_snippets; From bc5c3a635c1a5efbc7b5eb104123b2bfed0f3d9d Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 17 Jun 2024 18:08:11 -0400 Subject: [PATCH 33/34] Fixes Canvas not being selected before sending the Annotation When multiple canvas, videos,audios, text are all present in the Content Search Results --- js/mirador_strawberry.js | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/js/mirador_strawberry.js b/js/mirador_strawberry.js index ccb07b9c..dc27cb06 100644 --- a/js/mirador_strawberry.js +++ b/js/mirador_strawberry.js @@ -143,7 +143,7 @@ const matches = state.annotations[canvas][annotation_page].json.items.filter(item => { return (item['id'] === on[0] && item['body'].format == 'text/vtt'); }); - if (Array.isArray(matches)) { + if (Array.isArray(matches) && matches.length == 1 && typeof matches[0] == "object" && matches[0].hasOwnProperty('body')) { vtt_url = matches[0].body.id; canvas_id_for_vtt = matches[0].target; canvas_time_for_media = on[1].split(",", 1); @@ -161,8 +161,9 @@ } } if (canvas_id_for_vtt != current_canvas?.id && canvas_id_for_vtt != null) { - // take will wait/ call will run async - let visibleCanvasesForAnnotation = (yield effects.take( Mirador.actions.setCanvas(windowId, canvas_id_for_vtt))).payload.visibleCanvases + // take will wait/ call will run in sync/Block. + const visibleCanvasesForAnnotationAction = Mirador.actions.setCanvas(windowId, canvas_id_for_vtt); + let visibleCanvasesForAnnotation = (yield effects.putResolve(visibleCanvasesForAnnotationAction)); } if (canvas_id_for_vtt != null) { let MediaWindow = document.getElementById(windowId); From b46c6a2220dd0428fb14bf43fdefd13739b91856 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Tue, 18 Jun 2024 14:01:08 -0400 Subject: [PATCH 34/34] I know the delay is large (1.5 seconds) but really a video/update might take that time @alliomeria i think i got it this time. But i can't say for sure. At least nothing breaks and the canvas swapping does work --- js/mirador_strawberry.js | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/js/mirador_strawberry.js b/js/mirador_strawberry.js index dc27cb06..de3003f0 100644 --- a/js/mirador_strawberry.js +++ b/js/mirador_strawberry.js @@ -126,7 +126,7 @@ const state = yield effects.select(Mirador.actions.getState); if (action.type === ActionTypes.SELECT_ANNOTATION) { - const { windowId } = action + const { windowId, annotationId } = action const searches = yield effects.select(Mirador.selectors.getSearchForWindow, { windowId }); const current_canvas = yield effects.select(Mirador.selectors.getCurrentCanvas, { windowId }); let vtt_url = null; @@ -163,7 +163,15 @@ if (canvas_id_for_vtt != current_canvas?.id && canvas_id_for_vtt != null) { // take will wait/ call will run in sync/Block. const visibleCanvasesForAnnotationAction = Mirador.actions.setCanvas(windowId, canvas_id_for_vtt); - let visibleCanvasesForAnnotation = (yield effects.putResolve(visibleCanvasesForAnnotationAction)); + // Tiny trick. Because the canvas is not active i can't actually put/trigger a setCanvas and THEN + // a SelectAnnotation (or i don't know how, tried, parallel, in sync, nothing. + // But I can update the config so on the next setCanvas reaction I can read that value and trigger it + // Still, i am getting race conditions of HTML not being there in the DOM yet. + const temp_update = Mirador.actions.updateConfig({'timeAnnotation':[windowId, annotationId]}); + yield effects.put(temp_update); + yield effects.all([ + effects.put(visibleCanvasesForAnnotationAction) + ]); } if (canvas_id_for_vtt != null) { let MediaWindow = document.getElementById(windowId); @@ -208,10 +216,10 @@ .visibleCanvases } const manifest = yield effects.select(Mirador.selectors.getManifest, { windowId }); - const manifestUrl = manifest.id; if (!manifest.json) { return } + const manifestUrl = manifest.id; if (!view) { view = (yield effects.select(Mirador.selectors.getWindowConfig, { windowId, @@ -280,6 +288,18 @@ } else if (view === 'book') { newParams.page = canvasIndices.find(e => !!e).join(',') } + // Now at the end. If a VTT annotation requested a Canvas to be set. we need to check if we have in the config + // A temporary stored valued of the last clicked annotation. + // Use if here. + if (typeof state.config.timeAnnotation !== "undefined") { + if (Array.isArray(state.config.timeAnnotation) && state.config.timeAnnotation.length == 2) { + const selectAnnotationAction = Mirador.actions.selectAnnotation(state.config.timeAnnotation[0], state.config.timeAnnotation[1]); + const temp_update = Mirador.actions.updateConfig({'timeAnnotation': null }); + yield effects.put(temp_update); + yield effects.delay(1500); + yield effects.put(selectAnnotationAction); + } + } } else if (action.type === ActionTypes.RECEIVE_SEARCH) { const { windowId, companionWindowId } = action