From 9eb70929bab195418c94e934079dfcfd3a050531 Mon Sep 17 00:00:00 2001 From: Abel Perez Date: Tue, 8 Nov 2022 21:09:18 -0400 Subject: [PATCH 1/3] Added new search and replace pipeline --- pipelines/SearchReplace.json | 229 ++++++++++++++++++++++++++++++++ src/lib/stepTemplates.js | 112 ++++++++-------- src/transforms/searchReplace.js | 179 +++++++++++++++++++++++++ 3 files changed, 465 insertions(+), 55 deletions(-) create mode 100644 pipelines/SearchReplace.json create mode 100644 src/transforms/searchReplace.js diff --git a/pipelines/SearchReplace.json b/pipelines/SearchReplace.json new file mode 100644 index 0000000..bf10bb5 --- /dev/null +++ b/pipelines/SearchReplace.json @@ -0,0 +1,229 @@ +[ + { + "id": 1, + "title": "USFM from DCS", + "type": "Source", + "sourceLocation": "http", + "outputType": "text", + "httpUrl": "https://git.door43.org/unfoldingWord/en_ult/raw/branch/master/57-TIT.usfm" + }, + { + "id": 2, + "title": "Selectors", + "type": "Source", + "sourceLocation": "local", + "localValue": "{\"lang\": \"eng\", \"abbr\": \"ust\"}", + "outputType": "json" + }, + { + "id": 3, + "title": "USFM to PERF", + "name": "usfm2perf", + "type": "Transform", + "inputs": [ + { + "name": "usfm", + "type": "text", + "source": "Source 1" + }, + { + "name": "selectors", + "type": "json", + "source": "Source 2" + } + ], + "outputs": [ + { + "name": "perf", + "type": "json" + } + ], + "description": "USFM=>PERF: Conversion via Proskomma" + }, + { + "id": 4, + "title": "Count orig perf words", + "name": "verseWords", + "type": "Transform", + "inputs": [ + { + "name": "perf", + "type": "json", + "source": "Transform 3 perf" + } + ], + "outputs": [ + { + "name": "verseWords", + "type": "json" + } + ], + "description": "PERF=>JSON: Counts words occurrences" + }, + { + "id": 6, + "title": "Strip Markup", + "name": "stripMarkup", + "type": "Transform", + "inputs": [ + { + "name": "perf", + "type": "json", + "source": "Transform 3 perf" + }, + { + "name": "verseWords", + "type": 
"json", + "source": "Transform 4 verseWords" + } + ], + "outputs": [ + { + "name": "perf", + "type": "json" + }, + { + "name": "stripped", + "type": "json" + } + ], + "description": "PERF=>PERF: Strips alignment markup" + }, + { + "id": 9, + "title": "Merge Text in Stripped PERF", + "name": "mergePerfText", + "type": "Transform", + "inputs": [ + { + "name": "perf", + "type": "json", + "source": "Transform 6 perf" + } + ], + "outputs": [ + { + "name": "perf", + "type": "json" + } + ], + "description": "PERF=>PERF: Merge consecutive text strings" + }, + { + "id": 19, + "title": "Search params", + "type": "Source", + "sourceLocation": "local", + "localValue": "{\"target\": \"/{(.+?)}/gi\", \"replacement\": \"$1\", \"config\": {\"isRegex\": \"true\"}}", + "outputType": "json" + }, + { + "id": 20, + "title": "Search", + "name": "searchReplace", + "type": "Transform", + "inputs": [ + { + "name": "perf", + "type": "json", + "source": "Transform 9 perf" + }, + { + "name": "params", + "type": "json", + "source": "Source 19" + } + ], + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "description": "Search the given word" + }, + { + "id": 25, + "title": "Search Results", + "type": "Display", + "inputType": "json", + "inputSource": "Transform 20 results" + }, + { + "id": 12, + "title": "Count stripped perf words", + "name": "verseWords", + "type": "Transform", + "inputs": [ + { + "name": "perf", + "type": "json", + "source": "Transform 9 perf" + } + ], + "outputs": [ + { + "name": "verseWords", + "type": "json" + } + ], + "description": "PERF=>JSON: Counts words occurrences" + }, + { + "id": 13, + "title": "Merge Back Into Stripped (roundtrip)", + "name": "mergeMarkup", + "type": "Transform", + "inputs": [ + { + "name": "perf", + "type": "json", + "source": "Transform 9 perf" + }, + { + "name": "stripped", + "type": "json", + "source": "Transform 6 stripped" + }, + { + "name": "verseWords", + "type": "json", + "source": "Transform 12 verseWords" + } + ], +
"outputs": [ + { + "name": "perf", + "type": "json" + } + ], + "description": "PERF=>PERF adds report to verses" + }, + { + "id": 17, + "title": "Merge Merged PERF Text", + "name": "mergePerfText", + "type": "Transform", + "inputs": [ + { + "name": "perf", + "type": "json", + "source": "Transform 13 perf" + } + ], + "outputs": [ + { + "name": "perf", + "type": "json" + } + ], + "description": "PERF=>PERF: Merge consecutive text strings" + }, + { + "id": 14, + "title": "Merged Perf (roundtrip)", + "type": "Display", + "inputType": "json", + "inputSource": "Transform 17 perf" + } +] diff --git a/src/lib/stepTemplates.js b/src/lib/stepTemplates.js index a7d61a9..915393c 100644 --- a/src/lib/stepTemplates.js +++ b/src/lib/stepTemplates.js @@ -6,8 +6,8 @@ import wordSearch from "../transforms/wordSearch"; import justTheBible from "../transforms/justTheBible"; import verseStats from "../transforms/verseStats"; import proskommaQuery from "../transforms/proskommaQuery"; -import lightRegex from '../transforms/lightRegex'; -import regex from '../transforms/regex'; +import lightRegex from "../transforms/lightRegex"; +import regex from "../transforms/regex"; import validate from "../transforms/validate"; import diffText from "../transforms/diffText"; import diffJson from "../transforms/diffJson"; @@ -22,66 +22,68 @@ import stripMarkup from "../transforms/stripMarkup"; import verseWords from "../transforms/verseWords"; import mergeMarkup from "../transforms/mergeMarkup"; import prepareJsonDiff from "../transforms/prepareJsonDiff"; -import uniqueWordsVerses from '../transforms/uniqueWordsVerses'; +import uniqueWordsVerses from "../transforms/uniqueWordsVerses"; +import searchReplace from "../transforms/searchReplace"; import perf2PkJson from "../transforms/perf2PkJson"; import calculateUsfmChapterPositions from "../transforms/calculateUsfmChapterPositions"; const stepTemplates = { - Transform: { - usfm2perf, - usx2perf, - proskommaQuery, - validate, - diffText, - diffJson, - 
identity, - justTheBible, - mergePerfText, - wordFrequency, - wordSearch, - verseStats, - perf2usfm, - searchRegexGen, - longVerses1, - mergeReport, - uniqueWords, - remoteTransform, - stripMarkup, - verseWords, - mergeMarkup, - prepareJsonDiff, - uniqueWordsVerses, - lightRegex, - regex, - perf2PkJson, - calculateUsfmChapterPositions, + Transform: { + usfm2perf, + usx2perf, + proskommaQuery, + validate, + diffText, + diffJson, + identity, + justTheBible, + mergePerfText, + wordFrequency, + wordSearch, + verseStats, + perf2usfm, + searchRegexGen, + longVerses1, + mergeReport, + uniqueWords, + remoteTransform, + stripMarkup, + verseWords, + mergeMarkup, + prepareJsonDiff, + uniqueWordsVerses, + lightRegex, + regex, + perf2PkJson, + calculateUsfmChapterPositions, + searchReplace, + }, + Source: { + local: { + type: "Source", + sourceLocation: "local", + localValue: "", + outputType: "text", }, - Source: { - local: { - type: "Source", - sourceLocation: "local", - localValue: "", - outputType: "text", - }, - http: { - type: "Source", - sourceLocation: "http", - httpUrl: "", - outputType: "text" - } + http: { + type: "Source", + sourceLocation: "http", + httpUrl: "", + outputType: "text", }, - Display: { - text: { - type: "Display", - inputType: "text", - inputSource: "", - }, - json: { - type: "Display", - inputType: "json", - inputSource: "", - }, + }, + Display: { + text: { + type: "Display", + inputType: "text", + inputSource: "", }, + json: { + type: "Display", + inputType: "json", + inputSource: "", + }, + }, }; export default stepTemplates; diff --git a/src/transforms/searchReplace.js b/src/transforms/searchReplace.js new file mode 100644 index 0000000..6168ac2 --- /dev/null +++ b/src/transforms/searchReplace.js @@ -0,0 +1,179 @@ +import { + PerfRenderFromJson, + transforms, + mergeActions, +} from "proskomma-json-tools"; + +/** + * Evals string for matching regex syntax + * @param {string|RegExp} input + * @returns {RegExp} + */ + +const searchReplaceActions = { 
+ startDocument: [ + { + description: "setup", + test: () => true, + action: (params) => { + const { workspace, output, context } = params; + // Reset per-document state: book code, chapter/verse trackers, results. + workspace.bookCode = context.document.metadata.document.bookCode; + workspace.chapter = null; + workspace.verses = null; + output.results = []; + return true; + }, + }, + ], + text: [ + { + description: "add-to-text", + test: () => true, + action: ({ config, context, workspace, output }) => { + try { + // `config` is the pipeline params object; its nested `config` key holds the search options. + + const { + target, + replacement, + pointers = [], + config: _config = {}, + } = config; + const { + ctxLen = 10, + isRegex = false, + shouldMatchCase = false, + } = _config; + const flags = ["g"]; + if (!shouldMatchCase) flags.push("i"); + const _isRegex = + typeof isRegex === "boolean" ? isRegex : isRegex === "true"; + const _text = context.sequences[0].element.text; + // chapter/verses are maintained by the mark actions and locate each hit. + const { chapter, verses } = workspace; + + const evalRegex = (source) => { + if (source instanceof RegExp) return { source }; + if (typeof source !== "string") return; + const results = source.match(/\/(.+)\/(?=(\w*$))/); + if (!results?.[1]) return { source }; + return { source: results[1], flags: results[2] }; + }; + + const rgxData = _isRegex ? evalRegex(target) : { source: String(target ?? "").replace(/[.*+?^${}()|[\]\\]/g, "\\$&") }; + const _flags = rgxData.flags + ? [...new Set([...rgxData.flags, ...flags])] + : flags; + const rgx = new RegExp(rgxData.source, _flags.join("")); + let index = 0; + const replaced = _text.replace(rgx, function (...args) { + const containsGroup = typeof args[args.length - 1] === "object"; + // A trailing object argument is the named-groups map; drop it. + if (containsGroup) args.pop(); + const text = args.pop(); + const pos = args.pop(); + const match = args.shift(); + // What remains in args are the positional capture groups (unused here). + const ctxBefore = text.slice(Math.max(0, pos - ctxLen), pos); + const ctxAfter = text.slice( + pos + match.length, + pos + match.length + ctxLen + ); + const replaced = _isRegex + ?
match.replace(rgx, replacement) + : replacement; + const _pointer = [chapter, verses, index].join("---"); + const result = { + text: ctxBefore + match + replaced + ctxAfter, + pointer: _pointer, + metadata: { + bookCode: workspace.bookCode, + chapter, + verses, + index, + }, + hoverText: `${workspace.bookCode.toLowerCase()} ${chapter}:${verses}`, + }; + output.results.push(result); + index++; + if (pointers === "all") return replaced; + return pointers.includes(_pointer) ? replaced : match; + }); + workspace.outputContentStack[0].push(replaced); + if (!verses) return true; + return false; + } catch (err) { + console.error(err); + throw err; + } + }, + }, + ], + mark: [ + { + description: "mark-chapters", + test: ({ context }) => context.sequences[0].element.subType === "chapter", + action: ({ config, context, workspace, output }) => { + const element = context.sequences[0].element; + workspace.chapter = element.atts["number"]; + workspace.verses = 0; + return true; + }, + }, + { + description: "mark-verses", + test: ({ context }) => context.sequences[0].element.subType === "verses", + action: ({ config, context, workspace, output }) => { + const element = context.sequences[0].element; + workspace.verses = element.atts["number"]; + return true; + }, + }, + ], +}; + +const searchReplaceCode = function ({ perf, params }) { + const cl = new PerfRenderFromJson({ + srcJson: perf, + actions: mergeActions([ + searchReplaceActions, + transforms.perf2perf.identityActions, + ]), + }); + const output = {}; + cl.renderDocument({ + docId: "", + config: { + ...params, + }, + output, + }); + return { perf: output.perf, results: output.results }; // identityActions currently put PERF directly in output +}; + +const searchReplace = { + name: "searchReplace", + type: "Transform", + description: "Search and replace text in stripped perf", + inputs: [ + { + name: "perf", + type: "json", + source: "", + }, + { + name: "params", + type: "json", + source: "", + }, + ], + outputs: [ + { + 
name: "results", + type: "json", + }, + ], + code: searchReplaceCode, +}; +export default searchReplace; From 9540fb3801eb02e827bae78ae6727c832ff57080 Mon Sep 17 00:00:00 2001 From: Abel Perez Date: Tue, 8 Nov 2022 21:09:40 -0400 Subject: [PATCH 2/3] fixed monaco editor cache issue --- src/components/EditorWrapper.js | 218 ++++++++++++++++---------------- src/lib/types.js | 154 +++++++++++----------- 2 files changed, 189 insertions(+), 183 deletions(-) diff --git a/src/components/EditorWrapper.js b/src/components/EditorWrapper.js index 2aeeb53..5df4762 100644 --- a/src/components/EditorWrapper.js +++ b/src/components/EditorWrapper.js @@ -1,123 +1,129 @@ -import React, { useEffect, useRef, useState } from 'react'; +import React, { useEffect, useRef, useState } from "react"; import Editor, { DiffEditor } from "@monaco-editor/react"; const cleanEditorContent = { - title: "index", - value: "", - inputType: "json" -} + title: "index", + value: "", + inputType: "json", +}; const defaultOptions = { readOnly: true, domReadOnly: true }; function EditorWrapper({ results }) { - const [editorContent, setEditorContent] = useState(cleanEditorContent); - const [editorOptions, setEditorOptions] = useState(defaultOptions); + const [editorContent, setEditorContent] = useState(cleanEditorContent); + const [editorOptions, setEditorOptions] = useState(defaultOptions); + const [cacheKey, setCacheKey] = useState(1); - const editorRef = useRef(null); + const editorRef = useRef(null); - useEffect(() => { - console.log("updated editor"); - editorRef.current?.focus(); - }, [editorContent?.id]); + useEffect(() => { + editorRef.current?.focus(); + }, [editorContent?.id]); - const firstResult = results[0]?.id + const firstResult = results[0]?.id; - useEffect(() => { - if (!editorContent.id && firstResult) setEditorContent(results[results.length - 1]) - if (!results.length && editorContent.id) setEditorContent(cleanEditorContent); - }, [firstResult, editorContent, results]) - 
console.log({editorContent}) + useEffect(() => { + if (!editorContent.id && firstResult) + setEditorContent(results[results.length - 1]); + if (!results.length && editorContent.id) { + setEditorContent(cleanEditorContent); + setCacheKey((prev) => prev + 1); + } + }, [firstResult, editorContent, results]); - return ( + return ( + <> + {firstResult && ( <> - {firstResult && ( - <> - - - Save display to File - - - - - Toggle word wrap - - - - )} - {results.map((r, n) => { - return ( - + + + Save display to File + + + + + Toggle word wrap + + - ); + )} + {results.map((r, n) => { + return ( + + ); + })} + {editorContent.value?.type === "diff" ? ( + (editorRef.current = editor)} + /> + ) : ( + { + editorRef.current = editor; + }} + /> + )} + + ); } -export default EditorWrapper \ No newline at end of file +export default EditorWrapper; diff --git a/src/lib/types.js b/src/lib/types.js index afaaf2a..ff9a0c7 100644 --- a/src/lib/types.js +++ b/src/lib/types.js @@ -1,91 +1,91 @@ const types = { - bool: { - name: "Boolean", - test: v => !!v === v, - }, - number: { - name: "Number", - test: v => typeof v === 'number', - }, - string: { - name: "string", - test: v => typeof v === 'string', - }, - nonNullObject: { - name: "Non-Null Object", - test: v => typeof v === 'object' && v !== null, + bool: { + name: "Boolean", + test: (v) => !!v === v, + }, + number: { + name: "Number", + test: (v) => typeof v === "number", + }, + string: { + name: "string", + test: (v) => typeof v === "string", + }, + nonNullObject: { + name: "Non-Null Object", + test: (v) => typeof v === "object" && v !== null, + subTypes: { + array: { + name: "Array", + test: (v) => Array.isArray(v), + }, + kvObject: { + name: "Key-Value Object", + test: (v) => !Array.isArray(v), subTypes: { - array: { - name: "Array", - test: v => Array.isArray(v), + pkStructureDocument: { + name: "Proskomma Structure Document", + pkValidator: { + type: "structure", + key: "document", + version: "0.2.1", + }, + }, + perfSequence: { + 
name: "Proskomma PERF Sequence", + pkValidator: { + type: "constraint", + key: "perfSequence", + version: "0.2.1", + }, + }, + perfDocument: { + name: "Proskomma PERF Document", + pkValidator: { + type: "constraint", + key: "perfDocument", + version: "0.2.1", }, - kvObject: { - name: "Key-Value Object", - test: v => !Array.isArray(v), - subTypes: { - pkStructureDocument: { - name: "Proskomma Structure Document", - pkValidator: { - type: 'structure', - key: 'document', - version: '0.2.1' - } - }, - perfSequence: { - name: "Proskomma PERF Sequence", - pkValidator: { - type: 'constraint', - key: 'perfSequence', - version: '0.2.1' - } - }, - perfDocument: { - name: "Proskomma PERF Document", - pkValidator: { - type: 'constraint', - key: 'perfDocument', - version: '0.2.1' - } - }, - sofriaDocument: { - name: "Proskomma SOFRIA Document", - pkValidator: { - type: 'constraint', - key: 'sofriaDocument', - version: '0.2.1' - } - }, - } - } - } + }, + sofriaDocument: { + name: "Proskomma SOFRIA Document", + pkValidator: { + type: "constraint", + key: "sofriaDocument", + version: "0.2.1", + }, + }, + }, + }, }, + }, }; const flattenTypes = (typesObject, ancestors, passedRet) => { - if (!ancestors) { - ancestors = []; + if (!ancestors) { + ancestors = []; + } + const ret = passedRet || {}; + for (const [key, value] of Object.entries(typesObject)) { + ret[key] = { name: value.name }; + if ("test" in value) { + ret[key].test = value.test; + } + if ("pkValidator" in value) { + ret[key].pkValidator = value.pkValidator; } - const ret = passedRet || {}; - for (const [key, value] of Object.entries(typesObject)) { - ret[key] = {name: value.name}; - if ("test" in value) { - ret[key].test = value.test; - } - if ("pkValidator" in value) { - ret[key].pkValidator = value.pkValidator; - } - if (ancestors.length > 0) { - ret[key].super = ancestors; - } - if ("subTypes" in value) { - flattenTypes(value.subTypes, [...ancestors, key], ret); - } + if (ancestors.length > 0) { + ret[key].super = 
ancestors; } - return ret; -} + if ("subTypes" in value) { + flattenTypes(value.subTypes, [...ancestors, key], ret); + } + } + return ret; +}; const flattenedTypes = flattenTypes(types); -console.log(flattenedTypes, null, 2); +// console.log(flattenedTypes, null, 2); export default flattenedTypes; From e1d967def1bf987f7f4b69b782588b20eda865c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Abel=20P=C3=A9rez?= Date: Tue, 8 Nov 2022 21:27:06 -0400 Subject: [PATCH 3/3] Update searchReplace.js --- src/transforms/searchReplace.js | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/transforms/searchReplace.js b/src/transforms/searchReplace.js index 6168ac2..17d53d9 100644 --- a/src/transforms/searchReplace.js +++ b/src/transforms/searchReplace.js @@ -4,12 +4,6 @@ import { mergeActions, } from "proskomma-json-tools"; -/** - * Evals string for matching regex syntax - * @param {string|RegExp} input - * @returns {RegExp} - */ - const searchReplaceActions = { startDocument: [ {