From dc27774d49187e4c127173293a54577c1b8234f5 Mon Sep 17 00:00:00 2001 From: Emerson Rocha Date: Sun, 5 Jun 2022 02:49:29 -0300 Subject: [PATCH] rdf+bcp47+hxl (#41): partial refactoring; bcp47-to-hxl-to-rdf.hxl.tsv reference ajusted --- officina/999999999/0/L999999999_0.py | 3 +- officina/999999999/1568346/README.md | 14 ++++--- .../1568346/bcp47-to-hxl-to-rdf.hxl.tsv | 23 +++++------ .../999999999/1568346/bcp47-to-hxl-to-rdf.sh | 40 ++++++++++++++++++- 4 files changed, 59 insertions(+), 21 deletions(-) diff --git a/officina/999999999/0/L999999999_0.py b/officina/999999999/0/L999999999_0.py index 95ef4e0..eafae00 100644 --- a/officina/999999999/0/L999999999_0.py +++ b/officina/999999999/0/L999999999_0.py @@ -655,6 +655,7 @@ def bcp47_langtag_callback_hxl( if len(_r["xsl:transform"]) > 0: value_prefixes = None + value_separator = None for titem in _r["xsl:transform"]: tverb, tval_1, _nop_tval_2 = titem.split(':') @@ -2602,7 +2603,7 @@ def hxl_hashtag_to_bcp47(hashtag: str) -> str: _cell_transformer = item[2:] tverb, tval_1 = _cell_transformer.split('_') # raise ValueError(item) - _bpc47_g_parts.append('t{0}-t{1}-ynop'.format( + _bpc47_g_parts.append('t{0}-t{1}-tnop'.format( tverb.upper(), tval_1.lower() )) diff --git a/officina/999999999/1568346/README.md b/officina/999999999/1568346/README.md index 136a46c..405bb47 100644 --- a/officina/999999999/1568346/README.md +++ b/officina/999999999/1568346/README.md @@ -13,9 +13,10 @@ ### Explode list of items Full Example: -- Inspiration: https://www.w3.org/ns/csvw.ttl#separator -- HXL hashtag: `#item+rem+i_qcc+is_zxxx+rdf_y_csvwseparator_u007c` -- BCP 47: `qcc-Zxxx-r-yCSVWseparator-u007c` +- Inspiration: https://www.w3.org/ns/csvw.ttl#separator +- Inspiration: (group separator, ASCII) https://www.wikidata.org/wiki/Q110028713 +- HXL hashtag: `#item+rem+i_qcc+is_zxxx+rdf_y_u001d_u007c` +- BCP 47: `qcc-Zxxx-r-yU001D-yu007c-ynop` - Cell value: `concept4938|concept7597` - Transformed result: - concept4938 @@ -23,9 +24,10 @@ Full Example: ### Apply prefix to items Full Example: -- Inspiration: (did exist some namespace for this? Maybe is so basic most tools do it hardcoded) -- HXL hashtag: `#item+rem+i_qcc+is_zxxx+rdf_y_prefix_unescothes` -- BCP 47: `qcc-Zxxx-r-yPREFIX-unescothes` +- Inspiration: (did exist some namespace for this? Maybe is so basic most tools do it hardcoded) +- Inspiration: (start of text, ASCII) https://www.wikidata.org/wiki/Q10366650 +- HXL hashtag: `#item+rem+i_qcc+is_zxxx+rdf_y_u0002_unescothes` +- BCP 47: `qcc-Zxxx-r-yU0002-yunescothes-ynop` - Cell value: `concept10` - Transformed result: - unescothes:concept10 diff --git a/officina/999999999/1568346/bcp47-to-hxl-to-rdf.hxl.tsv b/officina/999999999/1568346/bcp47-to-hxl-to-rdf.hxl.tsv index bae78c1..70645aa 100644 --- a/officina/999999999/1568346/bcp47-to-hxl-to-rdf.hxl.tsv +++ b/officina/999999999/1568346/bcp47-to-hxl-to-rdf.hxl.tsv @@ -1,14 +1,13 @@ #item+bpc47 #item+hxl #item+rdf #item+namespace qcc-Zxxx +i_qcc+is_zxxx -qcc-Zxxx-r-pRDF-type +i_qcc+is_zxxx+ir_rrdf_type rdf:type -lat-Latn-r-pSKOS-prefLabel +i_lat+is_latn+rdf_p_skos_preflabel skos:prefLabel/[$0]@lat-Latn -qcc-Zxxx-r-pSKOS-broader +i_qcc+is_zxxx+rdf_p_skos_broader skos:broader skos/core# -qcc-Zxxx-r-pSKOS-narrower +i_qcc+is_zxxx+rdf_p_skos_narrower skos:narrower 02/skos/core# -qcc-Zxxx-r-pSKOS-narrower-yCSVWseparator-u007c +i_qcc+is_zxxx+rdf_p_skos_narrower+rdf_y_csvwseparator_u007c skos:narrower 02/skos/core# -qcc-Zxxx-r-pSKOS-related +i_qcc+is_zxxx+rdf_p_skos_related skos:related skos/core# -qcc-Zxxx-r-pDCT-modified +i_qcc+is_zxxx+rdf_p_dct_modified dct:modified -qcc-Zxxx-r-pDCT-modified-tXSD-datetime +i_qcc+is_zxxx+rdf_p_dct_modified+rdf_t_xsd_datetime dct:modified/[$0]^^xsd:datetime | -qcc-Zxxx-r-pDCT-modified-tXSD-datetime-x-wikip5017 +i_qcc+is_zxxx+rdf_p_dct_modified+rdf_t_xsd_datetime+ix_wikip5017 dct:modified/[$0]^^xsd:datetime | -eng-Latn-r-pDC-contributor-pDC-creator-pDC-publisher +i_eng+is_latn+rdf_p_dc_contributor+rdf_p_dc_creator+rdf_p_dc_publisher dc:contributor|dc:creator|dc:publisher -qcc-Zxxx-r-sU2200-s0 +i_qcc+is_zxxx+rdf_s_u2200_s0 ((rdf:subject 0)) -qcc-Zxxx-r-yPREFIX-unescothes +i_qcc+is_zxxx+rdf_s_u2200_s0 +lat-Latn-r-pSKOS-pprefLabel-ps1 +i_lat+is_latn+rdf_p_skos_preflabel_s1 skos:prefLabel/[$0]@lat-Latn +qcc-Zxxx-r-pSKOS-pbroader-ps1 +i_qcc+is_zxxx+rdf_p_skos_broader_s1 skos:broader skos/core# +qcc-Zxxx-r-pSKOS-pnarrower-ps1 +i_qcc+is_zxxx+rdf_p_skos_narrower_s1 skos:narrower 02/skos/core# +qcc-Zxxx-r-pSKOS-pnarrower-ps1-yU001D-yu007c-ynop +i_qcc+is_zxxx+rdf_p_skos_narrower_s1+rdf_y_u001d_u007c skos:narrower 02/skos/core# +qcc-Zxxx-r-pSKOS-prelated-ps1 +i_qcc+is_zxxx+rdf_p_skos_related_s1 skos:related skos/core# +qcc-Zxxx-r-pDCT-pmodified-ps1 +i_qcc+is_zxxx+rdf_p_dct_modified_s1 dct:modified +qcc-Zxxx-r-pDCT-pmodified-ps1-tXSD-tdatetime-tnop +i_qcc+is_zxxx+rdf_p_dct_modified_s1+rdf_t_xsd_datetime dct:modified/[$0]^^xsd:datetime | +qcc-Zxxx-r-pDCT-pmodified-ps1-tXSD-tdatetime-tnop-x-wikip5017 +i_qcc+is_zxxx+ix_wikip5017+rdf_p_dct_modified_s1+rdf_t_xsd_datetime dct:modified/[$0]^^xsd:datetime | +eng-Latn-r-pDC-pcontributor-ps1-pDC-pcreator-ps1-pDC-ppublisher-ps1 +i_eng+is_latn+rdf_p_dc_contributor_s1+rdf_p_dc_creator_s1+rdf_p_dc_publisher_s1 dc:contributor|dc:creator|dc:publisher +qcc-Zxxx-r-sU2200-s0-snop +i_qcc+is_zxxx+rdf_s_u2200_s0 ((rdf:subject 0)) +qcc-Zxxx-r-yU0002-yunescothes-ynop +i_qcc+is_zxxx+rdf_s_u2200_s0 diff --git a/officina/999999999/1568346/bcp47-to-hxl-to-rdf.sh b/officina/999999999/1568346/bcp47-to-hxl-to-rdf.sh index 30245eb..401407e 100755 --- a/officina/999999999/1568346/bcp47-to-hxl-to-rdf.sh +++ b/officina/999999999/1568346/bcp47-to-hxl-to-rdf.sh @@ -225,6 +225,9 @@ bcp47_and_hxlrdf_roundtrip() { # Test result ####################################### bcp47_and_hxlrdf_roundtrip__drill() { + + archivum__regulae_exemplis="${ROOTDIR}/999999999/1568346/bcp47-to-hxl-to-rdf.hxl.tsv" + echo "" echo " test1" bcp47_and_hxlrdf_roundtrip \ @@ -242,12 +245,45 @@ bcp47_and_hxlrdf_roundtrip__drill() { "" echo "" - echo " test3" + echo " test3 (sort output)" bcp47_and_hxlrdf_roundtrip \ "qcc-Zxxx-r-sU2203-s2-snop-yU001D-yu007c-ynop-yU0002-yunescothes-ynop-pSKOS-pbroader-ps2-tXSD-tdatetime-tnop" \ "" \ - "qcc-Zxxx-r-pSKOS-pbroader-ps2-sU2203-s2-snop-tXSD-tdatetime-ynop-yU0002-yunescothes-ynop-yU001D-yu007c-ynop" \ + "qcc-Zxxx-r-pSKOS-pbroader-ps2-sU2203-s2-snop-tXSD-tdatetime-tnop-yU0002-yunescothes-ynop-yU001D-yu007c-ynop" \ "" + + index_now=$(( 4 )) + + # Will fail without manual ajusts: + # - lat-Latn-r-pSKOS-pprefLabel-ps1 + + # while IFS=, read -r iso3 source_url; do + { + # remove read -r to not skip first line + read -r + while IFS=$'\t' read -r -a linea; do + bpc47="${linea[0]}" + hxl="${linea[1]}" + rdf="${linea[2]}" + # namespace="${linea[3]}" + + echo "" + echo " test ${index_now}" + + bcp47_and_hxlrdf_roundtrip \ + "${bpc47}" \ + "${hxl}" \ + "${bpc47}" \ + "${hxl}" + + # # echo "numerordinatio_praefixo $numerordinatio_praefixo" + # # bootstrap_1603_45_16__item "1603_45_16_24" "24" "AGO" "AO" "3" "1" "0" + # bootstrap_1603_45_16__item "$numerordinatio_praefixo" "$unm49" "$v_iso3" "$v_iso2" "$cod_ab_level_max" "1" "0" + # # bootstrap_1603_45_16__item "$numerordinatio_praefixo" "$unm49" "$v_iso3" "$v_iso2" "1" "0" + # sleep 5 + index_now=$(( index_now + 1 )) + done + } <"${archivum__regulae_exemplis}" } # echo "test"