From c503bc9132191cc2013827558de49156db8846bf Mon Sep 17 00:00:00 2001 From: Emerson Rocha Date: Sat, 4 Jun 2022 23:52:25 -0300 Subject: [PATCH] rdf+bcp47+hxl (#41): partial refactoring; roundtrip drill with expected results --- officina/999999999/0/L999999999_0.py | 38 ++++---- .../999999999/1568346/bcp47-to-hxl-to-rdf.sh | 86 +++++++++++++++---- 2 files changed, 93 insertions(+), 31 deletions(-) diff --git a/officina/999999999/0/L999999999_0.py b/officina/999999999/0/L999999999_0.py index f9e5c7f..95ef4e0 100644 --- a/officina/999999999/0/L999999999_0.py +++ b/officina/999999999/0/L999999999_0.py @@ -623,7 +623,6 @@ def bcp47_langtag_callback_hxl( str: return HXL attributes (without HXL hashtag) """ - resultatum = [] # resultatum.append('+todo') resultatum.append('+i_{0}'.format(langtag_meta['language'].lower())) @@ -2537,6 +2536,7 @@ def hxl_hashtag_to_bcp47(hashtag: str) -> str: if len(rdf_parts) > 0: result['_callbacks']['rdf_parts'] = rdf_parts + # value_prefixes = None for item in rdf_parts: if item.startswith('s_'): # _subject= item.replace('s_', '').replace('_', ':') @@ -2557,7 +2557,7 @@ def hxl_hashtag_to_bcp47(hashtag: str) -> str: elif item.startswith('p_'): _predicate = item.replace('p_', '').replace('_', ':') result['extension']['r']['rdf:predicate'].append(_predicate) - _predicate_key, _object, _subject= _predicate.split(':') + _predicate_key, _object, _subject = _predicate.split(':') _bpc47_g_parts.append('p{0}-p{1}-p{2}'.format( _predicate_key.upper(), _object, _subject )) @@ -2565,39 +2565,47 @@ def hxl_hashtag_to_bcp47(hashtag: str) -> str: elif item.startswith('y_'): # _cell_transformer = item.replace('y_', '').lower() _cell_transformer = item[2:] - _tkey, _tvalue = _cell_transformer.split('_') + tverb, tval_1 = _cell_transformer.split('_') # if _tkey == 'csvwseparator': # print('oi', _tkey, _tvalue) - if _tkey == EXTRA_OPERATORS['GS']['hxl']: + if tverb.lower() == EXTRA_OPERATORS['GS']['hxl']: # _cell_separator = CSVW_SEPARATORS[_tvalue] 
decoded_separator = None - if _tvalue in CSVW_SEPARATORS: - decoded_separator = _tvalue + if tval_1 in CSVW_SEPARATORS: + decoded_separator = tval_1 # encoded_separator = CSVW_SEPARATORS[_tvalue] if decoded_separator is None: raise NotImplementedError( '[{0}] [{1}] not implemented in <{2}>'.format( - _tvalue, hashtag, CSVW_SEPARATORS + tval_1, hashtag, CSVW_SEPARATORS )) # result['extension']['r']['csvw:separator'] = \ # decoded_separator # _predicate_key, _object = _predicate.split(':') - _bpc47_g_parts.append('y{0}-y{1}'.format( + _bpc47_g_parts.append('y{0}-y{1}-ynop'.format( EXTRA_OPERATORS['GS']['hxl'].upper(), decoded_separator )) - elif _tkey == 'prefix': - if 'prefix' not in result['extension']['r']: - result['extension']['r']['prefix'] = [] - result['extension']['r']['prefix'].append(_tvalue.lower()) - # _predicate_key, _object = _predicate.split(':') - _bpc47_g_parts.append('yPREFIX-{0}'.format( - _tvalue.lower() + elif tverb == EXTRA_OPERATORS['STX']['hxl']: + # if value_prefixes is None: + # value_prefixes = [] + # value_prefixes.append(tval_1) + _bpc47_g_parts.append('y{0}-y{1}-ynop'.format( + tverb.upper(), tval_1.lower() )) else: result['_unknown'].append('rdf_parts [{0}]'.format(item)) + elif item.startswith('t_'): + # _cell_transformer = item.replace('y_', '').lower() + _cell_transformer = item[2:] + tverb, tval_1 = _cell_transformer.split('_') + # raise ValueError(item) + _bpc47_g_parts.append('t{0}-t{1}-ynop'.format( + tverb.upper(), tval_1.lower() + )) + elif item.startswith('o_'): _object = item.replace('o_', '').replace('_', ':') result['extension']['r']['rdf:object'].append(_object) diff --git a/officina/999999999/1568346/bcp47-to-hxl-to-rdf.sh b/officina/999999999/1568346/bcp47-to-hxl-to-rdf.sh index f0f8d24..30245eb 100755 --- a/officina/999999999/1568346/bcp47-to-hxl-to-rdf.sh +++ b/officina/999999999/1568346/bcp47-to-hxl-to-rdf.sh @@ -124,7 +124,7 @@ test_unesco_thesaurus() { } ####################################### -# 
test_unesco_thesaurus +# bcp47_and_hxlrdf_roundtrip item # # Globals: # ROOTDIR @@ -136,14 +136,19 @@ test_unesco_thesaurus() { bcp47_and_hxlrdf_roundtrip() { bpc47="${1-""}" hxlattr="${2-""}" - bpc47_final="${4-""}" - hxlattr_final="${3-""}" + bpc47_final="${3-""}" + hxlattr_final="${4-""}" hxlattr_discovered="" hxlattr_discovered_2nd="" bpc47_discovered="" bpc47_discovered_2nd="" + stype_blue=$(tput setaf 4) + stype_green=$(tput setaf 2) + style_red=$(tput setaf 1) + style_normal=$(tput sgr0) + if [ -n "$bpc47" ]; then echo "[$bpc47] bpc47 input" @@ -160,8 +165,19 @@ bcp47_and_hxlrdf_roundtrip() { bpc47_discovered_2nd=${bpc47_discovered_2nd//\"/} echo "[$bpc47_discovered_2nd] bpc47_discovered_2nd" - else - echo "noop bpc47" + + if [ -n "$bpc47_final" ]; then + if [ "$bpc47_final" = "$bpc47_discovered_2nd" ]; then + echo "${stype_green}OK [$bpc47_final]${style_normal}" + # printf "$STARTCOLOR%b$ENDCOLOR" "$1"; + else + echo "${style_red}FAILED [$bpc47_final]${style_normal}" + fi + else + echo "${stype_blue}INFO: No enforced expected result${style_normal}" + fi + # else + # echo "noop bpc47" fi if [ -n "$hxlattr" ]; then @@ -181,24 +197,62 @@ bcp47_and_hxlrdf_roundtrip() { hxlattr_discovered_2nd=${hxlattr_discovered_2nd//\"/} echo "[$hxlattr_discovered_2nd] hxlattr_discovered_2nd" - else - echo "noop hxlattr" + if [ -n "$hxlattr_final" ]; then + if [ "$hxlattr_final" = "$hxlattr_discovered_2nd" ]; then + echo "${stype_green}OK [$hxlattr_final]${style_normal}" + else + echo "${style_red}FAILED [$hxlattr_final]${style_normal}" + fi + else + echo "${stype_blue}INFO: No enforced expected result${style_normal}" + fi + + # else + # echo "noop hxlattr" fi return 0 } +####################################### +# bcp47_and_hxlrdf_roundtrip item +# +# Globals: +# ROOTDIR +# Arguments: +# None +# Outputs: +# Test result +####################################### +bcp47_and_hxlrdf_roundtrip__drill() { + echo "" + echo " test1" + bcp47_and_hxlrdf_roundtrip \ + 
"qcc-Zxxx-r-sU2203-s2-snop" \ + "" \ + "qcc-Zxxx-r-sU2203-s2-snop" \ + "" + + echo "" + echo " test2" + bcp47_and_hxlrdf_roundtrip \ + "" \ + "+i_qcc+is_zxxx+rdf_s_u2203_s2" \ + "" \ + "" + + echo "" + echo " test3" + bcp47_and_hxlrdf_roundtrip \ + "qcc-Zxxx-r-sU2203-s2-snop-yU001D-yu007c-ynop-yU0002-yunescothes-ynop-pSKOS-pbroader-ps2-tXSD-tdatetime-tnop" \ + "" \ + "qcc-Zxxx-r-pSKOS-pbroader-ps2-sU2203-s2-snop-tXSD-tdatetime-ynop-yU0002-yunescothes-ynop-yU001D-yu007c-ynop" \ + "" +} + # echo "test" # bcp47_to_hxl_to_rdf__tests # test_unesco_thesaurus -echo "" -echo " test1" -bcp47_and_hxlrdf_roundtrip "qcc-Zxxx-r-sU2203-s2-snop" "" -echo "" -echo " test2" -bcp47_and_hxlrdf_roundtrip "" "+i_qcc+is_zxxx+rdf_s_u2203_s2" -echo "" -echo " test3" -bcp47_and_hxlrdf_roundtrip "qcc-Zxxx-r-sU2203-s2-snop-yU001D-yu007c-ynop-yU0002-yunescothes-ynop-pSKOS-pbroader-ps2-tXSD-tdatetime-tnop" "" +bcp47_and_hxlrdf_roundtrip__drill