diff --git a/officina/999999999/0/999999999_54872.py b/officina/999999999/0/999999999_54872.py index d498c0b..eae2d5f 100755 --- a/officina/999999999/0/999999999_54872.py +++ b/officina/999999999/0/999999999_54872.py @@ -47,6 +47,7 @@ bcp47_rdf_extension_poc, hxltm_carricato, HXLTMAdRDFSimplicis, + hxltm_carricato_brevibus, rdf_namespaces_extras ) @@ -98,6 +99,13 @@ Temporary tests . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . +(Debug information in JSON) + {0} --objectivum-formato=_temp_bcp47_meta_in_json \ +--rdf-namespaces-archivo=\ +999999999/1568346/data/hxlstandard-rdf-namespaces-example.hxl.csv \ +999999999/1568346/data/unesco-thesaurus.bcp47g.tsv + +(Data operations) {0} --objectivum-formato=_temp_bcp47 --rdf-namespaces-archivo=\ 999999999/1568346/data/hxlstandard-rdf-namespaces-example.hxl.csv \ 999999999/1568346/data/unesco-thesaurus.bcp47g.tsv @@ -237,6 +245,7 @@ def make_args(self, hxl_output=True): # # - Uses '.ndjson' as extension # 'application/x-ndjson', '_temp_bcp47', + '_temp_bcp47_meta_in_json', ], # required=True default='application/x-turtle' @@ -280,7 +289,6 @@ def make_args(self, hxl_output=True): default=None ) - # praefīxum , n, s, nominativus, # https://en.wiktionary.org/wiki/praefixus#Latin # cōnfigūrātiōnī, f, s, dativus, @@ -332,6 +340,16 @@ def execute_cli(self, pyargs, stdin=STDIN, stdout=sys.stdout, # print(RDF_NAMESPACES_EXTRAS) # pass + # @TODO maybe refactor this temporary part + if pyargs.objectivum_formato == '_temp_bcp47_meta_in_json': + caput, data = hxltm_carricato_brevibus( + _infile, _stdin, punctum_separato="\t") + + meta = bcp47_rdf_extension_poc( + caput, data, objective_bag=pyargs.rdf_bag, est_meta=True) + print(json.dumps(meta, sort_keys=False, ensure_ascii=False)) + return self.EXIT_OK + # @TODO remove thsi temporary part if pyargs.objectivum_formato == '_temp_bcp47': caput, data = hxltm_carricato( diff --git a/officina/999999999/0/L999999999_0.py b/officina/999999999/0/L999999999_0.py index 
def hxltm_carricato_brevibus(
    archivum_trivio: str = None,
    est_stdin: bool = False,
    punctum_separato: str = ",",
    data_lineis: int = 3,
    est_hxl: bool = False
) -> tuple:
    """hxltm_carricato_brevibus read only header and part of the data

    Reads the header row plus at most ``data_lineis`` data rows, either
    from a file or from stdin. Both sources are parsed with the same
    ``csv.reader`` configuration, so ``punctum_separato`` applies to the
    header and to the data rows alike.

    Note: this helper is not as efficient as reading line by line. But some
    operations already require such task.

    Trivia:
    - carricātō, n, s, dativus, https://en.wiktionary.org/wiki/carricatus#Latin
    - verbum: https://en.wiktionary.org/wiki/carricatus#Latin
    - capitī, s, n, https://en.wiktionary.org/wiki/caput#Latin
    - brevibus, pl, m/f/n, https://en.wiktionary.org/wiki/brevis#Latin

    Args:
        archivum_trivio (str, optional): Path to file. Only used when
            est_stdin is False. Defaults to None.
        est_stdin (bool, optional): Is the file stdin?. Defaults to False.
        punctum_separato (str, optional): Field delimiter given to
            csv.reader. Defaults to ",".
        data_lineis (int, optional): Maximum number of data rows (after the
            header) to return. Zero or negative means header only.
            Defaults to 3.
        est_hxl (bool, optional): Currently not read by this function; kept
            for interface compatibility. Defaults to False.

    Returns:
        tuple: (caput, data), where caput is the header row (list of cells)
        and data is a list of rows (each a list of cells). Both are empty
        lists when the input has no non-empty row.
    """

    def _legere_initium(fons) -> tuple:
        """Parse header and up to data_lineis rows from an iterable of lines
        """
        caput = []
        data = []
        for linea in csv.reader(fons, delimiter=punctum_separato):
            if not caput:
                # Empty rows before the header are skipped: the first
                # non-empty row becomes the header.
                caput = linea
            elif len(data) < data_lineis:
                data.append(linea)

            if caput and len(data) >= data_lineis:
                # Enough rows collected; do not parse the rest.
                break
        return caput, data

    if est_stdin:
        caput, data = _legere_initium(sys.stdin)
        # The remaining stdin is still consumed (without parsing it), as
        # the previous implementation also read stdin until the end.
        # NOTE(review): presumably shell pipelines feeding this command
        # rely on that; confirm before replacing with an early exit.
        for _ in sys.stdin:
            pass
        return caput, data

    # newline='' lets the csv module handle line endings by itself,
    # including newlines embedded inside quoted cells.
    with open(archivum_trivio, 'r', newline='') as _fons:
        return _legere_initium(_fons)
+290,7 @@ bcp47_and_hxlrdf_roundtrip__drill() { # echo "test" -# bcp47_to_hxl_to_rdf__tests +bcp47_to_hxl_to_rdf__tests # test_unesco_thesaurus bcp47_and_hxlrdf_roundtrip__drill