From e944de81dca075a46b032900bb043a94bdc73d85 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Tue, 7 May 2024 06:44:05 +0000 Subject: [PATCH] Adjust magic numbers used for XML manipulation To avoid parsing XML, some magic numbers are used to trim opening and closing markup. The dump format has slightly changed and these magic numbers had to be adapted. --- src/sotoki/utils/preparation.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/sotoki/utils/preparation.py b/src/sotoki/utils/preparation.py index e3e160b..c15bc4b 100644 --- a/src/sotoki/utils/preparation.py +++ b/src/sotoki/utils/preparation.py @@ -233,7 +233,7 @@ def read_sub(): main_id = get_id_in(main_line, field_index_in_main) # write main line to dest; removing tag end (/> -> >) and CRLF - dsth.write(main_line[:-4]) + dsth.write(main_line[:-3]) dsth.write(b">") # fetch subs matching this ID (IDs are sorted so it's continuous) @@ -248,7 +248,7 @@ def read_sub(): dsth.write(node_start) # write the sub line removing the 2 heading spaces, node name (` fhs[found_id].write(starts[found_id]) - fhs[found_id].write(line[6:-5]) + fhs[found_id].write(line[4:-5]) fhs[found_id].write(ends[found_id]) except KeyError: continue @@ -404,7 +404,7 @@ def read_csv(): if current_csv[0] == post_id: # write user line to dest; removing tag end and CRLF dsth.write(b"