From e944de81dca075a46b032900bb043a94bdc73d85 Mon Sep 17 00:00:00 2001
From: benoit74 <benoit74@users.noreply.github.com>
Date: Tue, 7 May 2024 06:44:05 +0000
Subject: [PATCH] Adjust magic numbers used for XML manipulation

To avoid parsing XML, some magic numbers are used to trim the opening
and closing markup of each line. The dump format has changed slightly,
so these magic numbers had to be adapted.
---
 src/sotoki/utils/preparation.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/sotoki/utils/preparation.py b/src/sotoki/utils/preparation.py
index e3e160b..c15bc4b 100644
--- a/src/sotoki/utils/preparation.py
+++ b/src/sotoki/utils/preparation.py
@@ -233,7 +233,7 @@ def read_sub():
             main_id = get_id_in(main_line, field_index_in_main)
 
             # write main line to dest; removing tag end (/> -> >) and CRLF
-            dsth.write(main_line[:-4])
+            dsth.write(main_line[:-3])
             dsth.write(b">")
 
             # fetch subs matching this ID (IDs are sorted so it's continuous)
@@ -248,7 +248,7 @@ def read_sub():
                 dsth.write(node_start)
                 # write the sub line removing the 2 heading spaces, node name (<row)
                 # removing trailing CRLF as well. node already self closed in source
-                dsth.write(current_sub[1][6:-2])
+                dsth.write(current_sub[1][4:-1])
                 current_sub = read_sub()
 
             if has_subs:
@@ -335,7 +335,7 @@ def split_posts_by_posttypeid(
             try:
                 # rewrite with new name replacing `  <row` and `row>`
                 fhs[found_id].write(starts[found_id])
-                fhs[found_id].write(line[6:-5])
+                fhs[found_id].write(line[4:-5])
                 fhs[found_id].write(ends[found_id])
             except KeyError:
                 continue
@@ -404,7 +404,7 @@ def read_csv():
             if current_csv[0] == post_id:
                 # write user line to dest; removing tag end and CRLF
                 dsth.write(b"<link")
-                dsth.write(line[6:-4])
+                dsth.write(line[4:-3])
                 # CSV title already includes appropriate quoting
                 dsth.write(b" PostName=")
                 dsth.write(current_csv[1])