From bcbae9bec88982d299bd507fdfbf2f90fae9c6d8 Mon Sep 17 00:00:00 2001
From: Alexander Bilz
Date: Sat, 29 Jun 2024 13:32:35 +0200
Subject: [PATCH] fix: JSON decode error

---
Review notes (below the "---" separator, so outside the commit message):
* Line breaks and leading whitespace in this patch were restored from a
  copy that had been collapsed onto two lines. The Python indentation
  follows the block structure of the code; the YAML indentation in
  build.yaml is inferred. Verify against the repository before applying.
* decode_dict: the final `return json.loads(properties, strict=False)` is
  an unchanged context line that sits after the new try/except, so a
  JSONDecodeError raised there for str/bytes input is still not caught.
* decode_dict: `errors="ignore"` only covers undecodable bytes. If
  `soup.original_encoding` can be None (TODO: confirm against Beautiful
  Soup), `bytes.decode(encoding=None)` raises TypeError, which the new
  `except JSONDecodeError` handler does not catch.

 .github/workflows/build.yaml |  7 +------
 src/forensicsim/backend.py   |  2 +-
 src/forensicsim/parser.py    | 26 +++++++++++++++++---------
 3 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 4745f6e..1501b35 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -39,9 +39,4 @@ jobs:
           python tools/main.py --help
           python tools/dump_leveldb.py --help
           python tools/dump_localstorage.py --help
-          python tools/dump_sessionstorage.py --help
-      - name: Calculate diff 👽
-        run: |
-          git diff --no-index --word-diff .\forensicsim-data\expected-result\jane_doe_v_1_4_00_11161.json jane_doe_v_1_4_00_11161.json
-          git diff --no-index --word-diff .\forensicsim-data\expected-result\john_doe_v_1_4_00_11161.json john_doe_v_1_4_00_11161.json
-          git diff --no-index --word-diff .\forensicsim-data\expected-result\karelze_v_23306_3309_2530_1346.json karelze_v_23306_3309_2530_1346.json
\ No newline at end of file
+          python tools/dump_sessionstorage.py --help
\ No newline at end of file
diff --git a/src/forensicsim/backend.py b/src/forensicsim/backend.py
index ef98294..6ee60e9 100644
--- a/src/forensicsim/backend.py
+++ b/src/forensicsim/backend.py
@@ -129,7 +129,7 @@ def write_results_to_json(data: list[dict[str, Any]], outputpath: Path) -> None:
     try:
         with open(outputpath, "w", encoding="utf-8") as f:
             json.dump(
-                data, f, indent=4, sort_keys=True, default=str, ensure_ascii=False
+                data, f, indent=4, default=str, ensure_ascii=False
             )
     except OSError as e:
         print(e)
diff --git a/src/forensicsim/parser.py b/src/forensicsim/parser.py
index 3ec3084..afebb15 100644
--- a/src/forensicsim/parser.py
+++ b/src/forensicsim/parser.py
@@ -2,6 +2,7 @@
 import warnings
 from dataclasses import dataclass, field
 from datetime import datetime
+from json import JSONDecodeError
 from pathlib import Path
 from typing import Any, Optional, Union
 
@@ -26,15 +27,22 @@ def strip_html_tags(value: str) -> str:
 
 
 def decode_dict(properties: Union[bytes, str, dict]) -> dict[str, Any]:
-    if isinstance(properties, bytes):
-        soup = BeautifulSoup(properties, features="html.parser")
-        properties = properties.decode(soup.original_encoding)
-    if isinstance(properties, dict):
-        # handle case where nested childs are dicts or list but provided with "" but have to be expanded.
-        for key, value in properties.items():
-            if isinstance(value, str) and value.startswith(("[", "{")):
-                properties[key] = json.loads(value, strict=False)
-        return properties
+    try:
+        if isinstance(properties, bytes):
+            soup = BeautifulSoup(properties, features="html.parser")
+            properties = properties.decode(
+                encoding=soup.original_encoding, errors="ignore"
+            )
+        if isinstance(properties, dict):
+            # handle case where nested childs are dicts or list but provided with "" but have to be expanded.
+            for key, value in properties.items():
+                if isinstance(value, str) and value.startswith(("[", "{")):
+                    properties[key] = json.loads(value, strict=False)
+            return properties
+    except JSONDecodeError as e:
+        print(e)
+        print("Couldn't decode dictionary ", properties)
+        return {}
 
     return json.loads(properties, strict=False)
 