Skip to content

Commit

Permalink
chore(live-tests): comments for record_obfuscator.py (airbytehq#46661)
Browse files Browse the repository at this point in the history
  • Loading branch information
natikgadzhi authored Oct 9, 2024
1 parent b42534c commit 439fbe0
Showing 1 changed file with 17 additions and 1 deletion.
18 changes: 17 additions & 1 deletion tools/bin/record_obfuscator.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,32 @@
#!/usr/bin/env python3
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.

from __future__ import annotations

import hashlib
import json
import sys
from typing import Any

#
# record_obfuscator is a tiny script that:
# 1. reads JSON lines from stdin
# 2. obfuscates the data in AirbyteRecordMessage lines
# 3. spits obfuscated lines back to stdout
# All stdin lines that are not type: RECORD remain unchanged.
#
# It's used in live-tests (airbyte-ci/connectors/live-tests) to be able to dump raw data from a live connection
# without leaking actual sensitive production data.
#
# The script is copied over into the live-tests runner Dagger container.
#


def _generate_hash(value: Any) -> str:
return hashlib.sha256(str(value).encode()).hexdigest()


def obfuscate(value: Any) -> Any:
"""Returns an obfuscated version of the input value while retaiining it's type and length information."""
if isinstance(value, str):
obfuscated_value = f"string_len-{len(value)}_" + _generate_hash(value)
elif isinstance(value, int):
Expand Down Expand Up @@ -41,6 +55,8 @@ def obfuscate(value: Any) -> Any:
except Exception as exc:
# We don't expect invalid json so if we see it, it will go to stderr
sys.stderr.write(f"{line}\n")

# try / except / else: Else block runs only if no exceptions were raised.
else:
if data.get("type") == "RECORD":
record_data = data["record"].get("data", {})
Expand Down

0 comments on commit 439fbe0

Please sign in to comment.