Rebased with reluctant refactoring

To simplify edge cases, I give in, although for the record I'm not happy about giving up on the line-based truncation: I just want it to be done. (And, ultimately, I don't think it really matters all that much.)
distributed-system-analysis · Apr 3, 2024 · fa2c8d2 · fa2c8d2
1 parent 159a2c5
commit fa2c8d2
Show file tree

Hide file tree

Showing 6 changed files with 48 additions and 73 deletions.
diff --git a/lib/pbench/cli/server/__init__.py b/lib/pbench/cli/server/__init__.py
@@ -1,7 +1,6 @@
 import datetime
 from threading import Thread
 import time
-from typing import Any, Optional
 from typing import Any, Optional, Union
 
 import click
@@ -163,27 +162,6 @@ def watcher(self):
             )
 
 
-class DateParser(ParamType):
-    """The DateParser type converts date strings into `datetime` objects.
-
-    This is a variant of click's built-in DateTime parser, but uses the
-    more flexible dateutil.parser
-    """
-
-    name = "dateparser"
-
-    def convert(
-        self, value: Any, param: Optional[Parameter], ctx: Optional[Context]
-    ) -> Any:
-        if isinstance(value, datetime.datetime):
-            return value
-
-        try:
-            return parser.parse(value)
-        except Exception as e:
-            self.fail(f"{value!r} cannot be converted to a datetime: {str(e)!r}")
-
-
 def config_setup(context: object) -> PbenchServerConfig:
     config = PbenchServerConfig.create(context.config)
     # We're going to need the DB to track dataset state, so setup DB access.

diff --git a/lib/pbench/cli/server/repair.py b/lib/pbench/cli/server/repair.py
@@ -61,7 +61,8 @@ def repair_audit(kwargs):
         didit = False
         for key, value in a.items():
             if type(value) is str and len(value) > LIMIT:
-                a[key] = f"[TRUNC({len(value)})]" + value[:LIMIT]
+                p = f"[TRUNC({len(value)})]"
+                a[key] = (p + value)[:LIMIT]
                 detailer.message(f"{name} [{key}] truncated ({len(value)}) to {LIMIT}")
                 didit = True
         if didit:
@@ -72,8 +73,10 @@ def repair_audit(kwargs):
             Database.db_session.commit()
         except Exception as e:
             commit_error = str(e)
-    click.echo(f"{count} audit records had attributes too long")
-    click.echo(f"{updated} records were fixed")
+    click.echo(f"{count} audit records triggered field truncation")
+    click.echo(
+        f"{updated} records were truncated, {count-updated} had no eligible fields"
+    )
     if attributes_errors:
         click.echo(f"{attributes_errors} had format errors in attributes")
     if commit_error:

diff --git a/lib/pbench/server/api/resources/intake_base.py b/lib/pbench/server/api/resources/intake_base.py
@@ -47,7 +47,11 @@
     OperationName,
     OperationState,
 )
-from pbench.server.database.models.server_settings import get_retention_days
+from pbench.server.database.models.server_settings import (
+    get_retention_days,
+    OPTION_SERVER_INDEXING,
+    ServerSetting,
+)
 from pbench.server.sync import Sync
 from pbench.server.utils import UtcTimeHelper
 

diff --git a/lib/pbench/server/cache_manager.py b/lib/pbench/server/cache_manager.py
@@ -27,6 +27,7 @@
 RECLAIM_BYTES_PAD = 1024  # Pad unpack reclaim requests by this much
 MB_BYTES = 1024 * 1024  # Bytes in Mb
 MAX_ERROR = 1024 * 5  # Maximum length of subprocess stderr to capture
+TRUNC_PREFIX = "[TRUNC]"  # Indicate that subprocess stderr was truncated
 
 
 class CacheManagerError(Exception):
@@ -96,24 +97,35 @@ def __init__(self, tarball: Path, error: Exception):
         self.error = str(error)
 
 
-class TarballUnpackError(CacheManagerError):
+class UnpackBaseError(CacheManagerError):
+    """Base class for unpacking errors"""
+
+    def __init__(self, context: Path, error: str, stderr: Optional[str] = None):
+        m = f"{error}: {stderr!r}" if stderr else error
+        super().__init__(m)
+        self.stderr = stderr
+
+
+class TarballUnpackError(UnpackBaseError):
     """An error occurred trying to unpack a tarball."""
 
-    def __init__(self, tarball: Path, error: str):
-        super().__init__(f"An error occurred while unpacking {tarball}: {error}")
+    def __init__(self, tarball: Path, error: str, stderr: Optional[str] = None):
+        super().__init__(
+            tarball, f"An error occurred while unpacking {tarball}: {error}", stderr
+        )
         self.tarball = tarball
-        self.error = error
 
 
-class TarballModeChangeError(CacheManagerError):
+class TarballModeChangeError(UnpackBaseError):
     """An error occurred trying to fix unpacked tarball permissions."""
 
-    def __init__(self, tarball: Path, error: str):
+    def __init__(self, directory: Path, error: str, stderr: Optional[str] = None):
         super().__init__(
-            f"An error occurred while changing file permissions of {tarball}: {error}"
+            directory,
+            f"An error occurred while changing file permissions of {directory}: {error}",
+            stderr,
         )
-        self.tarball = tarball
-        self.error = error
+        self.directory = directory
 
 
 class CacheType(Enum):
@@ -994,7 +1006,7 @@ def _get_metadata(tarball_path: Path) -> JSONOBJECT:
     def subprocess_run(
         command: str,
         working_dir: PathLike,
-        exception: type[CacheManagerError],
+        exception: type[UnpackBaseError],
         ctx: Path,
     ):
         """Runs a command as a subprocess.
@@ -1025,39 +1037,13 @@ def subprocess_run(
             raise exception(ctx, str(exc)) from exc
         else:
             if process.returncode != 0:
-
-                # tar failures can include a message for each file in the
-                # archive, and can be quite large. Break stderr into lines, and
-                # gather those lines only up to our configured size limit.
-                #
-                # The exception we raise and ultimately log will start by
-                # identifying the tarball, followed by the command and the
-                # return code. The appended stderr may provide additional
-                # useful context, but in general beyond the first line it'll
-                # be fairly meaningless.
-                size = 0
-                message = []
-                lines = process.stderr.split("\n")
-                prefix = ""
-                for line in lines:
-                    # Skip empty lines
-                    if not line:
-                        continue
-                    size += len(line)
-                    if size > MAX_ERROR:
-                        prefix = "[TRUNC]"
-                        break
-                    message.append(line)
-
-                # Prefix with [TRUNC] if we're not including the entire message
-                # and truncate the first line if it was too long by itself.
-                if message:
-                    msg = prefix + ("\n".join(message))
-                else:
-                    msg = "[TRUNC]" + lines[0][:MAX_ERROR]
+                msg = process.stderr
+                if len(msg) > MAX_ERROR - len(TRUNC_PREFIX):
+                    msg = (TRUNC_PREFIX + msg)[:MAX_ERROR]
                 raise exception(
                     ctx,
-                    f"{cmd[0]} exited with status {process.returncode}: {msg!r}",
+                    f"{cmd[0]} exited with status {process.returncode}",
+                    stderr=msg,
                 )
 
     def get_unpacked_size(self) -> int:

diff --git a/lib/pbench/test/unit/server/test_cache_manager.py b/lib/pbench/test/unit/server/test_cache_manager.py
@@ -405,9 +405,15 @@ def mock_run(args, **_kwargs):
     @pytest.mark.parametrize(
         "errmsg,expected",
         (
-            ("Error unpacking tarball\n", "Error unpacking tarball"),
-            ("This is a message we'll find too long", "[TRUNC]This is a message we'll f"),
-            ("One line\nTwo line\nThree line\nFour line\n", "[TRUNC]One line\nTwo line"),
+            ("Error unpacking", "Error unpacking"),
+            (
+                "This is a message we'll find too long",
+                "[TRUNC]This is a message ",
+            ),
+            (
+                "One line\nTwo line\nThree line\nFour line\n",
+                "[TRUNC]One line\nTwo line\n",
+            ),
         ),
     )
     def test_tarball_subprocess_failure(self, monkeypatch, errmsg, expected):

diff --git a/setup.cfg b/setup.cfg
@@ -29,14 +29,12 @@ console_scripts =
    pbench-clear-results = pbench.cli.agent.commands.results.clear:main
    pbench-clear-tools = pbench.cli.agent.commands.tools.clear:main
    pbench-config = pbench.cli.agent.commands.conf:main
-   pbench-repair = pbench.cli.server.repair:repair
-   pbench-report-generator = pbench.cli.server.report:report
-   pbench-tree-manage = pbench.cli.server.tree_manage:tree_manage
    pbench-is-local = pbench.cli.agent.commands.is_local:main
    pbench-list-tools = pbench.cli.agent.commands.tools.list:main
    pbench-list-triggers = pbench.cli.agent.commands.triggers.list:main
    pbench-register-tool-trigger = pbench.cli.agent.commands.triggers.register:main
    pbench-reindex = pbench.cli.server.reindex:reindex
+   pbench-repair = pbench.cli.server.repair:repair
    pbench-report-generator = pbench.cli.server.report:report
    pbench-results-move = pbench.cli.agent.commands.results.move:main
    pbench-results-push = pbench.cli.agent.commands.results.push:main