Merge pull request #15 from reworkd/monke

🚀 Field parameterization
reworkd · Nov 22, 2023 · ceb51cf · ceb51cf
2 parents e445b02 + 9d4dba8
commit ceb51cf
Show file tree

Hide file tree

Showing 13 changed files with 980 additions and 318 deletions.
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
@@ -1,14 +1,34 @@
-name: Testing Platform
+name: Test and Publish
 on:
   push:
-    branches: [ "main" ]
+    branches: ["main"]
   pull_request:
-    branches: [ "main" ]
+    branches: ["main"]
 
 env:
   PYTHON_VERSION: "3.11"
 
 jobs:
+  # Decides whether the publish job should run by comparing the version in
+  # pyproject.toml with the version PyPI reports for bananalyzer.
+  check-version:
+    runs-on: ubuntu-latest
+    outputs:
+      # 'true' when the local version differs from the one on PyPI; unset otherwise.
+      should_publish: ${{ steps.pre-val.outputs.should_publish }}
+    steps:
+      - uses: actions/checkout@v2
+      - name: Check if current version is published
+        id: pre-val
+        run: |
+          # First `version = "..."` line of pyproject.toml, text between the quotes.
+          LOCAL_VERSION=$(grep '^version =' pyproject.toml | head -1 | awk -F '"' '{print $2}')
+          # `.info.version` from the PyPI JSON API response for the package.
+          REMOTE_VERSION=$(curl -s https://pypi.org/pypi/bananalyzer/json | jq -r .info.version)
+
+          echo "Local version: $LOCAL_VERSION"
+          echo "Remote version: $REMOTE_VERSION"
+
+          if [ "$LOCAL_VERSION" != "$REMOTE_VERSION" ]; then
+              echo "Version $LOCAL_VERSION is not published yet"
+              # The `::set-output` workflow command is deprecated; step outputs
+              # are written to the file named by $GITHUB_OUTPUT instead.
+              echo "should_publish=true" >> "$GITHUB_OUTPUT"
+          fi
+
   black:
     runs-on: ubuntu-latest
     steps:
@@ -18,7 +38,7 @@ jobs:
       - uses: actions/setup-python@v4
         with:
           python-version: ${{ env.PYTHON_VERSION }}
-          cache: 'poetry'
+          cache: "poetry"
       - run: poetry install
       - name: Run isort check
         run: poetry run isort --check .
@@ -34,7 +54,7 @@ jobs:
       - uses: actions/setup-python@v4
         with:
           python-version: ${{ env.PYTHON_VERSION }}
-          cache: 'poetry'
+          cache: "poetry"
       - run: poetry install
       - name: Run mypy check
         run: poetry run mypy .
@@ -48,7 +68,25 @@ jobs:
       - uses: actions/setup-python@v4
         with:
           python-version: ${{ env.PYTHON_VERSION }}
-          cache: 'poetry'
-      - run: poetry install
+          cache: "poetry"
+      - run: poetry install && poetry run playwright install chromium
       - name: Run pytest check
-        run: poetry run pytest -vv --cov="bananalyzer" .
+        run: poetry run pytest -vv .
+
+  # Builds the package and uploads it to PyPI. Runs only for refs/heads/main,
+  # after all check jobs succeed, and only when check-version set
+  # should_publish to 'true'.
+  publish:
+    needs: [check-version, black, mypy, pytest]
+    runs-on: ubuntu-latest
+    if: github.ref == 'refs/heads/main' && needs.check-version.outputs.should_publish == 'true'
+    steps:
+      - uses: actions/checkout@v3
+      - name: Install poetry
+        run: pipx install poetry
+      - uses: actions/setup-python@v4
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+          cache: "poetry"
+      - run: poetry install
+      - name: Build and Publish
+        env:
+          # Poetry reads the PyPI token from this variable, so the secret is
+          # neither interpolated into the shell script nor written to
+          # Poetry's config file on the runner.
+          POETRY_PYPI_TOKEN_PYPI: ${{ secrets.PYPI_TOKEN }}
+        run: poetry publish --build
diff --git a/bananalyzer/__main__.py b/bananalyzer/__main__.py
@@ -10,7 +10,8 @@
 from typing import List
 
 from bananalyzer import AgentRunner, examples
-from bananalyzer.runner.runner import TestGenerator, run_tests
+from bananalyzer.runner.generator import PytestTestGenerator
+from bananalyzer.runner.runner import run_tests
 from bananalyzer.schema import AgentRunnerClass, Args, PytestArgs
 
 
@@ -40,7 +41,8 @@ def print_intro() -> None:
 def parse_args() -> Args:
     file_name = "bananalyzer-agent.py"
     parser = argparse.ArgumentParser(
-        description=f"Run the agent inside a bananalyzer agent definition file against the benchmark",
+        description=f"Run the agent inside a bananalyzer agent definition file "
+        f"against the benchmark",
     )
     parser.add_argument("path", type=str, help=f"Path to the {file_name} file")
     parser.add_argument(
@@ -87,6 +89,19 @@ def parse_args() -> Args:
         default=[],
         help="A list of ids to skip tests on, separated by commas",
     )
+    parser.add_argument(
+        "-q",
+        "--quiet",
+        action="store_true",
+        help="Will decrease the verbosity of pytest. By default we run with the `--v` pytest param.",
+    )
+    parser.add_argument(
+        "--single_browser_instance",
+        action="store_true",
+        help="Run tests in a single browser instance as opposed to creating a browser "
+        "instance per test. This is faster but less reliable as test contexts can "
+        "occasionally bleed into each other, causing tests to fail",
+    )
 
     args = parser.parse_args()
 
@@ -101,14 +116,16 @@ def parse_args() -> Args:
         id=args.id,
         domain=args.domain,
         skip=args.skip,
+        single_browser_instance=args.single_browser_instance,
         pytest_args=PytestArgs(
             s=args.s,
             n=args.n,
+            q=args.quiet,
         ),
     )
 
 
-def find_decorated_scrapers(file_path: Path) -> List[AgentRunnerClass]:
+def find_agents(file_path: Path) -> List[AgentRunnerClass]:
     with open(file_path, "r") as source:
         node = ast.parse(source.read())
 
@@ -127,13 +144,13 @@ def find_decorated_scrapers(file_path: Path) -> List[AgentRunnerClass]:
 
 def load_agent_from_path(path: Path) -> AgentRunnerClass:
     if path.is_dir():
-        files = list(path.glob("**/*.py"))
+        files = [p for p in path.glob("**/*.py") if "venv" not in p.parts]
     else:
         files = [path]
 
     runners: List[AgentRunnerClass] = []
     for file in files:
-        runners.extend(find_decorated_scrapers(file))
+        runners.extend(find_agents(file))
 
     if len(runners) == 0:
         raise RuntimeError(f"Could not find any agent runners in {path}")
@@ -205,13 +222,13 @@ def main() -> int:
         return 0
 
     # Load the desired tests
-    generator = TestGenerator()
-    tests = [
-        generator.generate_test(example, args.headless) for example in filtered_examples
-    ]
+    generator = PytestTestGenerator()
+    tests = [generator.generate_test(example) for example in filtered_examples]
 
     # Run the tests
-    return run_tests(tests, agent, args.pytest_args)
+    return run_tests(
+        tests, agent, args.pytest_args, args.headless, args.single_browser_instance
+    )
 
 
 if __name__ == "__main__":

diff --git a/bananalyzer/runner/generator.py b/bananalyzer/runner/generator.py
@@ -0,0 +1,58 @@
+from typing import Dict
+from urllib.parse import urlparse
+
+from bananalyzer import Example
+from bananalyzer.data.schemas import Eval
+from bananalyzer.runner.runner import BananalyzerTest
+
+
+class PytestTestGenerator:
+    """Generate pytest source code for benchmark examples.
+
+    Each example becomes one test class. Class names are built from the
+    example's type and URL domain and are de-duplicated with a numeric suffix
+    for as long as this generator instance lives.
+    """
+
+    def __init__(self) -> None:
+        # Maps each "<Type><Domain>" key to the number of extra times it was used.
+        self._classnames: Dict[str, int] = {}
+
+    def generate_test(self, example: Example) -> BananalyzerTest:
+        """Return a BananalyzerTest holding the pytest class source for *example*.
+
+        The generated class looks the example up by URL in ``setup_class``,
+        runs the agent in a class-scoped ``result`` fixture, and contains one
+        test method per entry of ``example.evals``.
+        """
+        # Two evals of the same kind would otherwise produce methods with the
+        # same name, and the later definition would silently replace the
+        # earlier one in the generated class. Repeats get "_2", "_3", ...
+        seen: Dict[str, int] = {}
+        eval_tests = []
+        for i, eval_ in enumerate(example.evals):
+            name = self._eval_test_name(eval_)
+            seen[name] = seen.get(name, 0) + 1
+            suffix = "" if seen[name] == 1 else f"_{seen[name]}"
+            eval_tests.append(self._generate_eval_test(eval_, i, suffix))
+
+        # repr() yields a valid Python string literal even when the URL
+        # contains quotes or backslashes; a bare "..." interpolation does not.
+        url_literal = repr(str(example.url))
+
+        return BananalyzerTest(
+            code=f"""
+@pytest.mark.asyncio
+class {self._generate_class_name(example)}:
+
+    @classmethod
+    def setup_class(cls):
+        cls.example = get_example_by_url({url_literal})
+
+
+    @pytest_asyncio.fixture(scope="class")
+    async def result(self, context, agent):
+        yield await agent.run(context, self.example)
+
+    {"".join(eval_tests)}
+""",
+            example=example,
+        )
+
+    @staticmethod
+    def _is_field_match(eval_: Eval) -> bool:
+        """Return True when *eval_* is a ``json_match`` with a dict ``expected``."""
+        return eval_.type == "json_match" and isinstance(eval_.expected, dict)
+
+    def _eval_test_name(self, eval_: Eval) -> str:
+        """Return the unsuffixed name of the test method generated for *eval_*."""
+        if self._is_field_match(eval_):
+            return "test_match_field"
+        return f"test_{eval_.type}"
+
+    def _generate_eval_test(self, eval_: Eval, i: int, suffix: str = "") -> str:
+        """Return the source of the test method for ``example.evals[i]``.
+
+        ``json_match`` evals with a dict ``expected`` are parametrized over the
+        expected keys, so each field is a separate test case. Every other eval
+        delegates to its own ``eval_results``. *suffix* is appended to the
+        method name so callers can keep names unique within one class.
+        """
+        name = f"{self._eval_test_name(eval_)}{suffix}"
+        if self._is_field_match(eval_):
+            return f"""
+    @pytest.mark.parametrize("key", {list(eval_.expected.keys())})
+    async def {name}(self, key, result) -> None:
+        assert self.example.evals[{i}].expected.get(key, None) == result.get(key, None)
+
+"""
+        return f"""
+    async def {name}(self, result) -> None:
+        self.example.evals[{i}].eval_results(None, result)
+
+"""
+
+    def _generate_class_name(self, example: Example) -> str:
+        """Return a unique ``Test<Type><Domain>`` class name for *example*.
+
+        The domain is the URL's netloc without a leading ``www.``, with each
+        dot-separated part capitalized. The first use of a name has no suffix;
+        later uses get ``2``, ``3``, ...
+        """
+        netloc = urlparse(example.url).netloc
+        # Dots, hyphens, and the ":" of a port are not valid in a Python
+        # identifier, so every such character becomes a word separator.
+        domain = "".join(
+            ch if ch.isascii() and ch.isalnum() else "_" for ch in netloc
+        )
+        if domain.startswith("www_"):
+            domain = domain[4:]
+
+        domain = "".join([part.capitalize() for part in domain.split("_")])
+
+        key = f"{example.type.capitalize()}{domain}"
+        self._classnames[key] = self._classnames.get(key, -1) + 1
+        suffix = "" if not self._classnames[key] else f"{self._classnames[key] + 1}"
+        return f"Test{key}{suffix}"
diff --git a/bananalyzer/runner/null_agent_wrapper.py b/bananalyzer/runner/null_agent_wrapper.py
@@ -20,5 +20,6 @@ async def run(
         print(f"Testing {example.get_static_url()}")
         await page.goto(example.get_static_url())
         await asyncio.sleep(0.5)
+
         print(f"Done testing {example.get_static_url()}")
         return example.evals[0].expected  # type: ignore