# .github/workflows/main.yaml
#
# Builds and publishes the benchmarker container image.
# The workflow is triggered by pushes to `main`, by any tag push, and by pull
# requests, but its only job is gated so that it runs for tag pushes only.
name: Main

on:
  push:
    branches:
      - main
    tags:
      - '**'
    # Documentation-only changes do not need to trigger the workflow.
    paths-ignore:
      - README.md
      - LICENSE
  pull_request:

jobs:
  Push-Docker:
    name: push-docker
    runs-on: ubuntu-latest-8-cores
    # Only publish images for tag pushes (never for branch pushes or PRs).
    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
    steps:
      - uses: actions/checkout@v4
      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{secrets.DOCKER_USERNAME}}
          password: ${{secrets.DOCKER_PASSWORD}}
      - name: Push container
        # The publish script is checked in at ci/push_docker.sh (the directory
        # listed in CODEOWNERS); the previous ./cicd/ path does not exist and
        # made this step fail with "No such file or directory".
        run: ./ci/push_docker.sh
import glob
import json
import os
import unittest


# Directory (relative to the current working directory) scanned for
# ``*.json`` benchmark result files.
PATH = "./results"
# Default recall threshold; can be overridden at runtime through the
# REQUIRED_RECALL environment variable.
REQUIRED_RECALL = .992


class TestResults(unittest.TestCase):
    """Verify that each benchmark result file reaches the required recall.

    Every ``*.json`` file directly under ``PATH`` is expected to hold a list
    of run configurations, each a mapping with a numeric ``"recall"`` entry.
    A file passes when the best recall among its run configurations is at
    least the required recall.
    """

    def setUp(self):
        """Load all result files from ``PATH``.

        Populates ``self.datapoints`` with the parsed JSON content of each
        file and ``self.result_files`` with the matching file names (same
        order), so failures can name the offending file.
        """
        self.datapoints = []
        self.result_files = []

        # glob already returns paths usable from the current working
        # directory; sorting makes the reporting order deterministic.
        for result_filename in sorted(glob.glob(os.path.join(PATH, "*.json"))):
            with open(result_filename, "r", encoding="utf-8") as result_file:
                self.datapoints.append(json.load(result_file))
            self.result_files.append(result_filename)

    def test_max_recall(self):
        """Assert that the best recall in every result file meets the threshold.

        The threshold is read from the ``REQUIRED_RECALL`` environment
        variable when it is set to a non-empty value, otherwise the
        module-level ``REQUIRED_RECALL`` default is used.

        Raises:
            ValueError: if the ``REQUIRED_RECALL`` environment variable is
                set but cannot be parsed as a float.
        """
        rr_env = os.getenv("REQUIRED_RECALL")
        required_recall = float(rr_env) if rr_env else REQUIRED_RECALL

        if not self.datapoints:
            # Nothing was loaded, so there is nothing to verify. Report this
            # as a skip instead of a silent pass so it shows up in the output.
            self.skipTest(f"no result files found in {PATH}")

        for result_filename, run_iteration in zip(self.result_files, self.datapoints):
            # One sub-test per file: every failing file is reported, not just
            # the first one encountered.
            with self.subTest(result_file=result_filename):
                recalls = [run_config["recall"] for run_config in run_iteration]
                # max() of an empty list raises ValueError; turn that into a
                # readable assertion failure instead.
                self.assertTrue(
                    recalls,
                    f"{result_filename} contains no run configurations",
                )
                max_recall = max(recalls)
                self.assertGreaterEqual(
                    max_recall,
                    required_recall,
                    f"need to achieve at least {required_recall} recall, "
                    f"got only {max_recall} in {result_filename}",
                )


if __name__ == "__main__":
    unittest.main()
-z "$GITHUB_REF_NAME" ] && [ "$GITHUB_REF_TYPE" == "tag" ]; then + tag_git="$DOCKER_REPO:$GITHUB_REF_NAME" + tag_latest="$DOCKER_REPO:latest" + + echo "Tag & Push $tag_latest, $tag_git" + docker buildx build --platform=linux/arm64,linux/amd64 \ + --push \ + --tag "$tag_git" \ + --tag "$tag_latest" \ + ./benchmarker + fi +} + +main