From f826a1413d27ce1761bed1281d85bbde8086e8f5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luis=20P=C3=A9rez?=
Date: Mon, 13 May 2024 13:59:20 -0400
Subject: [PATCH] Delete all Mesos-using code (+ misc. cleanups) (#215)

1. Cleaned up the Makefile (removed all dead targets, made all targets PHONY)
2. Removed all unused plugins (deleted code is code we don't need to maintain)
3. Regenerated docs (the target was not phony, so docs would never be regenerated unless make was run with --always-make)
4. Cleaned up README.md (removed Mesos references as well as references to deleted plugins)
5. Cleaned up dead tests (this included deleting all the itests, since they were all Mesos-related)
---
 Makefile | 9 +-
 README.md | 64 +-
 .../task_processing.interfaces.event.rst | 10 +-
 ...task_processing.interfaces.persistence.rst | 7 +
 .../generated/task_processing.interfaces.rst | 12 +-
 .../task_processing.interfaces.runner.rst | 10 +-
 ...sk_processing.interfaces.task_executor.rst | 10 +-
 .../generated/task_processing.metrics.rst | 7 +
 ...cessing.plugins.kubernetes.kube_client.rst | 7 +
 ...ins.kubernetes.kubernetes_pod_executor.rst | 7 +
 .../task_processing.plugins.kubernetes.rst | 23 +
 ...cessing.plugins.kubernetes.task_config.rst | 7 +
 ...ssing.plugins.kubernetes.task_metadata.rst | 7 +
 ...sk_processing.plugins.kubernetes.types.rst | 7 +
 ...sk_processing.plugins.kubernetes.utils.rst | 7 +
 ...sing.plugins.mesos.execution_framework.rst | 7 -
 ...rocessing.plugins.mesos.mesos_executor.rst | 7 -
 .../task_processing.plugins.mesos.rst | 19 -
 ...sk_processing.plugins.mesos.translator.rst | 7 -
 .../generated/task_processing.plugins.rst | 13 +-
 docs/source/generated/task_processing.rst | 23 +-
 .../task_processing.runners.async.rst | 7 -
 .../task_processing.runners.async_runner.rst | 7 +
 .../task_processing.runners.promise.rst | 10 +-
 .../generated/task_processing.runners.rst | 13 +-
 .../task_processing.runners.subscription.rst | 10 +-
 .../task_processing.runners.sync.rst | 10 +-
 .../task_processing.task_processor.rst | 7 +
 .../generated/task_processing.utils.rst | 7 +
 examples/__init__.py | 0
 examples/async.py | 61 --
 examples/cluster/docker-compose.yaml | 51 --
 examples/cluster/mesos-agent-secret | 4 -
 examples/cluster/mesos-secrets | 12 -
 examples/cluster/playground/Dockerfile | 35 -
 examples/cluster/secret | 1 -
 examples/common.py | 38 -
 examples/dynamo_persistence.py | 82 ---
 examples/file_persistence.py | 49 --
 examples/hello-world.py | 59 --
 examples/offer_timeout.py | 71 --
 examples/promise.py | 48 --
 examples/retry.py | 47 --
 examples/subscription.py | 61 --
 examples/sync.py | 43 --
 examples/task_logging.py | 48 --
 examples/timeout.py | 49 --
 itest | 15 -
 setup.py | 2 -
 task_processing/plugins/mesos/__init__.py | 19 -
 task_processing/plugins/mesos/constraints.py | 65 --
 .../plugins/mesos/execution_framework.py | 693 ------------------
 .../plugins/mesos/logging_executor.py | 269 -------
 .../plugins/mesos/mesos_executor.py | 108 ---
 .../plugins/mesos/mesos_pod_executor.py | 6 -
 .../plugins/mesos/mesos_task_executor.py | 51 --
 task_processing/plugins/mesos/metrics.py | 22 -
 .../plugins/mesos/resource_helpers.py | 97 ---
 .../plugins/mesos/retrying_executor.py | 167 -----
 task_processing/plugins/mesos/task_config.py | 135 ----
 .../plugins/mesos/timeout_executor.py | 132 ----
 task_processing/plugins/mesos/translator.py | 146 ----
 .../plugins/persistence/__init__.py | 0
 .../persistence/dynamodb_persistence.py | 72 --
 .../plugins/persistence/file_persistence.py | 27 -
 task_processing/plugins/stateful/__init__.py | 8 -
.../plugins/stateful/stateful_executor.py | 49 -- tests/integration/cluster | 1 - tests/integration/mesos/features/sync.feature | 9 - tests/integration/mesos/mesos_test.py | 29 - tests/unit/plugins/mesos/conftest.py | 86 --- tests/unit/plugins/mesos/constraints_test.py | 284 ------- .../plugins/mesos/execution_framework_test.py | 568 -------------- .../plugins/mesos/logging_executor_test.py | 185 ----- .../unit/plugins/mesos/mesos_executor_test.py | 90 --- .../plugins/mesos/mesos_task_config_test.py | 35 - .../plugins/mesos/mesos_task_executor_test.py | 48 -- .../plugins/mesos/resource_helpers_test.py | 66 -- .../plugins/mesos/retrying_executor_test.py | 287 -------- .../plugins/mesos/timeout_executor_test.py | 209 ------ tests/unit/plugins/mesos/translator_test.py | 151 ---- .../persistence/dynamo_persistence_test.py | 99 --- .../stateful/stateful_executor_test.py | 44 -- tox.ini | 22 +- 84 files changed, 179 insertions(+), 5217 deletions(-) create mode 100644 docs/source/generated/task_processing.interfaces.persistence.rst create mode 100644 docs/source/generated/task_processing.metrics.rst create mode 100644 docs/source/generated/task_processing.plugins.kubernetes.kube_client.rst create mode 100644 docs/source/generated/task_processing.plugins.kubernetes.kubernetes_pod_executor.rst create mode 100644 docs/source/generated/task_processing.plugins.kubernetes.rst create mode 100644 docs/source/generated/task_processing.plugins.kubernetes.task_config.rst create mode 100644 docs/source/generated/task_processing.plugins.kubernetes.task_metadata.rst create mode 100644 docs/source/generated/task_processing.plugins.kubernetes.types.rst create mode 100644 docs/source/generated/task_processing.plugins.kubernetes.utils.rst delete mode 100644 docs/source/generated/task_processing.plugins.mesos.execution_framework.rst delete mode 100644 docs/source/generated/task_processing.plugins.mesos.mesos_executor.rst delete mode 100644 docs/source/generated/task_processing.plugins.mesos.rst delete mode 100644 docs/source/generated/task_processing.plugins.mesos.translator.rst delete mode 100644 docs/source/generated/task_processing.runners.async.rst create mode 100644 docs/source/generated/task_processing.runners.async_runner.rst create mode 100644 docs/source/generated/task_processing.task_processor.rst create mode 100644 docs/source/generated/task_processing.utils.rst delete mode 100755 examples/__init__.py delete mode 100755 examples/async.py delete mode 100644 examples/cluster/docker-compose.yaml delete mode 100644 examples/cluster/mesos-agent-secret delete mode 100644 examples/cluster/mesos-secrets delete mode 100644 examples/cluster/playground/Dockerfile delete mode 100644 examples/cluster/secret delete mode 100644 examples/common.py delete mode 100755 examples/dynamo_persistence.py delete mode 100755 examples/file_persistence.py delete mode 100755 examples/hello-world.py delete mode 100755 examples/offer_timeout.py delete mode 100755 examples/promise.py delete mode 100755 examples/retry.py delete mode 100755 examples/subscription.py delete mode 100755 examples/sync.py delete mode 100755 examples/task_logging.py delete mode 100755 examples/timeout.py delete mode 100755 itest delete mode 100644 task_processing/plugins/mesos/__init__.py delete mode 100644 task_processing/plugins/mesos/constraints.py delete mode 100644 task_processing/plugins/mesos/execution_framework.py delete mode 100644 task_processing/plugins/mesos/logging_executor.py delete mode 100644 task_processing/plugins/mesos/mesos_executor.py 
delete mode 100644 task_processing/plugins/mesos/mesos_pod_executor.py delete mode 100644 task_processing/plugins/mesos/mesos_task_executor.py delete mode 100644 task_processing/plugins/mesos/metrics.py delete mode 100644 task_processing/plugins/mesos/resource_helpers.py delete mode 100644 task_processing/plugins/mesos/retrying_executor.py delete mode 100644 task_processing/plugins/mesos/task_config.py delete mode 100644 task_processing/plugins/mesos/timeout_executor.py delete mode 100644 task_processing/plugins/mesos/translator.py delete mode 100644 task_processing/plugins/persistence/__init__.py delete mode 100644 task_processing/plugins/persistence/dynamodb_persistence.py delete mode 100644 task_processing/plugins/persistence/file_persistence.py delete mode 100644 task_processing/plugins/stateful/__init__.py delete mode 100644 task_processing/plugins/stateful/stateful_executor.py delete mode 120000 tests/integration/cluster delete mode 100644 tests/integration/mesos/features/sync.feature delete mode 100644 tests/integration/mesos/mesos_test.py delete mode 100644 tests/unit/plugins/mesos/conftest.py delete mode 100644 tests/unit/plugins/mesos/constraints_test.py delete mode 100644 tests/unit/plugins/mesos/execution_framework_test.py delete mode 100644 tests/unit/plugins/mesos/logging_executor_test.py delete mode 100644 tests/unit/plugins/mesos/mesos_executor_test.py delete mode 100644 tests/unit/plugins/mesos/mesos_task_config_test.py delete mode 100644 tests/unit/plugins/mesos/mesos_task_executor_test.py delete mode 100644 tests/unit/plugins/mesos/resource_helpers_test.py delete mode 100644 tests/unit/plugins/mesos/retrying_executor_test.py delete mode 100644 tests/unit/plugins/mesos/timeout_executor_test.py delete mode 100644 tests/unit/plugins/mesos/translator_test.py delete mode 100644 tests/unit/plugins/persistence/dynamo_persistence_test.py delete mode 100644 tests/unit/plugins/stateful/stateful_executor_test.py diff --git a/Makefile b/Makefile index 232244a9..95f7bab4 100644 --- a/Makefile +++ b/Makefile @@ -5,24 +5,27 @@ else BUILD_ENV?=$(shell hostname -f) endif +.PHONY: venv venv: tox -e venv +.PHONY: test test: tox +.PHONY: tox_% tox_%: tox -e $* -itest: - tox -e integration - +.PHONY: docs docs: tox -e docs +.PHONY: pypi pypi: tox -e pypi +.PHONY: clean clean: rm -rf docs/build find . -name '*.pyc' -delete diff --git a/README.md b/README.md index 6232a00c..d950bae4 100644 --- a/README.md +++ b/README.md @@ -6,47 +6,9 @@ Interfaces and shared infrastructure for generic task processing (also known as ### Pre-requisites -+ [Docker](https://www.docker.com/get-docker) + [Python 3.8](https://www.python.org/downloads/) + [Virtualenv](https://virtualenv.pypa.io/en/stable/installation/) -### Running examples - -[hello-world.py](/examples/hello-world/py) is a very simple annotated example that launches a task to echo `hello world`. From the root of the repository, run: - - docker-compose -f examples/cluster/docker-compose.yaml \ - run playground examples/hello-world.py - -This will bring up a single master, single agent Mesos cluster using [Docker Compose](https://docs.docker.com/compose/) and launch a single task which will print "hello world" to the sandbox's stdout before terminating. - -Other examples available include: -+ async.py -Example of the [async](#async) task runner. - -+ dynamo_persistence.py -Example that shows how task events may be persisted to [DynamoDB](https://aws.amazon.com/dynamodb) using the `stateful` plugin. 
- -+ file_persistence.py -Example that shows how task events may be persisted to disk using the `stateful` plugin. - -+ promise.py -Example that shows how the [promise/future](#Promise/Future) task runner (not yet implemented) may be used. - -+ subscription.py -Example of the [subscription](#subscription) task runner. - -+ sync.py -Brief example using the [sync](#sync) task runner. - -+ timeout.py -Example that shows how to timeout a task execution using the `timeout` plugin. - -+ retry.py -Example that shows how to retry a task on failure using the `retry` plugin. - -+ task_logging.py -Example that shows how to fetch task logs from Mesos agents using the `logging` plugin. - ### Running tests From the root of the repository, run: @@ -65,30 +27,10 @@ From the root of the repository, run: ### /plugins -Plugins can be chained to create a task execution pipeline with more than one property. Please refer to persistence/retry/timeout examples. - -#### mesos -Implements all required interfaces to talk to Mesos deployment. This plugin uses [PyMesos](https://github.com/douban/pymesos) to communicate with Mesos. - -#### timeout -Implements an executor to timeout task execution. - -#### retrying -Implements an executor to retry task execution upon failure. - -#### logging -Implements an executor to retrieve task logs from Mesos agents. Note that it has to be the immediate upstream executor of the mesos executor. - -##### Configuration options - -- authentication\_principal Mesos principal -- credential\_secret\_file path to file containing Mesos secret -- mesos\_address host:port to connect to Mesos cluster -- event_translator a fucntion that maps Mesos-specific events to `Event` objects - -#### stateful +Plugins can be chained to create a task execution pipeline with more than one property. -TODO: documentation +#### Kubernetes +Implements all required interfaces to talk to Kubernetes. This plugin uses [kubernetes-client](https://github.com/kubernetes-client/python) to communicate with Kubernetes. ### /runners diff --git a/docs/source/generated/task_processing.interfaces.event.rst b/docs/source/generated/task_processing.interfaces.event.rst index 7ae80859..b8a2cca0 100644 --- a/docs/source/generated/task_processing.interfaces.event.rst +++ b/docs/source/generated/task_processing.interfaces.event.rst @@ -1,7 +1,7 @@ -task\_processing\.interfaces\.event module -========================================== +task\_processing.interfaces.event module +======================================== .. automodule:: task_processing.interfaces.event - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.interfaces.persistence.rst b/docs/source/generated/task_processing.interfaces.persistence.rst new file mode 100644 index 00000000..3c457a0d --- /dev/null +++ b/docs/source/generated/task_processing.interfaces.persistence.rst @@ -0,0 +1,7 @@ +task\_processing.interfaces.persistence module +============================================== + +.. 
automodule:: task_processing.interfaces.persistence + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.interfaces.rst b/docs/source/generated/task_processing.interfaces.rst index 72c42e29..1a06f845 100644 --- a/docs/source/generated/task_processing.interfaces.rst +++ b/docs/source/generated/task_processing.interfaces.rst @@ -1,12 +1,14 @@ -task\_processing\.interfaces package -==================================== +task\_processing.interfaces package +=================================== Submodules ---------- .. toctree:: + :maxdepth: 4 task_processing.interfaces.event + task_processing.interfaces.persistence task_processing.interfaces.runner task_processing.interfaces.task_executor @@ -14,6 +16,6 @@ Module contents --------------- .. automodule:: task_processing.interfaces - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.interfaces.runner.rst b/docs/source/generated/task_processing.interfaces.runner.rst index 2cf1789b..7efc8ade 100644 --- a/docs/source/generated/task_processing.interfaces.runner.rst +++ b/docs/source/generated/task_processing.interfaces.runner.rst @@ -1,7 +1,7 @@ -task\_processing\.interfaces\.runner module -=========================================== +task\_processing.interfaces.runner module +========================================= .. automodule:: task_processing.interfaces.runner - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.interfaces.task_executor.rst b/docs/source/generated/task_processing.interfaces.task_executor.rst index b2a89cab..7fd3b3d6 100644 --- a/docs/source/generated/task_processing.interfaces.task_executor.rst +++ b/docs/source/generated/task_processing.interfaces.task_executor.rst @@ -1,7 +1,7 @@ -task\_processing\.interfaces\.task\_executor module -=================================================== +task\_processing.interfaces.task\_executor module +================================================= .. automodule:: task_processing.interfaces.task_executor - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.metrics.rst b/docs/source/generated/task_processing.metrics.rst new file mode 100644 index 00000000..002b6d8e --- /dev/null +++ b/docs/source/generated/task_processing.metrics.rst @@ -0,0 +1,7 @@ +task\_processing.metrics module +=============================== + +.. automodule:: task_processing.metrics + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.plugins.kubernetes.kube_client.rst b/docs/source/generated/task_processing.plugins.kubernetes.kube_client.rst new file mode 100644 index 00000000..c130a5b1 --- /dev/null +++ b/docs/source/generated/task_processing.plugins.kubernetes.kube_client.rst @@ -0,0 +1,7 @@ +task\_processing.plugins.kubernetes.kube\_client module +======================================================= + +.. 
automodule:: task_processing.plugins.kubernetes.kube_client + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.plugins.kubernetes.kubernetes_pod_executor.rst b/docs/source/generated/task_processing.plugins.kubernetes.kubernetes_pod_executor.rst new file mode 100644 index 00000000..06fafecf --- /dev/null +++ b/docs/source/generated/task_processing.plugins.kubernetes.kubernetes_pod_executor.rst @@ -0,0 +1,7 @@ +task\_processing.plugins.kubernetes.kubernetes\_pod\_executor module +==================================================================== + +.. automodule:: task_processing.plugins.kubernetes.kubernetes_pod_executor + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.plugins.kubernetes.rst b/docs/source/generated/task_processing.plugins.kubernetes.rst new file mode 100644 index 00000000..2f0fa689 --- /dev/null +++ b/docs/source/generated/task_processing.plugins.kubernetes.rst @@ -0,0 +1,23 @@ +task\_processing.plugins.kubernetes package +=========================================== + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + task_processing.plugins.kubernetes.kube_client + task_processing.plugins.kubernetes.kubernetes_pod_executor + task_processing.plugins.kubernetes.task_config + task_processing.plugins.kubernetes.task_metadata + task_processing.plugins.kubernetes.types + task_processing.plugins.kubernetes.utils + +Module contents +--------------- + +.. automodule:: task_processing.plugins.kubernetes + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.plugins.kubernetes.task_config.rst b/docs/source/generated/task_processing.plugins.kubernetes.task_config.rst new file mode 100644 index 00000000..29546886 --- /dev/null +++ b/docs/source/generated/task_processing.plugins.kubernetes.task_config.rst @@ -0,0 +1,7 @@ +task\_processing.plugins.kubernetes.task\_config module +======================================================= + +.. automodule:: task_processing.plugins.kubernetes.task_config + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.plugins.kubernetes.task_metadata.rst b/docs/source/generated/task_processing.plugins.kubernetes.task_metadata.rst new file mode 100644 index 00000000..e8d1845e --- /dev/null +++ b/docs/source/generated/task_processing.plugins.kubernetes.task_metadata.rst @@ -0,0 +1,7 @@ +task\_processing.plugins.kubernetes.task\_metadata module +========================================================= + +.. automodule:: task_processing.plugins.kubernetes.task_metadata + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.plugins.kubernetes.types.rst b/docs/source/generated/task_processing.plugins.kubernetes.types.rst new file mode 100644 index 00000000..5c5e0424 --- /dev/null +++ b/docs/source/generated/task_processing.plugins.kubernetes.types.rst @@ -0,0 +1,7 @@ +task\_processing.plugins.kubernetes.types module +================================================ + +.. 
automodule:: task_processing.plugins.kubernetes.types + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.plugins.kubernetes.utils.rst b/docs/source/generated/task_processing.plugins.kubernetes.utils.rst new file mode 100644 index 00000000..5e41a30e --- /dev/null +++ b/docs/source/generated/task_processing.plugins.kubernetes.utils.rst @@ -0,0 +1,7 @@ +task\_processing.plugins.kubernetes.utils module +================================================ + +.. automodule:: task_processing.plugins.kubernetes.utils + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.plugins.mesos.execution_framework.rst b/docs/source/generated/task_processing.plugins.mesos.execution_framework.rst deleted file mode 100644 index cea63183..00000000 --- a/docs/source/generated/task_processing.plugins.mesos.execution_framework.rst +++ /dev/null @@ -1,7 +0,0 @@ -task\_processing\.plugins\.mesos\.execution\_framework module -============================================================= - -.. automodule:: task_processing.plugins.mesos.execution_framework - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/generated/task_processing.plugins.mesos.mesos_executor.rst b/docs/source/generated/task_processing.plugins.mesos.mesos_executor.rst deleted file mode 100644 index ba3a3f64..00000000 --- a/docs/source/generated/task_processing.plugins.mesos.mesos_executor.rst +++ /dev/null @@ -1,7 +0,0 @@ -task\_processing\.plugins\.mesos\.mesos\_executor module -======================================================== - -.. automodule:: task_processing.plugins.mesos.mesos_executor - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/generated/task_processing.plugins.mesos.rst b/docs/source/generated/task_processing.plugins.mesos.rst deleted file mode 100644 index 8b8370aa..00000000 --- a/docs/source/generated/task_processing.plugins.mesos.rst +++ /dev/null @@ -1,19 +0,0 @@ -task\_processing\.plugins\.mesos package -======================================== - -Submodules ----------- - -.. toctree:: - - task_processing.plugins.mesos.execution_framework - task_processing.plugins.mesos.mesos_executor - task_processing.plugins.mesos.translator - -Module contents ---------------- - -.. automodule:: task_processing.plugins.mesos - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/generated/task_processing.plugins.mesos.translator.rst b/docs/source/generated/task_processing.plugins.mesos.translator.rst deleted file mode 100644 index 2cb13c96..00000000 --- a/docs/source/generated/task_processing.plugins.mesos.translator.rst +++ /dev/null @@ -1,7 +0,0 @@ -task\_processing\.plugins\.mesos\.translator module -=================================================== - -.. automodule:: task_processing.plugins.mesos.translator - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/generated/task_processing.plugins.rst b/docs/source/generated/task_processing.plugins.rst index 425a912d..06f389b0 100644 --- a/docs/source/generated/task_processing.plugins.rst +++ b/docs/source/generated/task_processing.plugins.rst @@ -1,17 +1,18 @@ -task\_processing\.plugins package -================================= +task\_processing.plugins package +================================ Subpackages ----------- .. toctree:: + :maxdepth: 4 - task_processing.plugins.mesos + task_processing.plugins.kubernetes Module contents --------------- .. 
automodule:: task_processing.plugins - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.rst b/docs/source/generated/task_processing.rst index a250279d..763333c7 100644 --- a/docs/source/generated/task_processing.rst +++ b/docs/source/generated/task_processing.rst @@ -5,15 +5,26 @@ Subpackages ----------- .. toctree:: + :maxdepth: 4 - task_processing.interfaces - task_processing.plugins - task_processing.runners + task_processing.interfaces + task_processing.plugins + task_processing.runners + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + task_processing.metrics + task_processing.task_processor + task_processing.utils Module contents --------------- .. automodule:: task_processing - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.runners.async.rst b/docs/source/generated/task_processing.runners.async.rst deleted file mode 100644 index 71993f5d..00000000 --- a/docs/source/generated/task_processing.runners.async.rst +++ /dev/null @@ -1,7 +0,0 @@ -task\_processing\.runners\.async module -======================================= - -.. automodule:: task_processing.runners.async - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/generated/task_processing.runners.async_runner.rst b/docs/source/generated/task_processing.runners.async_runner.rst new file mode 100644 index 00000000..480156a3 --- /dev/null +++ b/docs/source/generated/task_processing.runners.async_runner.rst @@ -0,0 +1,7 @@ +task\_processing.runners.async\_runner module +============================================= + +.. automodule:: task_processing.runners.async_runner + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.runners.promise.rst b/docs/source/generated/task_processing.runners.promise.rst index 6f5be7a4..a8f8ff59 100644 --- a/docs/source/generated/task_processing.runners.promise.rst +++ b/docs/source/generated/task_processing.runners.promise.rst @@ -1,7 +1,7 @@ -task\_processing\.runners\.promise module -========================================= +task\_processing.runners.promise module +======================================= .. automodule:: task_processing.runners.promise - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.runners.rst b/docs/source/generated/task_processing.runners.rst index a840d97b..7e18893d 100644 --- a/docs/source/generated/task_processing.runners.rst +++ b/docs/source/generated/task_processing.runners.rst @@ -1,12 +1,13 @@ -task\_processing\.runners package -================================= +task\_processing.runners package +================================ Submodules ---------- .. toctree:: + :maxdepth: 4 - task_processing.runners.async + task_processing.runners.async_runner task_processing.runners.promise task_processing.runners.subscription task_processing.runners.sync @@ -15,6 +16,6 @@ Module contents --------------- .. 
automodule:: task_processing.runners - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.runners.subscription.rst b/docs/source/generated/task_processing.runners.subscription.rst index 81c088cf..bd0e7f61 100644 --- a/docs/source/generated/task_processing.runners.subscription.rst +++ b/docs/source/generated/task_processing.runners.subscription.rst @@ -1,7 +1,7 @@ -task\_processing\.runners\.subscription module -============================================== +task\_processing.runners.subscription module +============================================ .. automodule:: task_processing.runners.subscription - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.runners.sync.rst b/docs/source/generated/task_processing.runners.sync.rst index 450da322..99116f7c 100644 --- a/docs/source/generated/task_processing.runners.sync.rst +++ b/docs/source/generated/task_processing.runners.sync.rst @@ -1,7 +1,7 @@ -task\_processing\.runners\.sync module -====================================== +task\_processing.runners.sync module +==================================== .. automodule:: task_processing.runners.sync - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.task_processor.rst b/docs/source/generated/task_processing.task_processor.rst new file mode 100644 index 00000000..aabda2f7 --- /dev/null +++ b/docs/source/generated/task_processing.task_processor.rst @@ -0,0 +1,7 @@ +task\_processing.task\_processor module +======================================= + +.. automodule:: task_processing.task_processor + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.utils.rst b/docs/source/generated/task_processing.utils.rst new file mode 100644 index 00000000..2253f56b --- /dev/null +++ b/docs/source/generated/task_processing.utils.rst @@ -0,0 +1,7 @@ +task\_processing.utils module +============================= + +.. 
automodule:: task_processing.utils + :members: + :undoc-members: + :show-inheritance: diff --git a/examples/__init__.py b/examples/__init__.py deleted file mode 100755 index e69de29b..00000000 diff --git a/examples/async.py b/examples/async.py deleted file mode 100755 index 39b21a1e..00000000 --- a/examples/async.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python3 -import logging -import time - -from common import parse_args - -from task_processing.runners.async_runner import Async -from task_processing.runners.async_runner import EventHandler -from task_processing.task_processor import TaskProcessor - -FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s - %(message)s" -LEVEL = logging.DEBUG -logging.basicConfig(format=FORMAT, level=LEVEL) - - -class Counter(object): - def __init__(self): - self.terminated = 0 - - def process_event(self, event): - self.terminated += 1 - - -def main(): - args = parse_args() - - processor = TaskProcessor() - processor.load_plugin(provider_module="task_processing.plugins.mesos") - executor = processor.executor_from_config( - provider="mesos_task", - provider_config={ - "secret": args.secret, - "mesos_address": args.master, - "role": args.role, - }, - ) - - counter = Counter() - runner = Async( - executor, - [EventHandler(predicate=lambda x: x.terminal, cb=counter.process_event)], - ) - - TaskConfig = executor.TASK_CONFIG_INTERFACE - tasks_to_launch = 2 - for _ in range(tasks_to_launch): - task_config = TaskConfig(image="busybox", cmd="/bin/true") - runner.run(task_config) - - for _ in range(5): - print("terminated {} tasks".format(counter.terminated)) - if counter.terminated >= tasks_to_launch: - break - time.sleep(2) - - runner.stop() - return 0 if counter.terminated >= tasks_to_launch else 1 - - -if __name__ == "__main__": - exit(main()) diff --git a/examples/cluster/docker-compose.yaml b/examples/cluster/docker-compose.yaml deleted file mode 100644 index dccecfaa..00000000 --- a/examples/cluster/docker-compose.yaml +++ /dev/null @@ -1,51 +0,0 @@ -version: "2" - -services: - zookeeper: - image: zookeeper - environment: - ZK_CONFIG: tickTime=2000,initLimit=10,syncLimit=5,maxClientCnxns=128,forceSync=no,clientPort=2181 - ZK_ID: 1 - mesosmaster: - image: mesosphere/mesos:1.3.0 - ports: - - 5050 - - 5054 - command: 'mesos-master --zk=zk://zookeeper:2181/mesos-testcluster --registry=in_memory --quorum=1 --authenticate --authenticate_agents --work_dir=/tmp/mesos --credentials=/etc/mesos-secrets' - depends_on: - - zookeeper - volumes: - - ./mesos-secrets:/etc/mesos-secrets - mesosagent: - image: mesosphere/mesos:1.3.0 - expose: - - 5051 - volumes: - - /var/run/docker.sock:/var/run/docker.sock - - ./mesos-agent-secret:/etc/mesos-agent-secret - environment: - CLUSTER: testcluster - command: 'mesos-agent --master=zk://zookeeper:2181/mesos-testcluster --resources="cpus:20;mem:2048;disk:2000;ports:[31000-31100];cpus(taskproc):10;mem(taskproc):1024;disk(taskproc):1000;ports(taskproc):[31200-31500]" --credential=/etc/mesos-agent-secret --containerizers=docker --docker=/usr/bin/docker --work_dir=/tmp/mesos --attributes="region:fakeregion;pool:default" --no-docker_kill_orphans --log_dir=/var/log/mesos' - depends_on: - - mesosmaster - - zookeeper - playground: - build: - context: ../.. 
- dockerfile: ./examples/cluster/playground/Dockerfile - args: - PIP_INDEX_URL: ${PIP_INDEX_URL} - environment: - MESOS: mesosmaster:5050 - DYNAMO: http://dynamodb:8000 - depends_on: - - zookeeper - - mesosmaster - - mesosagent - - dynamodb - volumes: - - /var/run/docker.sock:/var/run/docker.sock - dynamodb: - image: deangiberson/aws-dynamodb-local - ports: - - '8000:8000' diff --git a/examples/cluster/mesos-agent-secret b/examples/cluster/mesos-agent-secret deleted file mode 100644 index c666e111..00000000 --- a/examples/cluster/mesos-agent-secret +++ /dev/null @@ -1,4 +0,0 @@ - { - "principal": "agent", - "secret": "secretagent" - } diff --git a/examples/cluster/mesos-secrets b/examples/cluster/mesos-secrets deleted file mode 100644 index 7d7663a6..00000000 --- a/examples/cluster/mesos-secrets +++ /dev/null @@ -1,12 +0,0 @@ -{ - "credentials": [ - { - "principal": "taskproc", - "secret": "secret" - }, - { - "principal": "agent", - "secret": "secretagent" - } - ] -} diff --git a/examples/cluster/playground/Dockerfile b/examples/cluster/playground/Dockerfile deleted file mode 100644 index d41295a7..00000000 --- a/examples/cluster/playground/Dockerfile +++ /dev/null @@ -1,35 +0,0 @@ -FROM ubuntu:jammy - -RUN apt-get update -yq && \ - apt-get install -yq \ - # needed to add a ppa - software-properties-common && \ - add-apt-repository ppa:deadsnakes/ppa - -RUN apt-get update -q && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ - software-properties-common \ - debhelper dpkg-dev gcc gdebi-core git help2man libffi-dev \ - dh-virtualenv \ - libssl-dev libsasl2-modules libyaml-dev pyflakes3 python3.8-dev python3.8-distutils python3-pip python3-pytest python3-http-parser\ - tox python3-yaml wget zip zsh \ - openssh-server docker.io curl vim jq libsvn-dev \ - && apt-get clean - -ARG PIP_INDEX_URL -ENV PIP_INDEX_URL=${PIP_INDEX_URL:-https://pypi.python.org/simple} -RUN pip3 install --index-url ${PIP_INDEX_URL} virtualenv==16.7.5 - -RUN sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd -RUN mkdir /var/run/sshd - -ADD . /src -ENV PYTHONPATH=/src -WORKDIR /src - -# temporarily downpin cryptography until we can make it grab the correct pre-built wheel in itests -RUN pip3 install . 
-RUN pip3 install -r requirements-dev.txt -RUN pip3 install pymesos - -CMD /bin/bash diff --git a/examples/cluster/secret b/examples/cluster/secret deleted file mode 100644 index d97c5ead..00000000 --- a/examples/cluster/secret +++ /dev/null @@ -1 +0,0 @@ -secret diff --git a/examples/common.py b/examples/common.py deleted file mode 100644 index 2df2849c..00000000 --- a/examples/common.py +++ /dev/null @@ -1,38 +0,0 @@ -import argparse -import os - - -def parse_args(): - parser = argparse.ArgumentParser(description="Runs a task processing task") - - parser.add_argument( - "-m", - "--master", - dest="master", - default=os.environ.get("MESOS", "127.0.0.1:5050"), - help="mesos master address", - ) - - parser.add_argument("-p", "--pool", dest="pool", help="mesos resource pool to use") - - parser.add_argument( - "-r", - "--role", - dest="role", - default="taskproc", - help="mesos reservation role to use", - ) - - with open("./examples/cluster/secret") as f: - default_secret = f.read().strip() - - parser.add_argument( - "-s", - "--secret", - dest="secret", - default=default_secret, - help="mesos secret to use", - ) - - args = parser.parse_args() - return args diff --git a/examples/dynamo_persistence.py b/examples/dynamo_persistence.py deleted file mode 100755 index cb28b045..00000000 --- a/examples/dynamo_persistence.py +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env python3 -import logging -import os - -from boto3 import session -from botocore.errorfactory import ClientError - -from task_processing.plugins.persistence.dynamodb_persistence import DynamoDBPersister -from task_processing.runners.sync import Sync -from task_processing.task_processor import TaskProcessor - -FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s - %(message)s" -LEVEL = logging.DEBUG -logging.basicConfig(format=FORMAT, level=LEVEL) - - -def main(): - mesos_address = os.getenv("MESOS", "mesosmaster:5050") - with open("./examples/cluster/secret") as f: - secret = f.read().strip() - - processor = TaskProcessor() - for p in ["mesos", "stateful"]: - processor.load_plugin(provider_module="task_processing.plugins." 
+ p) - mesos_executor = processor.executor_from_config( - provider="mesos_task", - provider_config={ - "secret": secret, - "mesos_address": mesos_address, - "role": "taskproc", - }, - ) - - s = session.Session( - region_name="foo", aws_access_key_id="foo", aws_secret_access_key="bar" - ) - dynamo_address = os.getenv("DYNAMO", "http://dynamodb:5050") - client = s.client( - service_name="dynamodb", - endpoint_url=dynamo_address, - ) - try: - create_table(client) - except ClientError: - pass - - executor = processor.executor_from_config( - provider="stateful", - provider_config={ - "downstream_executor": mesos_executor, - "persister": DynamoDBPersister( - table_name="events", endpoint_url=dynamo_address, session=s - ), - }, - ) - runner = Sync(executor=executor) - tasks = set() - TaskConfig = mesos_executor.TASK_CONFIG_INTERFACE - for _ in range(1, 2): - task_config = TaskConfig(image="ubuntu:14.04", cmd="/bin/sleep 2") - tasks.add(task_config.task_id) - runner.run(task_config) - print(executor.status(task_config.task_id)) - - -def create_table(client): - return client.create_table( - TableName="events", - KeySchema=[ - {"AttributeName": "task_id", "KeyType": "HASH"}, - {"AttributeName": "timestamp", "KeyType": "RANGE"}, - ], - AttributeDefinitions=[ - {"AttributeName": "task_id", "AttributeType": "S"}, - {"AttributeName": "timestamp", "AttributeType": "N"}, - ], - ProvisionedThroughput={"ReadCapacityUnits": 123, "WriteCapacityUnits": 123}, - ) - - -if __name__ == "__main__": - exit(main()) diff --git a/examples/file_persistence.py b/examples/file_persistence.py deleted file mode 100755 index 9606645c..00000000 --- a/examples/file_persistence.py +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python3 -import logging -import os - -from task_processing.plugins.persistence.file_persistence import FilePersistence -from task_processing.runners.sync import Sync -from task_processing.task_processor import TaskProcessor - -FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s - %(message)s" -LEVEL = logging.DEBUG -logging.basicConfig(format=FORMAT, level=LEVEL) - - -def main(): - mesos_address = os.getenv("MESOS", "mesosmaster:5050") - with open("./examples/cluster/secret") as f: - secret = f.read().strip() - - processor = TaskProcessor() - for p in ["mesos", "stateful"]: - processor.load_plugin(provider_module="task_processing.plugins." 
+ p) - mesos_executor = processor.executor_from_config( - provider="mesos_task", - provider_config={ - "secret": secret, - "mesos_address": mesos_address, - "role": "taskproc", - }, - ) - executor = processor.executor_from_config( - provider="stateful", - provider_config={ - "downstream_executor": mesos_executor, - "persister": FilePersistence(output_file="/tmp/foo"), - }, - ) - - runner = Sync(executor=executor) - tasks = set() - TaskConfig = mesos_executor.TASK_CONFIG_INTERFACE - for _ in range(1, 2): - task_config = TaskConfig(image="busybox", cmd="/bin/true") - tasks.add(task_config.task_id) - runner.run(task_config) - print(executor.status(task_config.task_id)) - - -if __name__ == "__main__": - exit(main()) diff --git a/examples/hello-world.py b/examples/hello-world.py deleted file mode 100755 index 0f0a6b2d..00000000 --- a/examples/hello-world.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python3 -import os - -from task_processing.runners.sync import Sync -from task_processing.task_processor import TaskProcessor - -"""Simple hello-world example of how to use Task Processing (taskproc) -""" - - -def main(): - # get address of the Mesos cluster - mesos_address = os.getenv("MESOS", "mesosmaster:5050") - - # read in secret, this is used to authenticate the taskproc scheduler with - # Mesos - with open("./examples/cluster/secret") as f: - secret = f.read().strip() - - # create a processor instance - processor = TaskProcessor() - - # configure plugins - processor.load_plugin(provider_module="task_processing.plugins.mesos") - - # create an executor (taskproc executor NOT to be confused with a Mesos - # executor) using this defined configuration. this config can also be used - # to specify other Mesos properties, such as which role to use - executor = processor.executor_from_config( - provider="mesos_task", - provider_config={ - "secret": secret, - "mesos_address": mesos_address, - "role": "taskproc", - }, - ) - - # creates a new Sync runner that will synchronously execute tasks - # (i.e. 
block until completion) - runner = Sync(executor) - - # next, create a TaskConfig to run - # this is where properties of the Mesos task can be specified in this - # example, we use the busybox Docker image and just echo "hello world" - TaskConfig = executor.TASK_CONFIG_INTERFACE - task_config = TaskConfig(image="busybox", cmd='echo "hello world"') - - # run our task and print the result - result = runner.run(task_config) - print(result) - - # this stops the taskproc framework and unregisters it from Mesos - runner.stop() - - return 0 if result.success else 1 - - -if __name__ == "__main__": - exit(main()) diff --git a/examples/offer_timeout.py b/examples/offer_timeout.py deleted file mode 100755 index 77f04898..00000000 --- a/examples/offer_timeout.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python3 -import logging -import time - -from common import parse_args - -from task_processing.runners.async_runner import Async -from task_processing.runners.async_runner import EventHandler -from task_processing.task_processor import TaskProcessor - -FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s - %(message)s" -LEVEL = logging.DEBUG -logging.basicConfig(format=FORMAT, level=LEVEL) - - -class Counter(object): - def __init__(self): - self.terminated = 0 - - def process_event(self, event): - print("task %s finished" % (event.task_id)) - self.terminated += 1 - - -def main(): - c = Counter() - args = parse_args() - processor = TaskProcessor() - processor.load_plugin(provider_module="task_processing.plugins.mesos") - mesos_executor = processor.executor_from_config( - provider="mesos_task", - provider_config={ - "secret": args.secret, - "mesos_address": args.master, - "pool": args.pool, - "role": args.role, - }, - ) - - TaskConfig = mesos_executor.TASK_CONFIG_INTERFACE - runner = Async( - mesos_executor, - [ - EventHandler( - predicate=lambda x: x.terminal, - cb=c.process_event, - ) - ], - ) - timeout_task_config = TaskConfig( - image="busybox", - cmd="exec /bin/sleep 100", - offer_timeout=5.0, - cpus=20, - mem=2048, - disk=2000, - ) - runner.run(timeout_task_config) - - for _ in range(50): - if c.terminated >= 1: - break - print("waiting for task %s to finish" % (timeout_task_config.task_id)) - time.sleep(2) - - runner.stop() - return 0 - - -if __name__ == "__main__": - exit(main()) diff --git a/examples/promise.py b/examples/promise.py deleted file mode 100755 index 053ff0bf..00000000 --- a/examples/promise.py +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env python3 -import logging -from concurrent.futures import ThreadPoolExecutor -from concurrent.futures import wait - -from common import parse_args - -from task_processing.runners.promise import Promise -from task_processing.task_processor import TaskProcessor - -FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s - %(message)s" -LEVEL = logging.DEBUG -logging.basicConfig(format=FORMAT, level=LEVEL) - - -def main(): - args = parse_args() - processor = TaskProcessor() - processor.load_plugin(provider_module="task_processing.plugins.mesos") - executor = processor.executor_from_config( - provider="mesos_task", - provider_config={ - "secret": args.secret, - "mesos_address": args.master, - "pool": args.pool, - "role": args.role, - }, - ) - - TaskConfig = executor.TASK_CONFIG_INTERFACE - task_config = TaskConfig(image="busybox", cmd="/bin/true") - # This only works on agents that have added mesos as a containerizer - # task_config = TaskConfig(containerizer='MESOS', cmd='/bin/true') - - with ThreadPoolExecutor(max_workers=2) as 
futures_executor: - runner = Promise(executor, futures_executor) - future = runner.run(task_config) - wait([future]) - result = future.result() - print(result) - print(result.raw) - runner.stop() - - return 0 if result.success else 1 - - -if __name__ == "__main__": - exit(main()) diff --git a/examples/retry.py b/examples/retry.py deleted file mode 100755 index 29f6642a..00000000 --- a/examples/retry.py +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/env python3 -import logging - -from common import parse_args - -from task_processing.runners.sync import Sync -from task_processing.task_processor import TaskProcessor - -FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s - %(message)s" -LEVEL = logging.DEBUG -logging.basicConfig(format=FORMAT, level=LEVEL) - - -def main(): - args = parse_args() - processor = TaskProcessor() - processor.load_plugin(provider_module="task_processing.plugins.mesos") - mesos_executor = processor.executor_from_config( - provider="mesos_task", - provider_config={ - "secret": args.secret, - "mesos_address": args.master, - "pool": args.pool, - "role": args.role, - }, - ) - - executor = processor.executor_from_config( - provider="retrying", - provider_config={ - "downstream_executor": mesos_executor, - }, - ) - - TaskConfig = mesos_executor.TASK_CONFIG_INTERFACE - runner = Sync(executor=executor) - task_config = TaskConfig( - image="docker-dev.yelpcorp.com/dumb-busybox", cmd="/bin/false", retries=2 - ) - result = runner.run(task_config) - print(result) - - runner.stop() - - -if __name__ == "__main__": - exit(main()) diff --git a/examples/subscription.py b/examples/subscription.py deleted file mode 100755 index 62247277..00000000 --- a/examples/subscription.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python3 -import logging -import os - -from six.moves.queue import Empty -from six.moves.queue import Queue - -from task_processing.runners.subscription import Subscription -from task_processing.task_processor import TaskProcessor - -FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s - %(message)s" -LEVEL = logging.DEBUG -logging.basicConfig(format=FORMAT, level=LEVEL) - - -def main(): - mesos_address = os.environ["MESOS"] - with open("./examples/cluster/secret") as f: - secret = f.read().strip() - - processor = TaskProcessor() - processor.load_plugin(provider_module="task_processing.plugins.mesos") - executor = processor.executor_from_config( - provider="mesos_task", - provider_config={ - "secret": secret, - "mesos_address": mesos_address, - "role": "taskproc", - }, - ) - - queue = Queue(100) - runner = Subscription(executor, queue) - - tasks = set() - TaskConfig = executor.TASK_CONFIG_INTERFACE - for _ in range(2): - task_config = TaskConfig(image="busybox", cmd="/bin/true") - tasks.add(task_config.task_id) - runner.run(task_config) - - print("Running {} tasks: {}".format(len(tasks), tasks)) - while len(tasks) > 0: - try: - event = queue.get(block=True, timeout=10) - except Empty: - event = None - - if event is None: - print("Timeout while waiting for {}".format(tasks)) - break - else: - if event.terminal: - tasks.discard(event.task_id) - - runner.stop() - return 0 if len(tasks) == 0 else 1 - - -if __name__ == "__main__": - exit(main()) diff --git a/examples/sync.py b/examples/sync.py deleted file mode 100755 index aa290356..00000000 --- a/examples/sync.py +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env python3 -import logging - -from common import parse_args - -from task_processing.runners.sync import Sync -from task_processing.task_processor 
import TaskProcessor - -FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s - %(message)s" -LEVEL = logging.DEBUG -logging.basicConfig(format=FORMAT, level=LEVEL) - - -def main(): - args = parse_args() - processor = TaskProcessor() - processor.load_plugin(provider_module="task_processing.plugins.mesos") - executor = processor.executor_from_config( - provider="mesos_task", - provider_config={ - "secret": args.secret, - "mesos_address": args.master, - "pool": args.pool, - "role": args.role, - }, - ) - - TaskConfig = executor.TASK_CONFIG_INTERFACE - task_config = TaskConfig(image="busybox", cmd="/bin/true") - # This only works on agents that have added mesos as a containerizer - # task_config = TaskConfig(containerizer='MESOS', cmd='/bin/true') - - runner = Sync(executor) - result = runner.run(task_config) - print(result) - print(result.raw) - runner.stop() - - return 0 if result.success else 1 - - -if __name__ == "__main__": - exit(main()) diff --git a/examples/task_logging.py b/examples/task_logging.py deleted file mode 100755 index 6bff6ad1..00000000 --- a/examples/task_logging.py +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env python3 -import logging - -from common import parse_args - -from task_processing.runners.sync import Sync -from task_processing.task_processor import TaskProcessor - -FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s - %(message)s" -LEVEL = logging.DEBUG -logging.basicConfig(format=FORMAT, level=LEVEL) - - -def main(): - args = parse_args() - processor = TaskProcessor() - processor.load_plugin(provider_module="task_processing.plugins.mesos") - mesos_executor = processor.executor_from_config( - provider="mesos_task", - provider_config={ - "secret": args.secret, - "mesos_address": args.master, - "pool": args.pool, - "role": args.role, - }, - ) - - executor = processor.executor_from_config( - provider="logging", - provider_config={ - "downstream_executor": mesos_executor, - }, - ) - - TaskConfig = mesos_executor.TASK_CONFIG_INTERFACE - runner = Sync(executor=executor) - task_config = TaskConfig( - image="ubuntu:14.04", - cmd="bash -c 'for i in $(seq 1 5); do echo $i&&sleep 10; done'", - ) - result = runner.run(task_config) - print(result) - - runner.stop() - - -if __name__ == "__main__": - exit(main()) diff --git a/examples/timeout.py b/examples/timeout.py deleted file mode 100755 index f1d5a603..00000000 --- a/examples/timeout.py +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python3 -import logging - -from common import parse_args - -from task_processing.runners.sync import Sync -from task_processing.task_processor import TaskProcessor - -FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s - %(message)s" -LEVEL = logging.DEBUG -logging.basicConfig(format=FORMAT, level=LEVEL) - - -def main(): - args = parse_args() - processor = TaskProcessor() - processor.load_plugin(provider_module="task_processing.plugins.mesos") - mesos_executor = processor.executor_from_config( - provider="mesos_task", - provider_config={ - "secret": args.secret, - "mesos_address": args.master, - "pool": args.pool, - "role": args.role, - }, - ) - - executor = processor.executor_from_config( - provider="timeout", - provider_config={ - "downstream_executor": mesos_executor, - }, - ) - - TaskConfig = mesos_executor.TASK_CONFIG_INTERFACE - runner = Sync(executor=executor) - task_config = TaskConfig( - image="docker-dev.yelpcorp.com/dumb-busybox", - cmd="exec dumb-init /bin/sleep 30", - timeout=10, - ) - result = runner.run(task_config) - print(result) - - 
runner.stop() - - -if __name__ == "__main__": - exit(main()) diff --git a/itest b/itest deleted file mode 100755 index 8957424e..00000000 --- a/itest +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -set -eux - -examples/async.py -examples/file_persistence.py -examples/hello-world.py -examples/subscription.py -examples/sync.py -examples/retry.py -examples/timeout.py - -# TODO: These should probably also be run eventually: -# examples/promise.py -# examples/dynamo_persistence.py diff --git a/setup.py b/setup.py index cfd095b8..7158d454 100644 --- a/setup.py +++ b/setup.py @@ -39,8 +39,6 @@ "pyrsistent", ], extras_require={ - # We can add the Mesos specific dependencies here - "mesos_executor": ["addict", "pymesos>=0.2.14", "requests"], "metrics": ["yelp-meteorite"], "persistence": ["boto3"], "k8s": ["kubernetes", "typing-extensions"], diff --git a/task_processing/plugins/mesos/__init__.py b/task_processing/plugins/mesos/__init__.py deleted file mode 100644 index 5f546fe5..00000000 --- a/task_processing/plugins/mesos/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from .logging_executor import MesosLoggingExecutor -from .mesos_pod_executor import MesosPodExecutor -from .mesos_task_executor import MesosTaskExecutor -from .retrying_executor import RetryingExecutor -from .timeout_executor import TimeoutExecutor - - -TASK_PROCESSING_PLUGIN = "mesos_plugin" - - -def register_plugin(registry): - return ( - registry.register_task_executor("logging", MesosLoggingExecutor) - .register_deprecated_task_executor("mesos", MesosTaskExecutor) - .register_task_executor("mesos_task", MesosTaskExecutor) - .register_task_executor("mesos_pod", MesosPodExecutor) - .register_task_executor("retrying", RetryingExecutor) - .register_task_executor("timeout", TimeoutExecutor) - ) diff --git a/task_processing/plugins/mesos/constraints.py b/task_processing/plugins/mesos/constraints.py deleted file mode 100644 index 4b470ab8..00000000 --- a/task_processing/plugins/mesos/constraints.py +++ /dev/null @@ -1,65 +0,0 @@ -import re - -from pyrsistent import field -from pyrsistent import PRecord - - -def equals_op(expected_value, actual_value): - return expected_value == actual_value - - -def notequals_op(expected_value, actual_value): - return expected_value != actual_value - - -def like_op(re_pattern, actual_value): - return re.fullmatch(re_pattern, actual_value) - - -def unlike_op(re_pattern, actual_value): - return not like_op(re_pattern, actual_value) - - -OPERATORS = { - "EQUALS": equals_op, - "==": equals_op, - "NOTEQUALS": notequals_op, - "!=": notequals_op, - "LIKE": like_op, - "UNLIKE": unlike_op, -} - - -def _attributes_match_constraint(attributes, constraint): - actual_value = attributes.get(constraint.attribute) - # If the dictionary doesn't contain an attribute from the constraint then - # the constraint is satisfied. - if actual_value is None: - return True - - # The operator names have already been validated by the validator in - # `MesosTaskConfig`, so it's guaranteed that it's in `OPERATORS`. - return OPERATORS[constraint.operator](constraint.value, actual_value) - - -def attributes_match_constraints(attributes, constraints): - # If constraints aren't specified then they are satisfied. 
- if constraints is None: - return True - - return all(_attributes_match_constraint(attributes, c) for c in constraints) - - -def valid_constraint_operator_name(name): - operators_names = OPERATORS.keys() - return ( - name in operators_names, - "{operator} is not a valid operator, valid operators are " - "{operators}.".format(operator=name, operators=operators_names), - ) - - -class Constraint(PRecord): - attribute = field(type=str) - operator = field(type=str, invariant=valid_constraint_operator_name) - value = field(type=str) diff --git a/task_processing/plugins/mesos/execution_framework.py b/task_processing/plugins/mesos/execution_framework.py deleted file mode 100644 index 357143f5..00000000 --- a/task_processing/plugins/mesos/execution_framework.py +++ /dev/null @@ -1,693 +0,0 @@ -import logging -import socket -import threading -import time -from collections import defaultdict -from queue import Queue -from typing import Optional # noqa, flake8 issue -from typing import TYPE_CHECKING - -from addict import Dict -from pymesos.interface import Scheduler -from pyrsistent import field -from pyrsistent import m -from pyrsistent import PMap -from pyrsistent import pmap -from pyrsistent import PRecord -from pyrsistent import PVector -from pyrsistent import v - -from task_processing.interfaces.event import control_event -from task_processing.interfaces.event import task_event -from task_processing.metrics import create_counter -from task_processing.metrics import create_timer -from task_processing.metrics import get_metric -from task_processing.plugins.mesos import metrics -from task_processing.plugins.mesos.resource_helpers import get_offer_resources - - -if TYPE_CHECKING: - from .mesos_executor import MesosExecutorCallbacks # noqa - - -log = logging.getLogger(__name__) - - -class TaskMetadata(PRecord): - agent_id = field(type=str, initial="") - task_config = field(type=PRecord, mandatory=True) - task_state = field(type=str, mandatory=True) - task_state_history = field(type=PMap, factory=pmap, mandatory=True) - - -class ExecutionFramework(Scheduler): - callbacks: "MesosExecutorCallbacks" - - def __init__( - self, - name, - role, - callbacks: "MesosExecutorCallbacks", - task_staging_timeout_s, - pool=None, - slave_blacklist_timeout_s=900, - offer_backoff=10, - suppress_delay=10, - initial_decline_delay=1, - task_reconciliation_delay=300, - framework_id=None, - failover_timeout=604800, # 1 week - ) -> None: - self.name = name - # wait this long for a task to launch. 
- self.task_staging_timeout_s = task_staging_timeout_s - self.pool = pool - self.role = role - self.callbacks = callbacks - self.slave_blacklist_timeout_s = slave_blacklist_timeout_s - self.offer_backoff = offer_backoff - - # TODO: why does this need to be root, can it be "mesos plz figure out" - self.framework_info = Dict( - user="root", - name=self.name, - checkpoint=True, - role=self.role, - failover_timeout=failover_timeout, - ) - if framework_id: - self.framework_info["id"] = {"value": framework_id} - - self.task_queue: Queue = Queue() - self.event_queue: Queue = Queue() - self._driver: Optional[Scheduler] = None - self.are_offers_suppressed = False - self.suppress_after = int(time.time()) + suppress_delay - self.decline_after = time.time() + initial_decline_delay - self._task_reconciliation_delay = task_reconciliation_delay - self._reconcile_tasks_at = time.time() + self._task_reconciliation_delay - - self.offer_decline_filter = Dict(refuse_seconds=self.offer_backoff) - self._lock = threading.RLock() - self.blacklisted_slaves: PVector = v() - self.task_metadata: PMap = m() - - self._initialize_metrics() - self._last_offer_time: Optional[float] = None - self._terminal_task_counts = { - "TASK_FINISHED": metrics.TASK_FINISHED_COUNT, - "TASK_LOST": metrics.TASK_LOST_COUNT, - "TASK_KILLED": metrics.TASK_KILLED_COUNT, - "TASK_FAILED": metrics.TASK_FAILED_COUNT, - "TASK_ERROR": metrics.TASK_ERROR_COUNT, - "TASK_OFFER_TIMEOUT": metrics.TASK_OFFER_TIMEOUT, - } - - self.driver_error = object() - - self.stopping = False - task_kill_thread = threading.Thread(target=self._background_check, args=()) - task_kill_thread.daemon = True - task_kill_thread.start() - - def call_driver(self, method, *args, **kwargs): - if not self._driver: - log.error(f"{method} failed: No driver") - return self.driver_error - - try: - return getattr(self._driver, method)(*args, **kwargs) - except (socket.timeout, Exception) as e: - log.warning(f"{method} failed: {str(e)}") - return self.driver_error - - def _background_check_task(self, time_now, tasks_to_reconcile, task_id, md): - if md.task_state != "TASK_INITED": - tasks_to_reconcile.append(task_id) - - if md.task_state == "TASK_INITED": - # give up if the task hasn't launched after - # offer_timeout - inited_at = md.task_state_history["TASK_INITED"] - offer_timeout = md.task_config.offer_timeout - expires_at = inited_at + offer_timeout - if time_now >= expires_at: - log.warning( - f"Task {task_id} has been waiting for offers " - "for longer than configured timeout " - f"{offer_timeout}. Giving up and removing the " - "task from the task queue." 
- ) - # killing the task will also remove them from the queue - self.kill_task(task_id) - # we are not expecting mesos to send terminal update - # for this task, so cleaning it up manually - self.task_metadata = self.task_metadata.discard(task_id) - self.event_queue.put( - task_event( - task_id=task_id, - terminal=True, - timestamp=time_now, - success=False, - message="stop", - task_config=md.task_config, - raw="Failed due to offer timeout", - ) - ) - get_metric(metrics.TASK_OFFER_TIMEOUT).count(1) - - # Task is not eligible for killing or reenqueuing - in_current_state_since = md.task_state_history[md.task_state] - if time_now < in_current_state_since + self.task_staging_timeout_s: - return - - if md.task_state == "UNKNOWN": - log.warning( - f"Re-enqueuing task {task_id} in unknown state for " - f"longer than {self.task_staging_timeout_s}" - ) - # Re-enqueue task - self.enqueue_task(md.task_config) - get_metric(metrics.TASK_FAILED_TO_LAUNCH_COUNT).count(1) - elif md.task_state == "TASK_STAGING": - log.warning(f"Killing stuck task {task_id}") - self.kill_task(task_id) - self.task_metadata = self.task_metadata.set( - task_id, - md.set( - task_state="TASK_STUCK", - task_state_history=md.task_state_history.set( - "TASK_STUCK", time_now - ), - ), - ) - self.blacklist_slave( - agent_id=self.task_metadata[task_id].agent_id, - timeout=self.slave_blacklist_timeout_s, - ) - get_metric(metrics.TASK_STUCK_COUNT).count(1) - elif md.task_state == "TASK_STUCK": - t = time.time() - # 10s since last iteration + time we spent in current one - time_delta = 10 + t - time_now - # seconds since task was put in TASK_STUCK state - time_stuck = t - md.task_state_history["TASK_STUCK"] - # seconds since `time_stuck` crossed another hour - # boundary - hour_rolled = time_stuck % 3600 - - # if `time_stuck` crossed hour boundary since last - # background check - lets re-send kill request - if hour_rolled < time_delta: - hours_stuck = time_stuck // 3600 - log.warning( - f"Task {task_id} is stuck, waiting for terminal " - f"state for {hours_stuck}h, sending another kill" - ) - self.kill_task(task_id) - - def _background_check(self): - while True: - if self.stopping: - return - - time_now = time.time() - tasks_to_reconcile = [] - with self._lock: - for task_id, md in self.task_metadata.items(): - self._background_check_task( - time_now, - tasks_to_reconcile, - task_id, - md, - ) - - self._reconcile_tasks( - [ - Dict({"task_id": Dict({"value": task_id})}) - for task_id in tasks_to_reconcile - ] - ) - elapsed = time.time() - time_now - log.info(f"background check done in {elapsed}s") - get_metric(metrics.BGCHECK_TIME_TIMER).record(elapsed) - time.sleep(10) - - def reconcile_task(self, task_config): - task_id = task_config.task_id - with self._lock: - if task_id in self.task_metadata: - md = self.task_metadata[task_id] - self.task_metadata = self.task_metadata.set( - task_id, - md.set( - task_state="TASK_RECONCILING", - task_state_history=md.task_state_history.set( - "TASK_RECONCILING", time.time() - ), - ), - ) - else: - log.info(f"Adding {task_id} to metadata for reconciliation") - self.task_metadata = self.task_metadata.set( - task_id, - TaskMetadata( - task_config=task_config, - task_state="TASK_RECONCILING", - task_state_history=m(TASK_RECONCILING=time.time()), - ), - ) - self._reconcile_tasks([Dict({"task_id": Dict({"value": task_id})})]) - - def _reconcile_tasks(self, tasks_to_reconcile): - if time.time() < self._reconcile_tasks_at: - return - - log.info(f"Reconciling following tasks {tasks_to_reconcile}") - - if 
len(tasks_to_reconcile) > 0: - self.call_driver("reconcileTasks", tasks_to_reconcile) - - self._reconcile_tasks_at += self._task_reconciliation_delay - - def offer_matches_pool(self, offer): - if self.pool is None: - # If pool is not specified, then we can accept offers from any agent - return True, None - - for attribute in offer.attributes: - if attribute.name == "pool": - return attribute.text.value == self.pool, attribute.text.value - - return False, None - - def kill_task(self, task_id): - tmp_list = [] - flag = False - with self._lock: - while not self.task_queue.empty(): - t = self.task_queue.get() - if task_id == t.task_id: - flag = True - self.task_metadata = self.task_metadata.discard(task_id) - else: - tmp_list.append(t) - - for t in tmp_list: - self.task_queue.put(t) - - if flag is False: - if self.call_driver("killTask", Dict(value=task_id)) is self.driver_error: - return False - - return True - - def blacklist_slave(self, agent_id, timeout): - with self._lock: - # A new entry is appended even if the agent is already blacklisted. - # This is equivalent to restarting the blacklist timer. - log.info(f"Blacklisting slave: {agent_id} for {timeout} seconds.") - self.blacklisted_slaves = self.blacklisted_slaves.append(agent_id) - get_metric(metrics.BLACKLISTED_AGENTS_COUNT).count(1) - - unblacklist_thread = threading.Thread( - target=self.unblacklist_slave, - kwargs={"timeout": timeout, "agent_id": agent_id}, - ) - unblacklist_thread.daemon = True - unblacklist_thread.start() - - def unblacklist_slave(self, agent_id, timeout): - time.sleep(timeout) - log.info(f"Unblacklisting slave: {agent_id}") - with self._lock: - self.blacklisted_slaves = self.blacklisted_slaves.remove(agent_id) - - def enqueue_task(self, task_config): - with self._lock: - # task_state and task_state_history get reset every time - # a task is enqueued. - self.task_metadata = self.task_metadata.set( - task_config.task_id, - TaskMetadata( - task_config=task_config, - task_state="TASK_INITED", - task_state_history=m(TASK_INITED=time.time()), - ), - ) - # Need to lock on task_queue to prevent enqueues when getting - # tasks to launch - self.task_queue.put(task_config) - - if self.are_offers_suppressed: - if self.call_driver("reviveOffers") is not self.driver_error: - self.are_offers_suppressed = False - log.info("Reviving offers because we have tasks to run.") - - get_metric(metrics.TASK_ENQUEUED_COUNT).count(1) - - def launch_tasks_for_offer(self, offer, tasks_to_launch) -> bool: - mesos_protobuf_tasks = [ - self.callbacks.make_mesos_protobuf( - task_config, offer.agent_id.value, self.role - ) - for task_config in tasks_to_launch - if task_config.task_id in self.task_metadata - ] - if not mesos_protobuf_tasks: - return False - - launched = True - launch_time = time.time() - if ( - self.call_driver("launchTasks", offer.id, mesos_protobuf_tasks) - is self.driver_error - ): - tasks = ", ".join(task.task_id for task in tasks_to_launch) - log.warning(f"Failed to launch: {tasks}, moving them to UNKNOWN state") - get_metric(metrics.TASK_LAUNCH_FAILED_COUNT).count(1) - launched = False - - # 'UNKNOWN' state is for internal tracking. It will not be - # propagated to users. - current_task_state = "TASK_STAGING" if launched else "UNKNOWN" - - for task in tasks_to_launch: - md = self.task_metadata.get(task.task_id) - if not md: - log.warning( - f"trying to launch task {task.task_id}, but it is not in task metadata. "
- f"current keys in task_metadata: {self.task_metadata.keys()}" - ) - continue - self.task_metadata = self.task_metadata.set( - task.task_id, - md.set( - task_state=current_task_state, - task_state_history=md.task_state_history.set( - current_task_state, launch_time - ), - agent_id=str(offer.agent_id.value), - ), - ) - - get_metric(metrics.TASK_QUEUED_TIME_TIMER).record( - launch_time - md.task_state_history["TASK_INITED"] - ) - - # Emit the staging event for successful launches - if launched: - self.event_queue.put( - self.callbacks.handle_status_update( - Dict(state="TASK_STAGING", offer=offer), - md.task_config, - ) - ) - get_metric(metrics.TASK_LAUNCHED_COUNT).count(1) - - return launched - - def stop(self): - self.stopping = True - - # TODO: add mesos cluster dimension when available - def _initialize_metrics(self): - default_dimensions = { - "framework_name": ".".join(self.name.split()[:2]), - "framework_role": self.role, - } - - counters = [ - metrics.TASK_LAUNCHED_COUNT, - metrics.TASK_FINISHED_COUNT, - metrics.TASK_FAILED_COUNT, - metrics.TASK_KILLED_COUNT, - metrics.TASK_LOST_COUNT, - metrics.TASK_ERROR_COUNT, - metrics.TASK_ENQUEUED_COUNT, - metrics.TASK_INSUFFICIENT_OFFER_COUNT, - metrics.TASK_STUCK_COUNT, - metrics.BLACKLISTED_AGENTS_COUNT, - metrics.TASK_LOST_DUE_TO_INVALID_OFFER_COUNT, - metrics.TASK_LAUNCH_FAILED_COUNT, - metrics.TASK_FAILED_TO_LAUNCH_COUNT, - metrics.TASK_OFFER_TIMEOUT, - ] - for cnt in counters: - create_counter(cnt, default_dimensions) - - timers = [ - metrics.OFFER_DELAY_TIMER, - metrics.TASK_QUEUED_TIME_TIMER, - metrics.BGCHECK_TIME_TIMER, - ] - for tmr in timers: - create_timer(tmr, default_dimensions) - - #################################################################### - # Mesos driver hooks go here # - #################################################################### - def offerRescinded(self, driver, offerId): - # TODO(sagarp): Executor should be able to deal with this. 
- log.warning(f"Offer {offerId} rescinded") - - def error(self, driver, message): - event = control_event(raw=message) - - # TODO: have a mapper function similar to translator of task events - if message == "Framework has been removed": - event = event.set(message="stop") - else: - event = event.set(message="unknown") - - self.event_queue.put(event) - - def slaveLost(self, drive, slaveId): - log.warning(f"Slave lost: {str(slaveId)}") - - def registered(self, driver, frameworkId, masterInfo): - self._driver = driver - event = control_event( - raw={ - "master_info": masterInfo, - "framework_id": frameworkId, - }, - message="registered", - ) - self.event_queue.put(event) - log.info( - f"Registered with framework ID {frameworkId.value} and role {self.role}" - ) - - def reregistered(self, driver, masterInfo): - self._driver = driver - log.warning(f"Re-registered to {masterInfo} with role {self.role}") - - def resourceOffers(self, driver, offers) -> None: - self._driver = driver - - current_offer_time = time.time() - if self._last_offer_time is not None: - get_metric(metrics.OFFER_DELAY_TIMER).record( - current_offer_time - self._last_offer_time - ) - self._last_offer_time = current_offer_time - - # Give user some time to enqueue tasks - if self.task_queue.empty() and current_offer_time < self.decline_after: - time.sleep(self.decline_after - current_offer_time) - - declined: dict = defaultdict(list) - declined_offer_ids = [] - accepted = [] - - with self._lock: - if self.task_queue.empty(): - # Always suppress offers when there is nothing to run - if self.call_driver("suppressOffers") is not self.driver_error: - self.are_offers_suppressed = True - log.info("Suppressing offers, no more tasks to run.") - - for offer in offers: - declined["no tasks"].append(offer.id.value) - declined_offer_ids.append(offer.id) - - self.call_driver( - "declineOffer", declined_offer_ids, self.offer_decline_filter - ) - log.info( - f"Offers declined because of no tasks: {','.join(declined['no tasks'])}" - ) - return - - with_maintenance_window = [offer for offer in offers if offer.unavailability] - - for offer in with_maintenance_window: - start_time = offer.unavailability.start["nanoseconds"] - completion_time = int( - (start_time + offer.unavailability.duration["nanoseconds"]) / 1000000000 - ) - now = int(time.time()) - duration = completion_time - now - if duration > 0: - self.blacklist_slave( - agent_id=offer.agent_id.value, - timeout=duration, - ) - - without_maintenance_window = [ - offer for offer in offers if offer not in with_maintenance_window - ] - for offer in without_maintenance_window: - with self._lock: - if offer.agent_id.value in self.blacklisted_slaves: - declined["blacklisted"].append( - f"offer {offer.id.value} agent {offer.agent_id.value}" - ) - declined_offer_ids.append(offer.id) - continue - - offer_pool_match, offer_pool = self.offer_matches_pool(offer) - if not offer_pool_match: - log.info( - f"Declining offer {offer.id.value}, required pool " - f"{self.pool} doesn't match offered pool {offer_pool}" - ) - declined["bad pool"].append(offer.id.value) - declined_offer_ids.append(offer.id) - continue - - # Need to lock here even though we are only reading the task_queue, since - # we are predicating on the queue's emptiness. If not locked, other - # threads can continue enqueueing, and we never terminate the loop. 
- task_configs = [] - with self._lock: - while not self.task_queue.empty(): - task_configs.append(self.task_queue.get()) - - offer_resources = get_offer_resources(offer, self.role) - offer_attributes = { - attribute.name: attribute.text.value - for attribute in offer.attributes - } - log.info( - f"Received offer {offer.id.value} for role {self.role}: {offer_resources}" - ) - tasks_to_launch, tasks_to_defer = self.callbacks.get_tasks_for_offer( - task_configs, - offer_resources, - offer_attributes, - self.role, - ) - - for task in tasks_to_defer: - self.task_queue.put(task) - get_metric(metrics.TASK_INSUFFICIENT_OFFER_COUNT).count( - len(tasks_to_defer) - ) - - if len(tasks_to_launch) == 0: - declined["bad resources"].append(offer.id.value) - declined_offer_ids.append(offer.id) - continue - - ignored_tasks = ",".join( - task_config.task_id - for task_config in tasks_to_launch - if task_config.task_id not in self.task_metadata - ) - if ignored_tasks: - log.warning(f"ignoring tasks not in metadata: {ignored_tasks}") - - tasks_to_launch = [ - task_config - for task_config in tasks_to_launch - if task_config.task_id in self.task_metadata - ] - - if len(tasks_to_launch) == 0: - declined["nothing to launch"].append(offer.id.value) - declined_offer_ids.append(offer.id) - elif not self.launch_tasks_for_offer(offer, tasks_to_launch): - declined["launch failed"].append(offer.id.value) - declined_offer_ids.append(offer.id) - else: - accepted.append( - f"offer: {offer.id.value} " - f"agent: {offer.agent_id.value} " - f"tasks: {len(tasks_to_launch)}" - ) - - if len(declined_offer_ids) > 0: - self.call_driver( - "declineOffer", declined_offer_ids, self.offer_decline_filter - ) - for reason, items in declined.items(): - log.info(f"Offers declined because {reason}: {', '.join(items)}") - if accepted: - log.info(f"Offers accepted: {', '.join(accepted)}") - - def statusUpdate(self, driver, update) -> None: - self._driver = driver - - task_id = update.task_id.value - task_state = str(update.state) - log.info(f"Task update {task_state} received for task {task_id}") - - if task_id not in self.task_metadata: - # We assume that a terminal status update has been - # received for this task already. - log.info( - "Ignoring this status update because a terminal status " - "update has been received for this task already." - ) - self.call_driver("acknowledgeStatusUpdate", update) - return - - md = self.task_metadata[task_id] - - # If we attempt to accept an offer that has been invalidated by - # the master for some reason, such as the offer being rescinded or - # offer_timeout being exceeded, then we will get a TASK_LOST status - # update back from the mesos master. - if task_state == "TASK_LOST" and str(update.reason) == "REASON_INVALID_OFFERS": - # This task has not been launched. Therefore, we are going to - # re-enqueue it. We are not propagating any event up to the - # application. - log.warning( - "Received TASK_LOST from mesos master because we " - "attempted to accept an invalid offer. Going to " - f"re-enqueue this task {task_id}" - ) - # Re-enqueue task - self.enqueue_task(md.task_config) - get_metric(metrics.TASK_LOST_DUE_TO_INVALID_OFFER_COUNT).count(1) - self.call_driver("acknowledgeStatusUpdate", update) - return - - # Record state changes, send a new event and emit metrics only if the - # task state has actually changed.
- if md.task_state != task_state: - with self._lock: - self.task_metadata = self.task_metadata.set( - task_id, - md.set( - task_state=task_state, - task_state_history=md.task_state_history.set( - task_state, time.time() - ), - ), - ) - - self.event_queue.put( - self.callbacks.handle_status_update(update, md.task_config), - ) - - if task_state in self._terminal_task_counts: - with self._lock: - self.task_metadata = self.task_metadata.discard(task_id) - get_metric(self._terminal_task_counts[task_state]).count(1) - - # We have to do this because we are not using implicit - # acknowledgements. - self.call_driver("acknowledgeStatusUpdate", update) diff --git a/task_processing/plugins/mesos/logging_executor.py b/task_processing/plugins/mesos/logging_executor.py deleted file mode 100644 index f77cca12..00000000 --- a/task_processing/plugins/mesos/logging_executor.py +++ /dev/null @@ -1,269 +0,0 @@ -import logging -import sys -import time -from queue import Queue -from threading import Lock -from threading import Thread -from urllib.parse import urlparse - -import requests -from pyrsistent import field -from pyrsistent import m -from pyrsistent import PMap -from pyrsistent import pmap -from pyrsistent import PRecord -from pyrsistent import v - -from task_processing.interfaces.task_executor import TaskExecutor - - -log = logging.getLogger(__name__) -logging.getLogger("urllib3").setLevel(logging.WARNING) - -# Read task log in 4K chunks -TASK_LOG_CHUNK_LEN = 4096 -DEFAULT_FORMAT = "{task_id}[{container_id}@{agent}]: {line}" -LOG_REQUEST_TIMEOUT = 5 # seconds - - -class LogMetadata(PRecord): - log_url = field(type=str, initial="") - log_path = field(type=str, initial="") - log_offsets = field( - type=PMap, factory=pmap, initial=pmap({"stdout": 0, "stderr": 0}) - ) - container_id = field(type=str, initial="") - executor_id = field(type=str, initial="") - - -def standard_handler(task_id, message, stream): - print(message, file=sys.stderr if stream == "stderr" else sys.stdout) - - -class MesosLoggingExecutor(TaskExecutor): - def __init__( - self, - downstream_executor, - handler=standard_handler, - format_string=DEFAULT_FORMAT, - ): - self.downstream_executor = downstream_executor - self.TASK_CONFIG_INTERFACE = downstream_executor.TASK_CONFIG_INTERFACE - self.handler = handler - self.format_string = format_string - - self.src_queue = downstream_executor.get_event_queue() - self.dest_queue = Queue() - self.stopping = False - - self.staging_tasks = m() - self.running_tasks = m() - self.done_tasks = v() - - # A lock is needed to synchronize logging and event processing - self.task_lock = Lock() - - self.event_thread = Thread(target=self.event_loop) - self.event_thread.daemon = True - self.event_thread.start() - - self.logging_thread = Thread(target=self.logging_loop) - self.logging_thread.daemon = True - self.logging_thread.start() - - def log_line(self, stream, line, task_id, container_id, agent): - formatted_line = self.format_string.format( - task_id=task_id, - container_id=container_id, - agent=agent, - line=line, - ) - self.handler(task_id, formatted_line, stream) - - def set_task_log_path(self, task_id): - log_md = self.running_tasks[task_id] - if log_md.log_url is None: - log.error(f"No log_url available for {task_id}") - return - try: - response = requests.get( - log_md.log_url + "/files/debug", - timeout=LOG_REQUEST_TIMEOUT, - ).json() - except Exception as e: - log.error("Failed to fetch files {error}".format(error=e)) - return - - for key in response.keys(): - if log_md.executor_id in key and 
log_md.container_id in key: - with self.task_lock: - self.running_tasks = self.running_tasks.set( - task_id, - log_md.set(log_path=key), - ) - break - - def stream_task_log(self, task_id): - if self.running_tasks[task_id].log_path == "": - self.set_task_log_path(task_id) - - # Abort in case the log path discovery was not successful - log_md = self.running_tasks[task_id] - if log_md.log_path == "": - return - - offsets = { - "stdout": log_md.log_offsets["stdout"], - "stderr": log_md.log_offsets["stderr"], - } - agent = urlparse(log_md.log_url).hostname - - for f in ["stdout", "stderr"]: - offset = offsets[f] - log_path = log_md.log_path + "/" + f - while True: - payload = { - "path": log_path, - "length": str(TASK_LOG_CHUNK_LEN), - "offset": str(offset), - } - - try: - response = requests.get( - log_md.log_url + "/files/read", - params=payload, - timeout=LOG_REQUEST_TIMEOUT, - ).json() - - log_length = len(response["data"]) - for line in response["data"].splitlines(): - self.log_line( - stream=f, - line=line, - task_id=task_id, - container_id=log_md.container_id, - agent=agent, - ) - except Exception as e: - log.error( - "Failed to get {path}@{agent} {error}".format( - path=log_path, agent=agent, error=e - ) - ) - break - - offset = offset + log_length - # Stop if there is no more data - if log_length < TASK_LOG_CHUNK_LEN: - break - # Update offset of this stream - offsets[f] = offset - - # Update both offsets for the task - with self.task_lock: - self.running_tasks = self.running_tasks.set( - task_id, - log_md.set(log_offsets=pmap(offsets)), - ) - - # process downstream events - def event_loop(self): - while True: - while not self.src_queue.empty(): - e = self.src_queue.get() - self.dest_queue.put(e) - self.src_queue.task_done() - - # Record the base log url - if e.kind == "task" and e.platform_type == "staging": - if e.task_id in self.staging_tasks: - continue - url = extract_url_from_offer(e.raw.offer) - self.staging_tasks = self.staging_tasks.set(e.task_id, url) - - if e.kind == "task" and e.platform_type == "running": - if e.task_id not in self.staging_tasks: - log.info(f"Task {e.task_id} already running, not fetching logs") - continue - - url = self.staging_tasks[e.task_id] - self.staging_tasks = self.staging_tasks.discard(e.task_id) - - # Simply pass the needed fields and let the logging thread - # take care of the slow log-path discovery.
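# --- Editor's aside (illustrative, not part of the original patch) ---
# The /files/read loop in stream_task_log above follows the Mesos files
# API paging contract: advance `offset` by the number of bytes returned
# and stop on the first short read. Reduced to a skeleton, where
# fetch_chunk is a hypothetical stand-in for the requests.get(...).json()
# call:
def _paging_skeleton(fetch_chunk, chunk_len=4096):
    offset = 0
    while True:
        data = fetch_chunk(offset, chunk_len)  # the "data" field of the response
        offset += len(data)
        if len(data) < chunk_len:  # short read: no more data, for now
            return offset
# --- end aside ---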
- container_id = e.raw.container_status.container_id.value - executor_id = e.raw.executor_id.value - with self.task_lock: - self.running_tasks = self.running_tasks.set( - e.task_id, - LogMetadata( - log_url=url, - container_id=container_id, - executor_id=executor_id, - ), - ) - - # Fetch the last log and remove the entry if the task is active - if e.kind == "task" and e.terminal: - with self.task_lock: - if e.task_id in self.running_tasks: - self.done_tasks = self.done_tasks.append(e.task_id) - - if self.stopping: - return - - time.sleep(1) - - def logging_loop(self): - while True: - # grab logs - for task_id in self.running_tasks.keys(): - self.stream_task_log(task_id) - - while len(self.done_tasks): - task_id = self.done_tasks[0] - self.stream_task_log(task_id) - with self.task_lock: - self.done_tasks = self.done_tasks.remove(task_id) - self.running_tasks = self.running_tasks.discard(task_id) - - if self.stopping: - return - - time.sleep(10) - - def run(self, task_config): - self.downstream_executor.run(task_config) - - def stop(self): - self.downstream_executor.stop() - self.stopping = True - self.event_thread.join() - self.logging_thread.join() - - def get_event_queue(self): - return self.dest_queue - - def reconcile(self, task_config): - self.downstream_executor.reconcile(task_config) - - def kill(self, task_id): - return self.downstream_executor.kill(task_id) - - -def extract_url_from_offer(offer): - try: - url = ( - offer.url.scheme - + "://" - + offer.url.address.ip - + ":" - + str(offer.url.address.port) - ) - except Exception as exc: - log.error( - f"Error decoding the url for this offer: {offer.url}. " - f"Setting to None. Exception: {exc}" - ) - url = None - return url diff --git a/task_processing/plugins/mesos/mesos_executor.py b/task_processing/plugins/mesos/mesos_executor.py deleted file mode 100644 index 978e7d1b..00000000 --- a/task_processing/plugins/mesos/mesos_executor.py +++ /dev/null @@ -1,108 +0,0 @@ -import logging -import threading -from typing import Callable -from typing import List -from typing import NamedTuple -from typing import Tuple - -import addict -from pymesos import MesosSchedulerDriver - -from task_processing.interfaces.event import Event -from task_processing.interfaces.task_executor import TaskExecutor -from task_processing.plugins.mesos.execution_framework import ExecutionFramework -from task_processing.plugins.mesos.resource_helpers import ResourceSet -from task_processing.plugins.mesos.task_config import MesosTaskConfig - - -class MesosExecutorCallbacks(NamedTuple): - get_tasks_for_offer: Callable[ - [List[MesosTaskConfig], ResourceSet, dict, str], - Tuple[List[addict.Dict], List[MesosTaskConfig]], - ] - handle_status_update: Callable[ - [addict.Dict, MesosTaskConfig], - Event, - ] - make_mesos_protobuf: Callable[ - [MesosTaskConfig, str, str], - addict.Dict, - ] - - -class MesosExecutor(TaskExecutor): - def __init__( - self, - role: str, - callbacks: MesosExecutorCallbacks, - pool=None, - principal="taskproc", - secret=None, - mesos_address="127.0.0.1:5050", - initial_decline_delay=1.0, - framework_name="taskproc-default", - framework_staging_timeout=240, - framework_id=None, - failover=False, - ) -> None: - """ - Constructs the instance of a task execution, encapsulating all state - required to run, monitor and stop the job. 
- - TODO param docstrings - """ - - self.logger = logging.getLogger(__name__) - self.role = role - self.failover = failover - - self.execution_framework = ExecutionFramework( - role=role, - pool=pool, - name=framework_name, - callbacks=callbacks, - task_staging_timeout_s=framework_staging_timeout, - initial_decline_delay=initial_decline_delay, - framework_id=framework_id, - ) - - # TODO: Get mesos master ips from smartstack - self.driver = MesosSchedulerDriver( - sched=self.execution_framework, - framework=self.execution_framework.framework_info, - use_addict=True, - master_uri=mesos_address, - implicit_acknowledgements=False, - principal=principal, - secret=secret, - failover=failover, - ) - - # start driver thread immediately - self.stopping = False - self.driver_thread = threading.Thread(target=self._run_driver, args=()) - self.driver_thread.daemon = True - self.driver_thread.start() - - def _run_driver(self): - while not self.stopping: - self.driver.run() - self.logger.warning("Driver stopped, starting again") - - def run(self, task_config): - self.execution_framework.enqueue_task(task_config) - - def reconcile(self, task_config): - self.execution_framework.reconcile_task(task_config) - - def kill(self, task_id): - return self.execution_framework.kill_task(task_id) - - def stop(self): - self.stopping = True - self.execution_framework.stop() - self.driver.stop(failover=self.failover) - self.driver.join() - - def get_event_queue(self): - return self.execution_framework.event_queue diff --git a/task_processing/plugins/mesos/mesos_pod_executor.py b/task_processing/plugins/mesos/mesos_pod_executor.py deleted file mode 100644 index e869fdea..00000000 --- a/task_processing/plugins/mesos/mesos_pod_executor.py +++ /dev/null @@ -1,6 +0,0 @@ -from task_processing.plugins.mesos.mesos_executor import MesosExecutor - - -class MesosPodExecutor(MesosExecutor): - def __init__(self, role, *args, **kwargs) -> None: - raise NotImplementedError diff --git a/task_processing/plugins/mesos/mesos_task_executor.py b/task_processing/plugins/mesos/mesos_task_executor.py deleted file mode 100644 index 4acd51cc..00000000 --- a/task_processing/plugins/mesos/mesos_task_executor.py +++ /dev/null @@ -1,51 +0,0 @@ -from typing import List -from typing import Tuple - -from task_processing.plugins.mesos.constraints import attributes_match_constraints -from task_processing.plugins.mesos.mesos_executor import MesosExecutor -from task_processing.plugins.mesos.mesos_executor import MesosExecutorCallbacks -from task_processing.plugins.mesos.resource_helpers import allocate_task_resources -from task_processing.plugins.mesos.resource_helpers import ResourceSet -from task_processing.plugins.mesos.resource_helpers import task_fits -from task_processing.plugins.mesos.task_config import MesosTaskConfig -from task_processing.plugins.mesos.translator import make_mesos_task_info -from task_processing.plugins.mesos.translator import mesos_update_to_event - - -def get_tasks_for_offer( - task_configs: List[MesosTaskConfig], - offer_resources: ResourceSet, - offer_attributes: dict, - role: str, -) -> Tuple[List[MesosTaskConfig], List[MesosTaskConfig]]: - tasks_to_launch, tasks_to_defer = [], [] - - for task_config in task_configs: - if task_fits(task_config, offer_resources) and attributes_match_constraints( - offer_attributes, task_config.constraints - ): - prepared_task_config, offer_resources = allocate_task_resources( - task_config, - offer_resources, - ) - tasks_to_launch.append(prepared_task_config) - else: - 
tasks_to_defer.append(task_config) - - return tasks_to_launch, tasks_to_defer - - -class MesosTaskExecutor(MesosExecutor): - TASK_CONFIG_INTERFACE = MesosTaskConfig - - def __init__(self, role, *args, **kwargs) -> None: - super().__init__( - role, - MesosExecutorCallbacks( - get_tasks_for_offer, - mesos_update_to_event, - make_mesos_task_info, - ), - *args, - **kwargs, - ) diff --git a/task_processing/plugins/mesos/metrics.py b/task_processing/plugins/mesos/metrics.py deleted file mode 100644 index b2275fd9..00000000 --- a/task_processing/plugins/mesos/metrics.py +++ /dev/null @@ -1,22 +0,0 @@ -TASK_LAUNCHED_COUNT = "taskproc.mesos.task_launched_count" -TASK_FAILED_TO_LAUNCH_COUNT = "taskproc.mesos.tasks_failed_to_launch_count" -TASK_LAUNCH_FAILED_COUNT = "taskproc.mesos.task_launch_failed_count" -TASK_FINISHED_COUNT = "taskproc.mesos.task_finished_count" -TASK_FAILED_COUNT = "taskproc.mesos.task_failure_count" -TASK_KILLED_COUNT = "taskproc.mesos.task_killed_count" -TASK_LOST_COUNT = "taskproc.mesos.task_lost_count" -TASK_LOST_DUE_TO_INVALID_OFFER_COUNT = ( - "taskproc.mesos.task_lost_due_to_invalid_offer_count" -) -TASK_ERROR_COUNT = "taskproc.mesos.task_error_count" -TASK_OFFER_TIMEOUT = "taskproc.mesos.task_offer_timeout" - -TASK_ENQUEUED_COUNT = "taskproc.mesos.task_enqueued_count" -TASK_QUEUED_TIME_TIMER = "taskproc.mesos.task_queued_time" -TASK_INSUFFICIENT_OFFER_COUNT = "taskproc.mesos.task_insufficient_offer_count" -TASK_STUCK_COUNT = "taskproc.mesos.task_stuck_count" - -OFFER_DELAY_TIMER = "taskproc.mesos.offer_delay" -BLACKLISTED_AGENTS_COUNT = "taskproc.mesos.blacklisted_agents_count" - -BGCHECK_TIME_TIMER = "taskproc.mesos.bgcheck_time" diff --git a/task_processing/plugins/mesos/resource_helpers.py b/task_processing/plugins/mesos/resource_helpers.py deleted file mode 100644 index ebb65965..00000000 --- a/task_processing/plugins/mesos/resource_helpers.py +++ /dev/null @@ -1,97 +0,0 @@ -from typing import Tuple -from typing import TYPE_CHECKING - -import addict -from pyrsistent import field -from pyrsistent import m -from pyrsistent import PMap -from pyrsistent import pmap -from pyrsistent import PRecord -from pyrsistent import PVector -from pyrsistent import pvector -from pyrsistent import v - -from task_processing.plugins.mesos.task_config import MesosTaskConfig - -NUMERIC_RESOURCE = field( - type=float, - initial=0.0, - factory=float, - invariant=lambda x: (x >= 0, "resource < 0"), -) -_NUMERIC_RESOURCES = frozenset(["cpus", "mem", "disk", "gpus"]) - - -class ResourceSet(PRecord): - cpus = NUMERIC_RESOURCE - mem = NUMERIC_RESOURCE - disk = NUMERIC_RESOURCE - gpus = NUMERIC_RESOURCE - ports = field( - type=(PVector[PMap] if TYPE_CHECKING else PVector), initial=v(), factory=pvector - ) - - -def get_offer_resources(offer: addict.Dict, role: str) -> ResourceSet: - """Get the resources from a Mesos offer - - :param offer: the payload from a Mesos resourceOffer call - :param role: the Mesos role we want to get resources for - :returns: a mapping from resource name -> available resources for the offer - """ - res = ResourceSet() - for resource in offer.resources: - if resource.role != role: - continue - - if resource.name in _NUMERIC_RESOURCES: - res = res.set(resource.name, resource.scalar.value) - elif resource.name == "ports": - res = res.set("ports", [pmap(r) for r in resource.ranges.range]) - return res - - -def allocate_task_resources( - task_config: MesosTaskConfig, - offer_resources: ResourceSet, -) -> Tuple[MesosTaskConfig, ResourceSet]: - """Allocate a task's resources to 
a Mesos offer - - :param task: the specification for the task to allocate - :param offer_resources: a mapping of resource name -> available resources - (should come from :func:`get_offer_resources`) - :returns: a pair of (`prepared_task_config`, `remaining_resources`), where - `prepared_task_config` is the task_config object modified with the - actual resources consumed - """ - for res, val in offer_resources.items(): - if res not in _NUMERIC_RESOURCES: - continue - offer_resources = offer_resources.set(res, val - task_config[res]) - - port = offer_resources.ports[0]["begin"] - if offer_resources.ports[0]["begin"] == offer_resources.ports[0]["end"]: - avail_ports = offer_resources.ports[1:] - else: - new_port_range = offer_resources.ports[0].set("begin", port + 1) - avail_ports = offer_resources.ports.set(0, new_port_range) - offer_resources = offer_resources.set("ports", avail_ports) - task_config = task_config.set("ports", v(m(begin=port, end=port))) - return task_config, offer_resources - - -def task_fits(task: MesosTaskConfig, offer_resources: ResourceSet) -> bool: - """Check to see if a task fits a given offer's resources - - :param task: the task specification to check - :param offer_resources: a mapping of resource name -> available resources - (should come from :func:`get_offer_resources`) - :returns: True if the offer has enough resources for the task, False otherwise - """ - for rname, value in offer_resources.items(): - if rname in _NUMERIC_RESOURCES and task[rname] > value: - return False - elif rname == "ports" and len(value) == 0: # TODO validate port ranges - return False - - return True diff --git a/task_processing/plugins/mesos/retrying_executor.py b/task_processing/plugins/mesos/retrying_executor.py deleted file mode 100644 index cc1bbd10..00000000 --- a/task_processing/plugins/mesos/retrying_executor.py +++ /dev/null @@ -1,167 +0,0 @@ -import logging -import time -from queue import Queue -from threading import Lock -from threading import Thread - -from pyrsistent import m - -from task_processing.interfaces.task_executor import TaskExecutor - -log = logging.getLogger(__name__) - - -class RetryingExecutor(TaskExecutor): - def __init__( - self, downstream_executor, retry_pred=lambda e: not e.success, retries=3 - ): - self.executor = downstream_executor - self.retries = retries - self.retry_pred = retry_pred - - self.task_retries = m() - self.task_retries_lock = Lock() - - self.src_queue = downstream_executor.get_event_queue() - self.dest_queue = Queue() - self.stopping = False - - self.retry_thread = Thread(target=self.retry_loop) - self.retry_thread.daemon = True - self.retry_thread.start() - - def event_with_retries(self, event): - return event.transform( - ("extensions", "RetryingExecutor/tries"), - "{}/{}".format(self.task_retries[event.task_id], self.retries), - ) - - def retry(self, event): - retries_remaining = self.task_retries[event.task_id] - if retries_remaining <= 0: - return False - - total_retries = self._task_or_executor_retries(event.task_config) - log.info( - "Retrying task {}, {} of {}, fail event: {}".format( - event.task_config.name, - total_retries - retries_remaining + 1, - total_retries, - event.raw, - ) - ) - - with self.task_retries_lock: - self.task_retries = self.task_retries.set( - event.task_id, retries_remaining - 1 - ) - self.run(event.task_config) - - return True - - def retry_loop(self): - while True: - while not self.src_queue.empty(): - e = self.src_queue.get() - - if e.kind != "task": - self.dest_queue.put(e) - continue - - # This is to 
remove trailing '-retry*' - original_task_id = "-".join( - [item for item in e.task_id.split("-")[:-1]] - ) - - # Check if the update is for current attempt. Discard if - # it is not. - if not self._is_current_attempt(e, original_task_id): - continue - - # Set the task id back to original task_id - e = self._restore_task_id(e, original_task_id) - - e = self.event_with_retries(e) - - if e.terminal: - if self.retry_pred(e): - if self.retry(e): - continue - - with self.task_retries_lock: - self.task_retries = self.task_retries.remove(e.task_id) - - self.dest_queue.put(e) - - if self.stopping: - return - - time.sleep(1) - - def run(self, task_config): - if task_config.task_id not in self.task_retries: - with self.task_retries_lock: - self.task_retries = self.task_retries.set( - task_config.task_id, self._task_or_executor_retries(task_config) - ) - self.executor.run(self._task_config_with_retry(task_config)) - - def reconcile(self, task_config): - self.executor.reconcile(task_config) - - def kill(self, task_id): - # retries = -1 so that manually killed tasks can be distinguished - with self.task_retries_lock: - self.task_retries = self.task_retries.set(task_id, -1) - return self.executor.kill(task_id) - - def stop(self): - self.executor.stop() - self.stopping = True - self.retry_thread.join() - - def get_event_queue(self): - return self.dest_queue - - def _task_config_with_retry(self, task_config): - return task_config.set( - uuid="{id}-retry{attempt}".format( - id=task_config.uuid, attempt=self.task_retries[task_config.task_id] - ) - ) - - def _restore_task_id(self, e, original_task_id): - task_config = e.task_config.set( - uuid="-".join([item for item in str(e.task_config.uuid).split("-")[:-1]]) - ) - - # Set the task id back to original task_id - return e.set( - task_id=original_task_id, - task_config=task_config, - ) - - def _is_current_attempt(self, e, original_task_id): - retry_suffix = "-".join([item for item in e.task_id.split("-")[-1:]]) - - # This is to extract retry attempt from retry_suffix - # eg: if retry_suffix= 'retry2', then attempt==2 - attempt = int(retry_suffix[5:]) - - # This is to reregister a task with the retry executor in the event - # of reconciliation and attempts were lost - with self.task_retries_lock: - if original_task_id not in self.task_retries: - self.task_retries = self.task_retries.set( - original_task_id, - attempt, - ) - return True - - if attempt == self.task_retries[original_task_id]: - return True - - return False - - def _task_or_executor_retries(self, task_config): - return task_config.retries if "retries" in task_config else self.retries diff --git a/task_processing/plugins/mesos/task_config.py b/task_processing/plugins/mesos/task_config.py deleted file mode 100644 index 7b7ca5a8..00000000 --- a/task_processing/plugins/mesos/task_config.py +++ /dev/null @@ -1,135 +0,0 @@ -import uuid -from typing import Sequence -from typing import TYPE_CHECKING - -from pyrsistent import field -from pyrsistent import m -from pyrsistent import PMap -from pyrsistent import pmap -from pyrsistent import PVector -from pyrsistent import pvector -from pyrsistent import v - -from task_processing.interfaces.task_executor import DefaultTaskConfigInterface -from task_processing.plugins.mesos.constraints import Constraint -from task_processing.plugins.mesos.constraints import valid_constraint_operator_name - -VOLUME_KEYS = set(["mode", "container_path", "host_path"]) - - -def valid_volumes(volumes): - for vol in volumes: - if set(vol.keys()) != VOLUME_KEYS: - return ( - False, - 
"Invalid volume format, must only contain following keys: " - "{}, was: {}".format(VOLUME_KEYS, vol.keys()), - ) - return (True, None) - - -def _valid_constraints(constraints): - invalid_operators = [] - for constraint in constraints: - operator_name = constraint.operator - if not valid_constraint_operator_name(operator_name): - invalid_operators.append(operator_name) - if len(invalid_operators) > 0: - return (False, "Invalid constraint operator names: {}".format(operator_name)) - else: - return (True, None) - - -class MesosTaskConfig(DefaultTaskConfigInterface): - def __invariant__(conf): - return ( - ( - "image" in conf if conf.containerizer == "DOCKER" else True, - "Image required for chosen containerizer", - ), - ( - len(conf.task_id) <= 255, - "task_id is longer than 255 chars: {}".format(conf.task_id), - ), - ) - - uuid = field(type=(str, uuid.UUID), initial=uuid.uuid4) # type: ignore - name = field(type=str, initial="default") - # image is optional for the mesos containerizer - image = field(type=str) - cmd = field( - type=str, mandatory=True, invariant=lambda cmd: (cmd.strip() != "", "empty cmd") - ) - cpus = field( - type=float, initial=0.1, factory=float, invariant=lambda c: (c > 0, "cpus > 0") - ) - mem = field( - type=float, - initial=32.0, - factory=float, - invariant=lambda m: (m >= 32, "mem is >= 32"), - ) - disk = field( - type=float, initial=10.0, factory=float, invariant=lambda d: (d > 0, "disk > 0") - ) - gpus = field( - type=int, initial=0, factory=int, invariant=lambda g: (g >= 0, "gpus >= 0") - ) - timeout = field( - type=float, - factory=float, - mandatory=False, - invariant=lambda t: (t > 0, "timeout > 0"), - ) - # By default, the retrying executor retries 3 times. This task option - # overrides the executor setting. - retries = field( - type=int, - factory=int, - mandatory=False, - invariant=lambda r: (r >= 0, "retries >= 0"), - ) - volumes = field(type=PVector, initial=v(), factory=pvector, invariant=valid_volumes) - ports = field( - type=(PVector[PMap] if TYPE_CHECKING else PVector), initial=v(), factory=pvector - ) - cap_add = field(type=PVector, initial=v(), factory=pvector) - ulimit = field(type=PVector, initial=v(), factory=pvector) - uris = field(type=PVector, initial=v(), factory=pvector) - # TODO: containerization + containerization_args ? 
- docker_parameters = field(type=PVector, initial=v(), factory=pvector) - containerizer = field( - type=str, - initial="DOCKER", - invariant=lambda c: ( - c == "DOCKER" or c == "MESOS", - "containerizer is docker or mesos", - ), - ) - environment = field(type=PMap, initial=m(), factory=pmap) - offer_timeout = field( - type=float, - initial=60.0, - factory=float, - invariant=lambda t: (t > 0, "timeout > 0"), - ) - constraints = field( - type=(Sequence[Constraint] if TYPE_CHECKING else PVector), - initial=v(), - factory=lambda c: pvector( - (Constraint(attribute=v[0], operator=v[1], value=v[2]) for v in c) - ), - invariant=_valid_constraints, - ) - use_cached_image = field(type=bool, initial=True, factory=bool) - - @property - def task_id(self): - return "{}.{}".format(self.name, self.uuid) - - def set_task_id(self, task_id): - try: - name, uuid = task_id.rsplit(".", maxsplit=1) - except ValueError: - raise ValueError(f"Invalid format for task_id {task_id}") - return self.set(name=name, uuid=uuid) diff --git a/task_processing/plugins/mesos/timeout_executor.py b/task_processing/plugins/mesos/timeout_executor.py deleted file mode 100644 index e7778ba9..00000000 --- a/task_processing/plugins/mesos/timeout_executor.py +++ /dev/null @@ -1,132 +0,0 @@ -import collections -import logging -import time -from queue import Queue -from threading import Lock -from threading import Thread - -from task_processing.interfaces.task_executor import TaskExecutor - -log = logging.getLogger(__name__) - -TaskEntry = collections.namedtuple("TaskEntry", ["task_id", "deadline"]) - - -class TimeoutExecutor(TaskExecutor): - def __init__(self, downstream_executor): - self.downstream_executor = downstream_executor - - self.tasks_lock = Lock() - # Tasks that are pending termination - self.killed_tasks = [] - # Tasks that are currently running - self.running_tasks = [] - - self.src_queue = downstream_executor.get_event_queue() - self.dest_queue = Queue() - self.stopping = False - - self.timeout_thread = Thread(target=self.timeout_loop) - self.timeout_thread.daemon = True - self.timeout_thread.start() - - def timeout_loop(self): - while True: - # process downstream events - while not self.src_queue.empty(): - e = self.src_queue.get() - self.dest_queue.put(e) - - if not e.kind == "task": - continue - elif not e.terminal: - with self.tasks_lock: - if e.task_id not in [ - entry.task_id for entry in self.running_tasks - ]: - # No record of e's task_id in self.running_tasks, - # so we need to add it back in. We lack access to - # the original time the task was started, so to set - # a deadline, we use e's timestamp as a baseline. - new_entry = TaskEntry( - task_id=e.task_id, - deadline=e.task_config.timeout + e.timestamp, - ) - self._insert_new_running_task_entry(new_entry) - else: - # Update running and killed tasks - with self.tasks_lock: - for idx, entry in enumerate(self.running_tasks): - if e.task_id == entry.task_id: - self.running_tasks.pop(idx) - break - if e.task_id in self.killed_tasks: - self.killed_tasks.remove(e.task_id) - - # Check timeouts - current_time = time.time() - with self.tasks_lock: - delete_idx = None - for idx, entry in enumerate(self.running_tasks): - if entry.deadline < current_time: - log.info("Killing task {}: timed out".format(entry.task_id)) - self.downstream_executor.kill(entry.task_id) - self.killed_tasks.append(entry.task_id) - delete_idx = idx - # Skip the rest of tasks in the list because they are - # appended to the list later. 
- else: - break - if delete_idx is not None: - self.running_tasks = self.running_tasks[delete_idx + 1 :] - - if self.stopping: - return - - # Since src_queue has to be polled continuously, sleep(1) is used. - # Otherwise, a notify() from run() plus wait(delta between now and - # the earliest deadline) is more efficient. - time.sleep(1) - - def run(self, task_config): - # Tasks are dynamically added and removed from running_tasks and - # and killed_tasks. It's preferable for the client or execution - # framework to check for duplicated tasks. The duplicate task check does - # NOT happen here. - new_entry = TaskEntry( - task_id=task_config.task_id, deadline=task_config.timeout + time.time() - ) - with self.tasks_lock: - self._insert_new_running_task_entry(new_entry) - - self.downstream_executor.run(task_config) - - def reconcile(self, task_config): - self.downstream_executor.reconcile(task_config) - - def kill(self, task_id): - with self.tasks_lock: - for idx, entry in enumerate(self.running_tasks): - if task_id == entry.task_id: - log.info("Killing task {}: requested".format(task_id)) - result = self.downstream_executor.kill(task_id) - if result is not False: - self.running_tasks.pop(idx) - self.killed_tasks.append(task_id) - return result - - def stop(self): - self.downstream_executor.stop() - self.stopping = True - self.timeout_thread.join() - - def get_event_queue(self): - return self.dest_queue - - def _insert_new_running_task_entry(self, new_entry): - # Insertion sort for task entries in self.running_tasks - for idx, entry in enumerate(self.running_tasks): - if new_entry.deadline <= entry.deadline: - self.running_tasks.insert(idx, new_entry) - return - self.running_tasks.append(new_entry) diff --git a/task_processing/plugins/mesos/translator.py b/task_processing/plugins/mesos/translator.py deleted file mode 100644 index 421ba9ab..00000000 --- a/task_processing/plugins/mesos/translator.py +++ /dev/null @@ -1,146 +0,0 @@ -import time -from typing import List - -import addict -from pyrsistent import thaw - -from task_processing.interfaces.event import Event -from task_processing.interfaces.event import task_event -from task_processing.plugins.mesos.task_config import MesosTaskConfig - -# https://github.com/apache/mesos/blob/master/include/mesos/mesos.proto - - -def make_mesos_container_info(task_config: MesosTaskConfig) -> addict.Dict: - container_info = addict.Dict( - type=task_config.containerizer, - volumes=thaw(task_config.volumes), - ) - port_mappings = [ - addict.Dict(host_port=task_config.ports[0]["begin"], container_port=8888) - ] - if container_info.type == "DOCKER": - container_info.docker = addict.Dict( - image=task_config.image, - network="BRIDGE", - port_mappings=port_mappings, - parameters=thaw(task_config.docker_parameters), - force_pull_image=(not task_config.use_cached_image), - ) - elif container_info.type == "MESOS": - container_info.network_infos = addict.Dict(port_mappings=port_mappings) - # For this to work, image_providers needs to be set to 'docker' on mesos agents (as opposed - # to 'appc' or 'oci'; we're still running docker images, we're just - # using the UCR to do it). 
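# (Editor's note: the UCR, Mesos's unified containerizer, pulls and runs
# the same Docker image without the Docker daemon; the block below only
# attaches image metadata when the task config actually specifies one.)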
- if "image" in task_config: - container_info.mesos.image = addict.Dict( - type="DOCKER", # not 'APPC' or 'OCI' - docker=addict.Dict(name=task_config.image), - cached=task_config.use_cached_image, - ) - return container_info - - -def make_mesos_resources( - task_config: MesosTaskConfig, - role: str, -) -> List[addict.Dict]: - return [ - addict.Dict( - name="cpus", - type="SCALAR", - role=role, - scalar=addict.Dict(value=task_config.cpus), - ), - addict.Dict( - name="mem", - type="SCALAR", - role=role, - scalar=addict.Dict(value=task_config.mem), - ), - addict.Dict( - name="disk", - type="SCALAR", - role=role, - scalar=addict.Dict(value=task_config.disk), - ), - addict.Dict( - name="gpus", - type="SCALAR", - role=role, - scalar=addict.Dict(value=task_config.gpus), - ), - addict.Dict( - name="ports", - type="RANGES", - role=role, - ranges=addict.Dict(range=thaw(task_config.ports)), - ), - ] - - -def make_mesos_command_info(task_config: MesosTaskConfig) -> addict.Dict: - return addict.Dict( - value=task_config.cmd, - uris=[addict.Dict(value=uri, extract=False) for uri in task_config.uris], - environment=make_task_environment_variables(task_config=task_config), - ) - - -def make_task_environment_variables(task_config: MesosTaskConfig) -> addict.Dict: - env = dict(task_config.environment.items()) - env["MESOS_TASK_ID"] = task_config.task_id # type: ignore - return addict.Dict(variables=[addict.Dict(name=k, value=v) for k, v in env.items()]) - - -def make_mesos_task_info( - task_config: MesosTaskConfig, - agent_id: str, - role: str, -) -> addict.Dict: - container_info = make_mesos_container_info(task_config) - resources = make_mesos_resources(task_config, role) - command_info = make_mesos_command_info(task_config) - - return addict.Dict( - task_id=addict.Dict(value=task_config.task_id), - agent_id=addict.Dict(value=agent_id), - name=f"executor-{task_config.task_id}", - resources=resources, - command=command_info, - container=container_info, - ) - - -MESOS_STATUS_MAP = { - "TASK_STARTING": addict.Dict(platform_type="starting", terminal=False), - "TASK_RUNNING": addict.Dict(platform_type="running", terminal=False), - "TASK_FINISHED": addict.Dict(platform_type="finished", terminal=True, success=True), - "TASK_FAILED": addict.Dict(platform_type="failed", terminal=True, success=False), - "TASK_KILLED": addict.Dict(platform_type="killed", terminal=True, success=False), - "TASK_LOST": addict.Dict(platform_type="lost", terminal=True, success=False), - "TASK_STAGING": addict.Dict(platform_type="staging", terminal=False), - "TASK_ERROR": addict.Dict(platform_type="error", terminal=True, success=False), - "TASK_KILLING": addict.Dict(platform_type="killing", terminal=False), - "TASK_DROPPED": addict.Dict(platform_type="dropped", terminal=True, success=False), - "TASK_UNREACHABLE": addict.Dict(platform_type="unreachable", terminal=False), - "TASK_GONE": addict.Dict(platform_type="gone", terminal=True, success=False), - "TASK_GONE_BY_OPERATOR": addict.Dict( - platform_type="gone_by_operator", terminal=True, success=False - ), - "TASK_UNKNOWN": addict.Dict(platform_type="unknown", terminal=False), - "TASK_STUCK": addict.Dict(platform_type="unknown", terminal=False), -} - - -def mesos_update_to_event( - mesos_status: addict.Dict, task_config: MesosTaskConfig -) -> Event: - kwargs = dict( - raw=mesos_status, - task_id=task_config.task_id, - task_config=task_config, - timestamp=time.time(), - ) - kwargs.update(MESOS_STATUS_MAP[mesos_status.state]) - return task_event(**kwargs) diff --git 
a/task_processing/plugins/persistence/__init__.py b/task_processing/plugins/persistence/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/task_processing/plugins/persistence/dynamodb_persistence.py b/task_processing/plugins/persistence/dynamodb_persistence.py deleted file mode 100644 index 0c84be52..00000000 --- a/task_processing/plugins/persistence/dynamodb_persistence.py +++ /dev/null @@ -1,72 +0,0 @@ -import decimal - -import boto3.session as bsession -from boto3.dynamodb.conditions import Key -from pyrsistent import thaw - -from task_processing.interfaces.persistence import Persister - - -class DynamoDBPersister(Persister): - def __init__(self, table_name, endpoint_url=None, session=None): - self.table_name = table_name - if not session: - session = bsession.Session() - self.ddb_client = session.client( - service_name="dynamodb", - endpoint_url=endpoint_url, - ) - self.table = session.resource( - endpoint_url=endpoint_url, service_name="dynamodb" - ).Table(table_name) - - def read(self, task_id, comparison_operator="EQ"): - res = self.table.query(KeyConditionExpression=Key("task_id").eq(task_id)) - return [self.item_to_event(item) for item in res["Items"]] - - def write(self, event): - if event.kind == "control": - return None - return self.ddb_client.put_item( - TableName=self.table_name, Item=self._event_to_item(event)["M"] - ) - - def _event_to_item(self, e): - raw = thaw(e) - if type(raw) is dict: - resp = {} - for k, v in raw.items(): - if type(v) is str: - resp[k] = {"S": v} - elif type(v) is bool: - resp[k] = {"BOOL": v} - elif isinstance(v, (int, float)): - resp[k] = {"N": str(v)} - elif type(v) is dict: - resp[k] = self._event_to_item(v) - elif type(v) is list: - if len(v) > 0: - vals = [] - for i in v: - vals.append(self._event_to_item(i)) - resp[k] = {"L": vals} - return {"M": resp} - elif type(raw) is str: - return {"S": raw} - elif type(raw) in [int, float]: - return {"N": str(raw)} - else: - print("Missed converting key %s type %s" % (raw, type(raw))) - - def item_to_event(self, obj): - return self._replace_decimals(obj) - - def _replace_decimals(self, obj): - if isinstance(obj, list): - return [self._replace_decimals(x) for x in obj] - elif isinstance(obj, dict): - return {k: self._replace_decimals(v) for k, v in obj.items()} - elif isinstance(obj, decimal.Decimal): - return float(obj) - else: - return obj diff --git a/task_processing/plugins/persistence/file_persistence.py b/task_processing/plugins/persistence/file_persistence.py deleted file mode 100644 index 30bd297b..00000000 --- a/task_processing/plugins/persistence/file_persistence.py +++ /dev/null @@ -1,27 +0,0 @@ -import json - -from pyrsistent import thaw -from pyrsistent import v - -from task_processing.interfaces.event import Event -from task_processing.interfaces.event import json_deserializer -from task_processing.interfaces.event import json_serializer -from task_processing.interfaces.persistence import Persister - - -class FilePersistence(Persister): - def __init__(self, output_file): - self.output_file = output_file - - def read(self, task_id): - acc = v() - with open(self.output_file, "r") as f: - for line in f: - parsed = json.loads(line, object_hook=json_deserializer) - if parsed["task_id"] == task_id: - acc = acc.append(Event.create(parsed)) - return acc - - def write(self, event): - with open(self.output_file, "a+") as f: - f.write("{}\n".format(json.dumps(thaw(event), default=json_serializer))) diff --git a/task_processing/plugins/stateful/__init__.py 
b/task_processing/plugins/stateful/__init__.py deleted file mode 100644 index 3b3ffbab..00000000 --- a/task_processing/plugins/stateful/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -from .stateful_executor import StatefulTaskExecutor - - -TASK_PROCESSING_PLUGIN = "stateful_plugin" - - -def register_plugin(registry): - return registry.register_task_executor("stateful", StatefulTaskExecutor) diff --git a/task_processing/plugins/stateful/stateful_executor.py b/task_processing/plugins/stateful/stateful_executor.py deleted file mode 100644 index 252fb09a..00000000 --- a/task_processing/plugins/stateful/stateful_executor.py +++ /dev/null @@ -1,49 +0,0 @@ -import logging -import threading -import traceback -from queue import Queue - -from task_processing.interfaces.task_executor import TaskExecutor - -log = logging.getLogger(__name__) - - -class StatefulTaskExecutor(TaskExecutor): - """ """ - - def __init__(self, downstream_executor, persister): - self.downstream_executor = downstream_executor - self.writer_queue = Queue() - self.queue_for_processed_events = Queue() - self.persister = persister - worker_thread = threading.Thread(target=self.subscribe_to_updates_for_task) - worker_thread.daemon = True - worker_thread.start() - - def run(self, task_config): - self.downstream_executor.run(task_config) - - def reconcile(self, task_config): - self.downstream_executor.reconcile(task_config) - - def kill(self, task_id): - return self.downstream_executor.kill(task_id) - - def status(self, task_id): - return sorted(self.persister.read(task_id), key=lambda x: x["timestamp"]) - - def stop(self): - return self.downstream_executor.stop() - - def get_event_queue(self): - return self.queue_for_processed_events - - def subscribe_to_updates_for_task(self): - while True: - result = self.downstream_executor.get_event_queue().get() - try: - self.persister.write(event=result) - except Exception: - log.error(traceback.format_exc()) - self.queue_for_processed_events.put(result) - self.downstream_executor.get_event_queue().task_done() diff --git a/tests/integration/cluster b/tests/integration/cluster deleted file mode 120000 index c70625a2..00000000 --- a/tests/integration/cluster +++ /dev/null @@ -1 +0,0 @@ -../../examples/cluster \ No newline at end of file diff --git a/tests/integration/mesos/features/sync.feature b/tests/integration/mesos/features/sync.feature deleted file mode 100644 index 4c539496..00000000 --- a/tests/integration/mesos/features/sync.feature +++ /dev/null @@ -1,9 +0,0 @@ -Feature: Mesos Sync runner - -Scenario: Running single task - Given working mesos platform - And mesos executor with sync runner - When I launch a task - Then it should block until finished - And print status running - And print status finished diff --git a/tests/integration/mesos/mesos_test.py b/tests/integration/mesos/mesos_test.py deleted file mode 100644 index 6726bd19..00000000 --- a/tests/integration/mesos/mesos_test.py +++ /dev/null @@ -1,29 +0,0 @@ -from pytest_bdd import given -from pytest_bdd import then -from pytest_bdd import when - -from task_processing.plugins.mesos.mesos_executor import MesosExecutor -from task_processing.runners.sync import Sync - - -@given("mesos executor with {runner} runner") -def mesos_executor_runner(runner): - executor = MesosExecutor(role="mock-role") - - if runner == "sync": - runner_instance = Sync(executor=executor) - else: - raise "unknown runner: {}".format(runner) - - return {"executor": executor, "runner": runner_instance} - - -@when("I launch a task") -def 
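
The StatefulTaskExecutor deleted above is a wrapper pattern: delegate run/kill/stop downstream and tee every downstream event through a persister before republishing it. A minimal sketch under those assumptions (PersistingWrapper is an illustrative name; the downstream and persister objects are assumed to expose get_event_queue() and write()):

```python
import logging
import threading
from queue import Queue

log = logging.getLogger(__name__)


class PersistingWrapper:
    """Drain the downstream executor's event queue on a daemon thread,
    persist each event, then republish it on this wrapper's own queue."""

    def __init__(self, downstream, persister):
        self.downstream = downstream
        self.persister = persister
        self.event_queue = Queue()
        threading.Thread(target=self._drain, daemon=True).start()

    def _drain(self):
        while True:
            event = self.downstream.get_event_queue().get()
            try:
                self.persister.write(event)
            except Exception:
                log.exception("failed to persist event")  # never drop the event
            self.event_queue.put(event)

    def get_event_queue(self):
        return self.event_queue
```
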
launch_task(mesos_executor_runner): - print(mesos_executor_runner) - return - - -@then("it should block until finished") -def block_until_finished(): - return diff --git a/tests/unit/plugins/mesos/conftest.py b/tests/unit/plugins/mesos/conftest.py deleted file mode 100644 index 7e6b0894..00000000 --- a/tests/unit/plugins/mesos/conftest.py +++ /dev/null @@ -1,86 +0,0 @@ -import addict -import mock -import pytest -from pyrsistent import m -from pyrsistent import v - -from task_processing.plugins.mesos.task_config import MesosTaskConfig - - -@pytest.fixture -def fake_task(): - return MesosTaskConfig( - name="fake_name", - cpus=10.0, - mem=1024.0, - disk=1000.0, - gpus=1, - ports=v(m(begin=31200, end=31200)), - image="fake_image", - cmd='echo "fake"', - ) - - -@pytest.fixture -def fake_offer(): - return addict.Dict( - id=addict.Dict(value="fake_offer_id"), - agent_id=addict.Dict(value="fake_agent_id"), - hostname="fake_hostname", - resources=[ - addict.Dict( - role="fake_role", - name="cpus", - scalar=addict.Dict(value=10), - type="SCALAR", - ), - addict.Dict( - role="other_fake_role", - name="cpus", - scalar=addict.Dict(value=20), - type="SCALAR", - ), - addict.Dict( - role="fake_role", - name="mem", - scalar=addict.Dict(value=1024), - type="SCALAR", - ), - addict.Dict( - role="fake_role", - name="disk", - scalar=addict.Dict(value=1000), - type="SCALAR", - ), - addict.Dict( - role="fake_role", - name="gpus", - scalar=addict.Dict(value=1), - type="SCALAR", - ), - addict.Dict( - role="fake_role", - name="ports", - ranges=addict.Dict(range=[addict.Dict(begin=31200, end=31500)]), - type="RANGES", - ), - ], - attributes=[ - addict.Dict(name="pool", text=addict.Dict(value="fake_pool_text")), - addict.Dict( - name="region", - text=addict.Dict(value="fake_region_text"), - ), - ], - ) - - -@pytest.fixture -def mock_fw_and_driver(): - with mock.patch( - "task_processing.plugins.mesos.mesos_executor.ExecutionFramework" - ) as mock_execution_framework, mock.patch( - "task_processing.plugins.mesos.mesos_executor.MesosSchedulerDriver" - ) as mock_scheduler_driver: - mock_execution_framework.return_value.framework_info = mock.Mock() - yield mock_execution_framework, mock_scheduler_driver diff --git a/tests/unit/plugins/mesos/constraints_test.py b/tests/unit/plugins/mesos/constraints_test.py deleted file mode 100644 index c7f73d54..00000000 --- a/tests/unit/plugins/mesos/constraints_test.py +++ /dev/null @@ -1,284 +0,0 @@ -import pytest -from pyrsistent import m - -from task_processing.plugins.mesos.constraints import attributes_match_constraints -from task_processing.plugins.mesos.constraints import Constraint - - -@pytest.fixture -def fake_dict(): - return m( - pool="fake_pool_text", - region="fake_region_text", - ) - - -def test_constraints_eq_match(fake_dict): - assert attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="region", - operator="==", - value="fake_region_text", - ), - ], - ) - assert attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="fake_attribute_name", - operator="==", - value="random_text", - ), - ], - ) - - -def test_constraints_eq_not_match(fake_dict): - assert not attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="region", - operator="==", - value="another_fake_region_text", - ), - ], - ) - - -def test_constraints_EQUALS_match(fake_dict): - assert attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="region", - operator="EQUALS", - value="fake_region_text", - ), - ], - ) - assert 
attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="fake_attribute_name", - operator="EQUALS", - value="random_text", - ), - ], - ) - - -def test_constraints_EQUALS_not_match(fake_dict): - assert not attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="region", - operator="EQUALS", - value="another_fake_region_text", - ), - ], - ) - - -def test_constraints_ne_match(fake_dict): - assert attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="region", - operator="!=", - value="another_fake_region_text", - ), - ], - ) - assert attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="fake_attribute_name", - operator="!=", - value="random_text", - ), - ], - ) - - -def test_constraints_ne_not_match(fake_dict): - assert not attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="region", - operator="!=", - value="fake_region_text", - ), - ], - ) - - -def test_constraints_NOTEQUALS_match(fake_dict): - assert attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="region", - operator="NOTEQUALS", - value="another_fake_region_text", - ), - ], - ) - assert attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="fake_attribute_name", - operator="NOTEQUALS", - value="random_text", - ), - ], - ) - - -def test_constraints_NOTEQUALS_not_match(fake_dict): - assert not attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="region", - operator="NOTEQUALS", - value="fake_region_text", - ), - ], - ) - - -def test_constraints_LIKE_match(fake_dict): - assert attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="region", - operator="LIKE", - value="fak.*t..t", - ), - ], - ) - assert attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="fake_attribute_name", - operator="LIKE", - value="random_text", - ), - ], - ) - - -def test_constraints_LIKE_not_match(fake_dict): - assert not attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="region", - operator="LIKE", - value="another_fak.*t..t", - ), - ], - ) - assert not attributes_match_constraints( - fake_dict, - [Constraint(attribute="region", operator="LIKE", value="fake_region")], - ) - - -def test_constraints_UNLIKE_match(fake_dict): - assert attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="region", - operator="UNLIKE", - value="another_fak.*t..t", - ), - ], - ) - assert attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="fake_attribute_name", - operator="UNLIKE", - value="random_text", - ), - ], - ) - - -def test_constraints_UNLIKE_not_match(fake_dict): - assert not attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="region", - operator="UNLIKE", - value="fak.*t..t", - ), - ], - ) - - -def test_constraints_all_match(fake_dict): - assert attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="region", - operator="==", - value="fake_region_text", - ), - Constraint( - attribute="pool", - operator="==", - value="fake_pool_text", - ), - ], - ) - - -def test_constraints_all_not_match(fake_dict): - assert not attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="region", - operator="==", - value="another_fake_region_text", - ), - Constraint( - attribute="pool", - operator="==", - value="fake_pool_text", - ), - ], - ) - assert not attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="region", - operator="==", - value="fake_region_text", - ), 
- Constraint( - attribute="pool", - operator="==", - value="another_fake_pool_text", - ), - ], - ) diff --git a/tests/unit/plugins/mesos/execution_framework_test.py b/tests/unit/plugins/mesos/execution_framework_test.py deleted file mode 100644 index ec319daa..00000000 --- a/tests/unit/plugins/mesos/execution_framework_test.py +++ /dev/null @@ -1,568 +0,0 @@ -import socket -import time -from queue import Queue - -import mock -import pytest -from addict import Dict -from pyrsistent import m - -from task_processing.plugins.mesos import metrics -from task_processing.plugins.mesos.constraints import attributes_match_constraints -from task_processing.plugins.mesos.execution_framework import ExecutionFramework -from task_processing.plugins.mesos.execution_framework import TaskMetadata -from task_processing.plugins.mesos.mesos_executor import MesosExecutorCallbacks -from task_processing.plugins.mesos.task_config import MesosTaskConfig - - -@pytest.fixture -def ef(mock_Thread): - return ExecutionFramework("fake_name", "fake_role", mock.Mock(), 240) - - -@pytest.fixture -def mock_driver(): - with mock.patch("pymesos.MesosSchedulerDriver", autospec=True) as m: - m.id = "mock_driver" - yield m - - -@pytest.fixture -def mock_get_metric(): - with mock.patch( - "task_processing.plugins.mesos.execution_framework.get_metric", - ) as mock_get_metric: - yield mock_get_metric - - -@pytest.fixture -def mock_time(): - with mock.patch.object(time, "time") as mock_time: - yield mock_time - - -@pytest.fixture -def mock_sleep(ef): - def stop_killing(task_id): - ef.stopping = True - - with mock.patch.object(time, "sleep", side_effect=stop_killing) as mock_sleep: - yield mock_sleep - - -def test_ef_kills_stuck_tasks(ef, fake_task, mock_sleep, mock_get_metric): - task_id = fake_task.task_id - task_metadata = TaskMetadata( - agent_id="fake_agent_id", - task_config=fake_task, - task_state="TASK_STAGING", - task_state_history=m(TASK_STAGING=0.0), - ) - ef.task_staging_timeout_s = 0 - ef.kill_task = mock.Mock() - ef.blacklist_slave = mock.Mock() - ef.task_metadata = ef.task_metadata.set(task_id, task_metadata) - ef.callbacks = MesosExecutorCallbacks(mock.Mock(), mock.Mock(), mock.Mock()) - - ef._background_check() - - assert ef.kill_task.call_count == 1 - assert ef.kill_task.call_args == mock.call(task_id) - assert ef.blacklist_slave.call_count == 1 - assert ef.blacklist_slave.call_args == mock.call( - agent_id="fake_agent_id", timeout=900 - ) - assert mock_get_metric.call_count == 2 - assert mock_get_metric.call_args_list == [ - mock.call(metrics.TASK_STUCK_COUNT), - mock.call(metrics.BGCHECK_TIME_TIMER), - ] - assert mock_get_metric.return_value.count.call_count == 1 - assert mock_get_metric.return_value.count.call_args == mock.call(1) - - -def test_reenqueue_tasks_stuck_in_unknown_state( - ef, fake_task, mock_sleep, mock_get_metric -): - task_id = fake_task.task_id - task_metadata = TaskMetadata( - agent_id="fake_agent_id", - task_config=fake_task, - task_state="UNKNOWN", - task_state_history=m(UNKNOWN=0.0), - ) - ef.task_staging_timeout_s = 0 - ef.kill_task = mock.Mock() - ef.blacklist_slave = mock.Mock() - ef.enqueue_task = mock.Mock() - ef.task_metadata = ef.task_metadata.set(task_id, task_metadata) - - ef._background_check() - - assert ef.enqueue_task.call_count == 1 - assert ef.enqueue_task.call_args == mock.call(ef.task_metadata[task_id].task_config) - assert mock_get_metric.call_count == 2 - assert mock_get_metric.call_args_list == [ - mock.call(metrics.TASK_FAILED_TO_LAUNCH_COUNT), - 
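
The constraint semantics pinned down by the tests above are small enough to restate. A minimal sketch, assuming anchored-regex LIKE/UNLIKE and vacuous matching for attributes the offer does not advertise, which is what the fake_attribute_name cases verify (_OPS and the function body are illustrative reconstructions):

```python
import re
from typing import Mapping, NamedTuple, Sequence


class Constraint(NamedTuple):
    attribute: str
    operator: str
    value: str


# Operator table mirroring the deleted plugin as exercised by its tests:
# ==/EQUALS and !=/NOTEQUALS are aliases; LIKE/UNLIKE are full regex matches.
_OPS = {
    "==": lambda attr, val: attr == val,
    "EQUALS": lambda attr, val: attr == val,
    "!=": lambda attr, val: attr != val,
    "NOTEQUALS": lambda attr, val: attr != val,
    "LIKE": lambda attr, val: re.fullmatch(val, attr) is not None,
    "UNLIKE": lambda attr, val: re.fullmatch(val, attr) is None,
}


def attributes_match_constraints(
    attributes: Mapping[str, str], constraints: Sequence[Constraint]
) -> bool:
    # A constraint on an attribute the offer lacks matches vacuously.
    return all(
        c.attribute not in attributes
        or _OPS[c.operator](attributes[c.attribute], c.value)
        for c in constraints
    )


attrs = {"pool": "fake_pool_text", "region": "fake_region_text"}
print(attributes_match_constraints(attrs, [Constraint("region", "LIKE", "fak.*t..t")]))  # True
```

Full-match semantics explain why the tests expect "fake_region" not to LIKE-match "fake_region_text" while "fak.*t..t" does.
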
mock.call(metrics.BGCHECK_TIME_TIMER), - ] - assert mock_get_metric.return_value.count.call_count == 1 - assert mock_get_metric.return_value.count.call_args == mock.call(1) - - -def test_offer_matches_pool_no_pool(ef, fake_offer): - match, _ = ef.offer_matches_pool(fake_offer) - assert match - - -def test_offer_matches_pool_match(ef, fake_offer): - ef.pool = "fake_pool_text" - match, _ = ef.offer_matches_pool(fake_offer) - - assert match - - -def test_offer_matches_pool_no_match(ef, fake_offer): - ef.pool = "fake_other_pool_text" - match, _ = ef.offer_matches_pool(fake_offer) - - assert not match - - -def test_offer_matches_constraints_no_constraints(ef, fake_task, fake_offer): - attributes = { - attribute.name: attribute.value for attribute in fake_offer.attributes - } - match = attributes_match_constraints(attributes, fake_task.constraints) - assert match - - -def test_offer_matches_constraints_match(ef, fake_offer): - attributes = { - attribute.name: attribute.text.value for attribute in fake_offer.attributes - } - fake_task = MesosTaskConfig( - image="fake_image", - cmd='echo "fake"', - constraints=[ - ["region", "==", "fake_region_text"], - ], - ) - match = attributes_match_constraints(attributes, fake_task.constraints) - assert match - - -def test_offer_matches_constraints_no_match(ef, fake_offer): - attributes = { - attribute.name: attribute.text.value for attribute in fake_offer.attributes - } - fake_task = MesosTaskConfig( - image="fake_image", - cmd='echo "fake"', - constraints=[ - ["region", "==", "another_fake_region_text"], - ], - ) - match = attributes_match_constraints(attributes, fake_task.constraints) - assert not match - - -def test_kill_task(ef, mock_driver): - ef._driver = mock_driver - - ef.kill_task("fake_task_id") - - assert mock_driver.killTask.call_count == 1 - assert mock_driver.killTask.call_args == mock.call(Dict(value="fake_task_id")) - - -def test_kill_task_from_task_queue(ef, mock_driver): - ef.driver = mock_driver - ef.task_queue = Queue() - ef.task_queue.put(mock.Mock(task_id="fake_task_id")) - ef.task_queue.put(mock.Mock(task_id="fake_task_id1")) - - ef.kill_task("fake_task_id") - - assert mock_driver.killTask.call_count == 0 - assert ef.task_queue.qsize() == 1 - - -def test_blacklist_slave(ef, mock_get_metric, mock_time): - agent_id = "fake_agent_id" - mock_time.return_value = 2.0 - - ef.blacklisted_slaves = ef.blacklisted_slaves.append(agent_id) - ef.blacklist_slave(agent_id, timeout=2.0) - - assert agent_id in ef.blacklisted_slaves - assert mock_get_metric.call_count == 1 - assert mock_get_metric.call_args == mock.call(metrics.BLACKLISTED_AGENTS_COUNT) - assert mock_get_metric.return_value.count.call_count == 1 - assert mock_get_metric.return_value.count.call_args == mock.call(1) - - -def test_unblacklist_slave(ef, mock_time, mock_sleep): - agent_id = "fake_agent_id" - - ef.blacklisted_slaves = ef.blacklisted_slaves.append(agent_id) - ef.unblacklist_slave(agent_id, timeout=0.0) - - assert agent_id not in ef.blacklisted_slaves - - -def test_enqueue_task(ef, fake_task, mock_driver, mock_get_metric): - ef.are_offers_suppressed = True - ef._driver = mock_driver - - ef.enqueue_task(fake_task) - - assert ef.task_metadata[fake_task.task_id].task_state == "TASK_INITED" - assert not ef.task_queue.empty() - assert mock_driver.reviveOffers.call_count == 1 - assert not ef.are_offers_suppressed - assert mock_get_metric.call_count == 1 - assert mock_get_metric.call_args == mock.call(metrics.TASK_ENQUEUED_COUNT) - assert mock_get_metric.return_value.count.call_count 
== 1 - assert mock_get_metric.return_value.count.call_args == mock.call(1) - - -def test_stop(ef): - ef.stop() - - assert ef.stopping - - -def test_initialize_metrics(ef): - default_dimensions = {"framework_name": "fake_name", "framework_role": "fake_role"} - with mock.patch( - "task_processing.plugins.mesos.execution_framework.create_counter", - ) as mock_create_counter, mock.patch( - "task_processing.plugins.mesos.execution_framework.create_timer", - ) as mock_create_timer: - ef._initialize_metrics() - - counters = [ - metrics.TASK_LAUNCHED_COUNT, - metrics.TASK_FINISHED_COUNT, - metrics.TASK_FAILED_COUNT, - metrics.TASK_LAUNCH_FAILED_COUNT, - metrics.TASK_FAILED_TO_LAUNCH_COUNT, - metrics.TASK_KILLED_COUNT, - metrics.TASK_LOST_COUNT, - metrics.TASK_LOST_DUE_TO_INVALID_OFFER_COUNT, - metrics.TASK_ERROR_COUNT, - metrics.TASK_ENQUEUED_COUNT, - metrics.TASK_INSUFFICIENT_OFFER_COUNT, - metrics.TASK_STUCK_COUNT, - metrics.BLACKLISTED_AGENTS_COUNT, - metrics.TASK_OFFER_TIMEOUT, - ] - assert mock_create_counter.call_count == len(counters) - for cnt in counters: - mock_create_counter.assert_any_call(cnt, default_dimensions) - - timers = [ - metrics.TASK_QUEUED_TIME_TIMER, - metrics.OFFER_DELAY_TIMER, - metrics.BGCHECK_TIME_TIMER, - ] - assert mock_create_timer.call_count == len(timers) - for tmr in timers: - mock_create_timer.assert_any_call(tmr, default_dimensions) - - -def test_slave_lost(ef, mock_driver): - ef.slaveLost(mock_driver, "fake_slave_id") - - -def test_registered(ef, mock_driver): - ef.registered(mock_driver, Dict(value="fake_framework_id"), "fake_master_info") - - assert ef._driver == mock_driver - assert ef.event_queue.qsize() == 1 - - -def test_reregistered(ef, mock_driver): - ef.reregistered(mock_driver, "fake_master_info") - - -def test_resource_offers_launch( - ef, fake_task, fake_offer, mock_driver, mock_get_metric, mock_time -): - task_id = fake_task.task_id - ef.driver = mock_driver - ef._last_offer_time = 1.0 - mock_time.return_value = 2.0 - ef.suppress_after = 0.0 - ef.offer_matches_pool = mock.Mock(return_value=(True, None)) - task_metadata = TaskMetadata( - task_config=fake_task, - task_state="fake_state", - task_state_history=m(fake_state=time.time(), TASK_INITED=time.time()), - ) - fake_task_2 = mock.Mock() - ef.callbacks.get_tasks_for_offer = mock.Mock( - return_value=([fake_task], [fake_task_2]) - ) - - ef.task_queue.put(fake_task) - ef.task_queue.put(fake_task_2) - ef.task_metadata = ef.task_metadata.set(task_id, task_metadata) - ef.resourceOffers(ef.driver, [fake_offer]) - - assert ef.task_metadata[task_id].agent_id == "fake_agent_id" - assert mock_driver.suppressOffers.call_count == 0 - assert not ef.are_offers_suppressed - assert mock_driver.declineOffer.call_count == 0 - assert mock_driver.launchTasks.call_count == 1 - assert mock_get_metric.call_count == 4 - mock_get_metric.assert_any_call(metrics.OFFER_DELAY_TIMER) - mock_get_metric.assert_any_call(metrics.TASK_LAUNCHED_COUNT) - mock_get_metric.assert_any_call(metrics.TASK_QUEUED_TIME_TIMER) - mock_get_metric.assert_any_call(metrics.TASK_INSUFFICIENT_OFFER_COUNT) - assert mock_get_metric.return_value.record.call_count == 2 - assert mock_get_metric.return_value.count.call_count == 2 - - -def test_resource_offers_launch_tasks_failed( - ef, fake_task, fake_offer, mock_driver, mock_get_metric, mock_time -): - task_id = fake_task.task_id - ef.driver = mock_driver - ef.driver.launchTasks = mock.Mock(side_effect=socket.timeout) - ef._last_offer_time = None - mock_time.return_value = 2.0 - ef.suppress_after = 0.0 - 
ef.offer_matches_pool = mock.Mock(return_value=(True, None)) - task_metadata = TaskMetadata( - task_config=fake_task, - task_state="fake_state", - task_state_history=m(fake_state=time.time(), TASK_INITED=time.time()), - ) - ef.callbacks.get_tasks_for_offer = mock.Mock(return_value=([fake_task], [])) - ef.task_queue.put(fake_task) - ef.task_metadata = ef.task_metadata.set(task_id, task_metadata) - ef.resourceOffers(ef.driver, [fake_offer]) - - assert mock_driver.suppressOffers.call_count == 0 - assert not ef.are_offers_suppressed - assert mock_driver.declineOffer.call_count == 1 - assert mock_driver.launchTasks.call_count == 1 - assert mock_get_metric.call_count == 3 - assert ef.task_metadata[task_id].task_state == "UNKNOWN" - - -def test_resource_offers_no_tasks_to_launch( - ef, fake_offer, mock_driver, mock_get_metric -): - ef.suppress_after = 0.0 - - ef.resourceOffers(mock_driver, [fake_offer]) - - assert mock_driver.declineOffer.call_args == mock.call( - [fake_offer.id], ef.offer_decline_filter - ) - assert mock_driver.suppressOffers.call_count == 1 - assert ef.are_offers_suppressed - assert mock_driver.launchTasks.call_count == 0 - assert mock_get_metric.call_count == 0 - assert mock_get_metric.return_value.count.call_count == 0 - - -def test_resource_offers_blacklisted_offer( - ef, fake_task, fake_offer, mock_driver, mock_get_metric -): - ef.blacklisted_slaves = ef.blacklisted_slaves.append( - fake_offer.agent_id.value, - ) - ef.task_queue.put(fake_task) - ef.resourceOffers(mock_driver, [fake_offer]) - - assert mock_driver.declineOffer.call_count == 1 - assert mock_driver.declineOffer.call_args == mock.call( - [fake_offer.id], ef.offer_decline_filter - ) - assert mock_driver.launchTasks.call_count == 0 - assert mock_get_metric.call_count == 0 - assert mock_get_metric.return_value.count.call_count == 0 - - -def test_resource_offers_not_for_pool( - ef, fake_task, fake_offer, mock_driver, mock_get_metric -): - ef.offer_matches_pool = mock.Mock(return_value=(False, None)) - - ef.task_queue.put(fake_task) - ef.resourceOffers(mock_driver, [fake_offer]) - - assert ef.offer_matches_pool.call_count == 1 - assert ef.offer_matches_pool.call_args == mock.call(fake_offer) - assert mock_driver.declineOffer.call_count == 1 - assert mock_driver.declineOffer.call_args == mock.call( - [fake_offer.id], ef.offer_decline_filter - ) - assert mock_driver.launchTasks.call_count == 0 - assert mock_get_metric.call_count == 0 - assert mock_get_metric.return_value.count.call_count == 0 - - -def test_resource_offers_unmet_reqs( - ef, fake_task, fake_offer, mock_driver, mock_get_metric -): - ef.callbacks.get_tasks_for_offer = mock.Mock(return_value=([], [fake_task])) - - ef.task_queue.put(fake_task) - ef.resourceOffers(mock_driver, [fake_offer]) - - assert mock_driver.declineOffer.call_count == 1 - assert mock_driver.declineOffer.call_args == mock.call( - [fake_offer.id], ef.offer_decline_filter - ) - assert mock_driver.launchTasks.call_count == 0 - assert mock_get_metric.call_count == 1 - mock_get_metric.assert_any_call(metrics.TASK_INSUFFICIENT_OFFER_COUNT) - assert mock_get_metric.return_value.count.call_count == 1 - - -def status_update_test_prep(state, reason=""): - task = MesosTaskConfig(cmd="/bin/true", name="fake_name", image="fake_image") - task_id = task.task_id - update = Dict(task_id=Dict(value=task_id), state=state, reason=reason) - task_metadata = TaskMetadata( - task_config=task, - task_state="TASK_INITED", - task_state_history=m(TASK_INITED=time.time()), - ) - - return update, task_id, 
task_metadata - - -def test_status_update_record_only(ef, mock_driver): - update, task_id, task_metadata = status_update_test_prep("fake_state1") - ef.translator = mock.Mock() - ef._driver = mock_driver - - ef.task_metadata = ef.task_metadata.set(task_id, task_metadata) - ef.statusUpdate(mock_driver, update) - - assert ef.task_metadata[task_id].task_state == "fake_state1" - assert len(ef.task_metadata[task_id].task_state_history) == 2 - assert mock_driver.acknowledgeStatusUpdate.call_count == 1 - assert mock_driver.acknowledgeStatusUpdate.call_args == mock.call(update) - - -def test_status_update_finished(ef, mock_driver, mock_get_metric): - # finished task does same thing as other states - update, task_id, task_metadata = status_update_test_prep("TASK_FINISHED") - ef.translator = mock.Mock() - ef._driver = mock_driver - - ef.task_metadata = ef.task_metadata.set(task_id, task_metadata) - ef.statusUpdate(mock_driver, update) - - assert task_id not in ef.task_metadata - assert mock_get_metric.call_count == 1 - assert mock_get_metric.call_args == mock.call(metrics.TASK_FINISHED_COUNT) - assert mock_get_metric.return_value.count.call_count == 1 - assert mock_get_metric.return_value.count.call_args == mock.call(1) - assert mock_driver.acknowledgeStatusUpdate.call_count == 1 - assert mock_driver.acknowledgeStatusUpdate.call_args == mock.call(update) - - -def test_ignore_status_update(ef, mock_driver, mock_get_metric): - update, task_id, task_metadata = status_update_test_prep("TASK_FINISHED") - ef.translator = mock.Mock() - ef._driver = mock_driver - - ef.statusUpdate(mock_driver, update) - - assert task_id not in ef.task_metadata - assert mock_get_metric.call_count == 0 - assert mock_get_metric.return_value.count.call_count == 0 - assert mock_driver.acknowledgeStatusUpdate.call_count == 1 - - -def test_task_lost_due_to_invalid_offers(ef, mock_driver, mock_get_metric): - update, task_id, task_metadata = status_update_test_prep( - state="TASK_LOST", reason="REASON_INVALID_OFFERS" - ) - ef.task_metadata = ef.task_metadata.set(task_id, task_metadata) - ef._driver = mock_driver - - ef.statusUpdate(mock_driver, update) - - assert task_id in ef.task_metadata - assert mock_get_metric.call_count == 2 - assert ef.event_queue.qsize() == 0 - assert ef.task_queue.qsize() == 1 - assert mock_driver.acknowledgeStatusUpdate.call_count == 1 - - -def test_background_thread_removes_offer_timeout( - ef, - mock_driver, - fake_task, - mock_time, - mock_sleep, -): - mock_time.return_value = 2.0 - task_id = fake_task.task_id - fake_task = fake_task.set(offer_timeout=1) - task_metadata = TaskMetadata( - agent_id="fake_agent_id", - task_config=fake_task, - task_state="TASK_INITED", - task_state_history=m(TASK_INITED=0.0), - ) - ef.driver = mock_driver - ef.task_metadata = ef.task_metadata.set(task_id, task_metadata) - ef._background_check() - assert ef.task_queue.empty() - assert task_id not in ef.task_metadata.keys() - assert not ef.event_queue.empty() - event = ef.event_queue.get(block=False) - assert event.terminal is True - assert event.success is False - assert event.task_id == task_id - - -def test_launch_tasks_for_offer_task_missing(ef, fake_task, fake_offer): - tasks_to_launch = [fake_task] - ef.launch_tasks_for_offer(fake_offer, tasks_to_launch) - - -def test_reconcile_task_unknown( - ef, - mock_driver, - fake_task, -): - ef._driver = mock_driver - ef._reconcile_tasks_at = 0 - assert fake_task.task_id not in ef.task_metadata - - ef.reconcile_task(fake_task) - assert fake_task.task_id in ef.task_metadata - assert 
mock_driver.reconcileTasks.call_count == 1 - - -def test_reconcile_task_existing( - ef, - mock_driver, - fake_task, -): - ef._driver = mock_driver - ef._reconcile_tasks_at = 0 - ef.task_metadata = ef.task_metadata.set( - fake_task.task_id, - TaskMetadata( - task_config=fake_task, - task_state="TASK_INITED", - task_state_history=m(TASK_INITED=time.time()), - ), - ) - - ef.reconcile_task(fake_task) - task_metadata = ef.task_metadata[fake_task.task_id] - assert len(task_metadata.task_state_history) == 2 - assert mock_driver.reconcileTasks.call_count == 1 diff --git a/tests/unit/plugins/mesos/logging_executor_test.py b/tests/unit/plugins/mesos/logging_executor_test.py deleted file mode 100644 index 69f549e2..00000000 --- a/tests/unit/plugins/mesos/logging_executor_test.py +++ /dev/null @@ -1,185 +0,0 @@ -from queue import Queue - -import mock -import pytest -from addict import Dict - -from task_processing.plugins.mesos.logging_executor import MesosLoggingExecutor -from task_processing.plugins.mesos.task_config import MesosTaskConfig - - -@pytest.fixture -def mock_Thread(): - with mock.patch("task_processing.plugins.mesos.logging_executor.Thread"): - yield - - -@pytest.fixture -def source_queue(): - return Queue() - - -@pytest.fixture -def mock_downstream(source_queue): - executor = mock.MagicMock() - executor.get_event_queue.return_value = source_queue - return executor - - -@pytest.fixture -def mock_logging_executor(mock_Thread, mock_downstream): - return MesosLoggingExecutor(downstream_executor=mock_downstream) - - -def test_run(mock_logging_executor, mock_downstream): - mock_config = MesosTaskConfig(image="fake", cmd="cat") - mock_logging_executor.run(mock_config) - assert mock_downstream.run.call_count == 1 - - -def test_kill(mock_logging_executor, mock_downstream): - result = mock_logging_executor.kill("task") - assert result == mock_downstream.kill.return_value - assert mock_downstream.kill.call_args == mock.call("task") - - -def test_reconcile(mock_logging_executor, mock_downstream): - mock_logging_executor.reconcile("task") - assert mock_downstream.reconcile.call_args == mock.call("task") - - -def test_stop(mock_logging_executor, mock_downstream): - mock_logging_executor.stop() - assert mock_downstream.stop.call_args == mock.call() - assert mock_logging_executor.stopping - - -def test_event_loop_stores_staging_event(mock_logging_executor, source_queue): - raw = Dict( - { - "offer": { - "url": { - "scheme": "http", - "address": { - "ip": "1.2.3.4", - "port": 5051, - }, - }, - }, - } - ) - mock_event = mock.Mock( - kind="task", - platform_type="staging", - task_id="my_task", - raw=raw, - ) - - mock_logging_executor.stopping = True - source_queue.put(mock_event) - - mock_logging_executor.event_loop() - task_data = mock_logging_executor.staging_tasks["my_task"] - assert task_data == "http://1.2.3.4:5051" - - -def test_event_loop_stores_staging_event_with_bogus_url( - mock_logging_executor, source_queue -): - raw = Dict( - { - "offer": { - "url": { - "scheme": None, - "address": {}, - }, - }, - } - ) - mock_event = mock.Mock( - kind="task", - platform_type="staging", - task_id="my_task", - raw=raw, - ) - - mock_logging_executor.stopping = True - source_queue.put(mock_event) - - mock_logging_executor.event_loop() - task_data = mock_logging_executor.staging_tasks["my_task"] - assert task_data is None - - -def test_event_loop_continues_after_unknown_task(mock_logging_executor, source_queue): - unknown_event = mock.Mock( - kind="task", - platform_type="running", - task_id="new_task", - ) - 
other_event = mock.Mock( - kind="task", - platform_type="something", - task_id="other_task", - ) - - mock_logging_executor.stopping = True - source_queue.put(unknown_event) - source_queue.put(other_event) - - mock_logging_executor.event_loop() - - dest_queue = mock_logging_executor.get_event_queue() - assert dest_queue.get() == unknown_event - assert dest_queue.get() == other_event - - -def test_event_loop_running_event(mock_logging_executor, source_queue): - raw = Dict( - { - "container_status": { - "container_id": { - "value": "cid", - }, - }, - "executor_id": { - "value": "eid", - }, - } - ) - mock_event = mock.Mock( - kind="task", - platform_type="running", - task_id="my_task", - raw=raw, - ) - - mock_logging_executor.stopping = True - source_queue.put(mock_event) - mock_logging_executor.staging_tasks = mock_logging_executor.staging_tasks.set( - "my_task", "my_log_url" - ) - - mock_logging_executor.event_loop() - assert "my_task" in mock_logging_executor.running_tasks - assert "my_task" not in mock_logging_executor.staging_tasks - - -def test_event_loop_terminal_event(mock_logging_executor, source_queue): - mock_event = mock.Mock( - kind="task", - platform_type="finished", - task_id="my_task", - terminal=True, - ) - - mock_logging_executor.stopping = True - source_queue.put(mock_event) - mock_logging_executor.running_tasks = mock_logging_executor.running_tasks.set( - "my_task", mock.Mock() - ) - - mock_logging_executor.event_loop() - - assert "my_task" in mock_logging_executor.running_tasks - assert "my_task" in mock_logging_executor.done_tasks diff --git a/tests/unit/plugins/mesos/mesos_executor_test.py b/tests/unit/plugins/mesos/mesos_executor_test.py deleted file mode 100644 index fea3b3cc..00000000 --- a/tests/unit/plugins/mesos/mesos_executor_test.py +++ /dev/null @@ -1,90 +0,0 @@ -import mock -import pytest - -from task_processing.plugins.mesos.mesos_executor import MesosExecutor -from task_processing.plugins.mesos.mesos_executor import MesosExecutorCallbacks - - -@pytest.fixture -def mock_callbacks(): - return (MesosExecutorCallbacks(mock.Mock(), mock.Mock(), mock.Mock()),) - - -@pytest.fixture -def mesos_executor(request, mock_callbacks, mock_Thread, mock_fw_and_driver): - dummy_executor = MesosExecutor("role", callbacks=mock_callbacks) - - def mesos_executor_teardown(): - dummy_executor.stop() - - request.addfinalizer(mesos_executor_teardown) - - return dummy_executor - - -def test_creates_execution_framework_and_driver( - mock_callbacks, - mock_Thread, - mesos_executor, - mock_fw_and_driver, -): - execution_framework, mesos_driver = mock_fw_and_driver - assert mesos_executor.execution_framework is execution_framework.return_value - assert execution_framework.call_args == mock.call( - name="taskproc-default", - task_staging_timeout_s=240, - initial_decline_delay=1.0, - pool=None, - role="role", - callbacks=mock_callbacks, - framework_id=None, - ) - - assert mesos_executor.driver is mesos_driver.return_value - assert mesos_driver.call_args == mock.call( - sched=execution_framework.return_value, - framework=execution_framework.return_value.framework_info, - use_addict=True, - master_uri="127.0.0.1:5050", - implicit_acknowledgements=False, - principal="taskproc", - secret=None, - failover=False, - ) - - assert mock_Thread.call_args == mock.call( - target=mesos_executor._run_driver, args=() - ) - - -def test_run_passes_task_to_execution_framework(mesos_executor): - mesos_executor.run("task") - assert mesos_executor.execution_framework.enqueue_task.call_args == mock.call( - "task" 
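
The staging-event handling tested in logging_executor_test above stores one derived value per task: the agent base URL built from the offer. A minimal sketch, assuming plain nested dicts in place of addict.Dict (log_url_from_offer is an illustrative name):

```python
from typing import Any, Mapping, Optional


def log_url_from_offer(offer: Mapping[str, Any]) -> Optional[str]:
    """Build the agent base URL (e.g. "http://1.2.3.4:5051") for a staging
    task, returning None for an incomplete URL (the "bogus url" test case)."""
    url = offer.get("url", {})
    scheme = url.get("scheme")
    address = url.get("address", {})
    ip, port = address.get("ip"), address.get("port")
    if not (scheme and ip and port):
        return None
    return f"{scheme}://{ip}:{port}"


offer = {"url": {"scheme": "http", "address": {"ip": "1.2.3.4", "port": 5051}}}
print(log_url_from_offer(offer))                          # http://1.2.3.4:5051
print(log_url_from_offer({"url": {"scheme": None, "address": {}}}))  # None
```
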
- ) - - -def test_stop_shuts_down_properly(mesos_executor): - mesos_executor.stop() - assert mesos_executor.execution_framework.stop.call_count == 1 - assert mesos_executor.driver.stop.call_count == 1 - assert mesos_executor.driver.join.call_count == 1 - - -def test_event_queue(mocker, mesos_executor): - q = mocker.Mock() - mesos_executor.execution_framework.event_queue = q - assert mesos_executor.get_event_queue() is q - - -def test_kill_returns(mesos_executor): - result = mesos_executor.kill("task") - assert result == mesos_executor.execution_framework.kill_task.return_value - assert mesos_executor.execution_framework.kill_task.call_args == mock.call("task") - - -def test_reconcile(mesos_executor): - mesos_executor.reconcile("task") - assert mesos_executor.execution_framework.reconcile_task.call_args == mock.call( - "task" - ) diff --git a/tests/unit/plugins/mesos/mesos_task_config_test.py b/tests/unit/plugins/mesos/mesos_task_config_test.py deleted file mode 100644 index 1514bb11..00000000 --- a/tests/unit/plugins/mesos/mesos_task_config_test.py +++ /dev/null @@ -1,35 +0,0 @@ -from pyrsistent import InvariantException - -from task_processing.plugins.mesos.task_config import MesosTaskConfig - - -def test_mesos_task_config_factories(): - m = MesosTaskConfig( - cmd="/bin/true", cpus=1, mem=64, disk=15, gpus=6.0, image="fake_image" - ) - - assert type(m.cpus) is float - assert m.cpus == 1.0 - - assert type(m.mem) is float - assert m.mem == 64.0 - - assert type(m.disk) is float - assert m.disk == 15.0 - - assert type(m.gpus) is int - assert m.gpus == 6 - - try: - m = m.set(name="a" * 256) - assert False, "Task id longer than 255 characters was accepted" - except InvariantException as e: - print(e) - assert True - - -def test_mesos_task_config_set_task_id(): - m = MesosTaskConfig(cmd="/bin/true", image="fake") - new_task_id = "new" + m.task_id - result = m.set_task_id(new_task_id) - assert result.task_id == new_task_id diff --git a/tests/unit/plugins/mesos/mesos_task_executor_test.py b/tests/unit/plugins/mesos/mesos_task_executor_test.py deleted file mode 100644 index e6b89a7f..00000000 --- a/tests/unit/plugins/mesos/mesos_task_executor_test.py +++ /dev/null @@ -1,48 +0,0 @@ -import mock -import pytest - -from task_processing.plugins.mesos.mesos_task_executor import get_tasks_for_offer - - -@pytest.fixture -def resource_patches(): - with mock.patch( - "task_processing.plugins.mesos.mesos_task_executor.task_fits", - ) as mock_fits, mock.patch( - "task_processing.plugins.mesos.mesos_task_executor.attributes_match_constraints", - ) as mock_constraints, mock.patch( - "task_processing.plugins.mesos.mesos_task_executor.allocate_task_resources", - ) as mock_allocate: - yield mock_fits, mock_constraints, mock_allocate - - -@pytest.mark.parametrize("fits,constraints", [(False, True), (True, False)]) -def test_get_tasks_for_offer_doesnt_fit(resource_patches, fits, constraints): - mock_fits, mock_constraints, mock_allocate = resource_patches - mock_fits.return_value = fits - mock_constraints.return_value = constraints - tasks_to_launch, tasks_to_defer = get_tasks_for_offer( - [mock.Mock()], - mock.Mock(), - mock.Mock(), - "role", - ) - - assert mock_allocate.call_count == 0 - assert len(tasks_to_launch) == 0 - assert len(tasks_to_defer) == 1 - - -def test_get_tasks_for_offer(resource_patches): - _, _, mock_allocate = resource_patches - mock_allocate.return_value = mock.Mock(), [] - tasks_to_launch, tasks_to_defer = get_tasks_for_offer( - [mock.Mock()], - mock.Mock(), - mock.Mock(), - "role", - ) - - 
assert mock_allocate.call_count == 1 - assert len(tasks_to_launch) == 1 - assert len(tasks_to_defer) == 0 diff --git a/tests/unit/plugins/mesos/resource_helpers_test.py b/tests/unit/plugins/mesos/resource_helpers_test.py deleted file mode 100644 index d8038b73..00000000 --- a/tests/unit/plugins/mesos/resource_helpers_test.py +++ /dev/null @@ -1,66 +0,0 @@ -import pytest -from pyrsistent import m -from pyrsistent import v - -from task_processing.plugins.mesos.resource_helpers import allocate_task_resources -from task_processing.plugins.mesos.resource_helpers import get_offer_resources -from task_processing.plugins.mesos.resource_helpers import ResourceSet -from task_processing.plugins.mesos.resource_helpers import task_fits - - -@pytest.fixture -def offer_resources(): - return ResourceSet( - cpus=10, - mem=1024, - disk=1000, - gpus=1, - ) - - -@pytest.mark.parametrize("role", ["fake_role", "none"]) -def test_get_offer_resources(fake_offer, role): - assert get_offer_resources(fake_offer, role) == ResourceSet( - cpus=10 if role != "none" else 0, - mem=1024 if role != "none" else 0, - disk=1000 if role != "none" else 0, - gpus=1 if role != "none" else 0, - ports=v(m(begin=31200, end=31500)) if role != "none" else v(), - ) - - -@pytest.mark.parametrize( - "available_ports", - [ - v(m(begin=5, end=10)), - v(m(begin=3, end=3), m(begin=6, end=10)), - ], -) -def test_allocate_task_resources(fake_task, offer_resources, available_ports): - offer_resources = offer_resources.set("ports", available_ports) - expected_port = available_ports[0].begin - consumed, remaining = allocate_task_resources(fake_task, offer_resources) - assert consumed == fake_task.set(ports=v(m(begin=expected_port, end=expected_port))) - assert remaining == { - "cpus": 0, - "mem": 0, - "disk": 0, - "gpus": 0, - "ports": v(m(begin=6, end=10)), - } - - -@pytest.mark.parametrize( - "cpus,available_ports", - [ - (5, v([m(begin=5, end=10)])), - (10, v()), - (10, v([m(begin=5, end=10)])), - ], -) -def test_task_fits(fake_task, offer_resources, cpus, available_ports): - offer_resources = offer_resources.set("cpus", cpus) - offer_resources = offer_resources.set("ports", available_ports) - assert task_fits(fake_task, offer_resources) == ( - cpus == 10 and len(available_ports) > 0 - ) diff --git a/tests/unit/plugins/mesos/retrying_executor_test.py b/tests/unit/plugins/mesos/retrying_executor_test.py deleted file mode 100644 index 56e81eee..00000000 --- a/tests/unit/plugins/mesos/retrying_executor_test.py +++ /dev/null @@ -1,287 +0,0 @@ -from queue import Queue - -import mock -import pytest - -from task_processing.interfaces.event import Event -from task_processing.plugins.mesos.retrying_executor import RetryingExecutor -from task_processing.plugins.mesos.task_config import MesosTaskConfig - - -@pytest.fixture -def mock_Thread(): - with mock.patch("task_processing.plugins.mesos.retrying_executor.Thread"): - yield - - -@pytest.fixture -def source_queue(): - return Queue() - - -@pytest.fixture -def mock_downstream(source_queue): - executor = mock.MagicMock() - executor.get_event_queue.return_value = source_queue - return executor - - -@pytest.fixture -def mock_retrying_executor(mock_Thread, mock_downstream): - return RetryingExecutor( - downstream_executor=mock_downstream, - retries=2, - ) - - -@pytest.fixture -def mock_task_config(): - return MesosTaskConfig( - uuid="mock_uuid", - name="mock_name", - image="mock_image", - cmd="mock_cmd", - retries=5, - ) - - -@pytest.fixture -def mock_event(mock_task_config, is_terminal=False): - return Event( 
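
The allocation behavior the resource_helpers tests above pin down is scalar fit plus first-available-port assignment. A minimal sketch, assuming port ranges are dicts with begin/end keys as in the fixtures (task_fits mirrors the deleted helper's contract; allocate_port is an illustrative simplification of allocate_task_resources):

```python
from typing import Any, Dict, List, Tuple

Ports = List[Dict[str, int]]  # e.g. [{"begin": 5, "end": 10}]


def task_fits(task: Dict[str, Any], offer: Dict[str, Any]) -> bool:
    """Every scalar resource must fit and at least one port range must remain."""
    scalars_ok = all(task[r] <= offer[r] for r in ("cpus", "mem", "disk", "gpus"))
    return scalars_ok and len(offer["ports"]) > 0


def allocate_port(ports: Ports) -> Tuple[int, Ports]:
    """Take the first port of the first range, shrinking or dropping that range."""
    first, rest = ports[0], ports[1:]
    if first["begin"] < first["end"]:
        rest = [{"begin": first["begin"] + 1, "end": first["end"]}] + rest
    return first["begin"], rest


port, remaining = allocate_port([{"begin": 3, "end": 3}, {"begin": 6, "end": 10}])
print(port, remaining)  # 3 [{'begin': 6, 'end': 10}]
```

This reproduces the parametrized expectation above: begin==end ranges are consumed whole, otherwise the range shrinks by one.
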
- kind="task", - timestamp=1234.5678, - terminal=is_terminal, - success=False, - task_id=mock_task_config.task_id, - platform_type="mesos", - message="mock_message", - task_config=mock_task_config, - raw="raw_event", - ) - - -# task_retry ############################################################# -def test_task_retry(mock_retrying_executor, mock_event): - mock_retrying_executor.task_retries = mock_retrying_executor.task_retries.set( - mock_event.task_id, 3 - ) - mock_retrying_executor.run = mock.Mock() - - mock_retrying_executor.retry(mock_event) - - assert mock_retrying_executor.task_retries[mock_event.task_id] == 2 - assert mock_retrying_executor.run.call_count == 1 - - -def test_task_retry_retries_exhausted(mock_retrying_executor, mock_event): - mock_retrying_executor.task_retries = mock_retrying_executor.task_retries.set( - mock_event.task_id, 0 - ) - mock_retrying_executor.run = mock.Mock() - - retry_attempted = mock_retrying_executor.retry(mock_event) - - assert mock_retrying_executor.task_retries[mock_event.task_id] == 0 - assert mock_retrying_executor.run.call_count == 0 - assert not retry_attempted - - -# retry_loop ############################################################# -def test_retry_loop_retries_task(mock_retrying_executor, mock_event): - mock_event = mock_event.set("terminal", True) - mock_retrying_executor.stopping = True - mock_retrying_executor._is_current_attempt = mock.Mock(return_value=True) - mock_retrying_executor._restore_task_id = mock.Mock(return_value=mock_event) - mock_retrying_executor.retry = mock.Mock(return_value=True) - mock_retrying_executor.retry_pred = mock.Mock(return_value=True) - mock_retrying_executor.src_queue.put(mock_event) - mock_retrying_executor.task_retries = mock_retrying_executor.task_retries.set( - mock_event.task_id, 1 - ) - - mock_retrying_executor.retry_loop() - - assert mock_retrying_executor.dest_queue.qsize() == 0 - assert mock_retrying_executor.retry.call_count == 1 - - -def test_retry_loop_does_not_retry_task(mock_retrying_executor, mock_event): - mock_event = mock_event.set("terminal", True) - mock_retrying_executor.stopping = True - mock_retrying_executor._is_current_attempt = mock.Mock(return_value=True) - mock_retrying_executor.retry = mock.Mock(return_value=False) - mock_retrying_executor.retry_pred = mock.Mock(return_value=False) - mock_retrying_executor.task_retries = mock_retrying_executor.task_retries.set( - mock_event.task_id, 1 - ) - modified_task_id = mock_event.task_id + "-retry1" - modified_mock_event = mock_event.set("task_id", modified_task_id) - mock_retrying_executor.src_queue = Queue() - mock_retrying_executor.src_queue.put(modified_mock_event) - - mock_retrying_executor.retry_loop() - - assert mock_retrying_executor.dest_queue.qsize() == 1 - assert len(mock_retrying_executor.task_retries) == 0 - - -def test_retry_loop_filters_out_non_task(mock_retrying_executor): - mock_event = Event( - kind="control", raw="some message", message="stop", terminal=True - ) - - mock_retrying_executor.stopping = True - mock_retrying_executor._is_current_attempt = mock.Mock(return_value=True) - mock_retrying_executor.event_with_retries = mock.Mock() - mock_retrying_executor.src_queue.put(mock_event) - - mock_retrying_executor.retry_loop() - - assert mock_retrying_executor.dest_queue.qsize() == 1 - - -# If retrying_executor receives an event about an attempt for a task the -# executor does not know about, it should add the task into task_retries -# and assume the event's attempt is the current attempt -def 
test_retry_loop_recover_attempt(mock_retrying_executor, mock_event): - original_task_id = mock_event.task_id - modified_mock_event = mock_event.set("task_id", original_task_id + "-retry6") - modified_mock_event = modified_mock_event.set("terminal", True) - mock_retrying_executor.stopping = True - mock_retrying_executor.retry = mock.Mock(return_value=True) - mock_retrying_executor.retry_pred = mock.Mock(return_value=True) - mock_retrying_executor.src_queue.put(modified_mock_event) - - mock_retrying_executor.retry_loop() - - assert mock_retrying_executor.dest_queue.qsize() == 0 - assert mock_retrying_executor.retry.call_count == 1 - assert mock_retrying_executor.task_retries[original_task_id] == 6 - - -# run #################################################################### -def test_run(mock_retrying_executor, mock_downstream, mock_task_config): - mock_retrying_executor.run(mock_task_config) - - assert mock_downstream.run.call_count == 1 - assert mock_retrying_executor.task_retries[mock_task_config.task_id] == 5 - - # Config should be the same, except with retry number appended - config_with_retry = mock_downstream.run.call_args[0][0] - assert config_with_retry.task_id == mock_task_config.task_id + "-retry5" - assert config_with_retry.cmd == mock_task_config.cmd - assert config_with_retry.image == mock_task_config.image - - -def test_run_default_retries(mock_retrying_executor, mock_downstream): - mock_config = MesosTaskConfig(image="fake_image", cmd="some command") - mock_retrying_executor.run(mock_config) - assert mock_downstream.run.call_count == 1 - - assert mock_retrying_executor.task_retries[mock_config.task_id] == 2 - - -# reconcile ############################################################## -def test_reconcile(mock_retrying_executor, mock_downstream): - mock_retrying_executor.reconcile("task") - - assert mock_downstream.reconcile.call_args == mock.call("task") - - -# kill ################################################################### -def test_kill(mock_retrying_executor, mock_downstream): - result = mock_retrying_executor.kill("task") - - assert result == mock_downstream.kill.return_value - assert mock_downstream.kill.call_args == mock.call("task") - assert mock_retrying_executor.task_retries["task"] == -1 - - -# stop ################################################################### -def test_stop(mock_retrying_executor, mock_downstream): - mock_retrying_executor.stop() - - assert mock_downstream.stop.call_args == mock.call() - assert mock_retrying_executor.stopping is True - - -# _task_config_with_retry ################################################ -def test_task_config_with_retry(mock_retrying_executor, mock_task_config): - mock_retrying_executor.task_retries = mock_retrying_executor.task_retries.set( - mock_task_config.task_id, 2 - ) - - ret_value = mock_retrying_executor._task_config_with_retry(mock_task_config) - - assert ret_value.task_id == mock_task_config.task_id + "-retry2" - - -# _restore_task_id ####################################################### -def test_restore_task_id(mock_retrying_executor, mock_event): - original_task_id = mock_event.task_id - mock_retrying_executor.task_retries = mock_retrying_executor.task_retries.set( - mock_event.task_id, 1 - ) - modified_task_config = mock_event.task_config.set( - "uuid", str(mock_event.task_config.uuid) + "-retry1" - ) - mock_event = mock_event.set("task_config", modified_task_config) - - ret_value = mock_retrying_executor._restore_task_id(mock_event, original_task_id) - - assert mock_event.task_id == 
ret_value.task_id - - -# _is_current_attempt #################################################### -def test_is_current_attempt( - mock_retrying_executor, - mock_event, - mock_task_config, -): - original_task_id = mock_event.task_id - mock_retrying_executor.task_retries = mock_retrying_executor.task_retries.set( - mock_event.task_id, 2 - ) - modified_task_id = str(mock_event.task_config.uuid) + "-retry2" - modified_task_config = mock_event.task_config.set("uuid", modified_task_id) - modified_mock_event = mock_event.set("task_config", modified_task_config) - modified_mock_event = mock_event.set("task_id", modified_task_id) - - ret_value = mock_retrying_executor._is_current_attempt( - modified_mock_event, original_task_id - ) - - assert ret_value is True - - -def test_is_not_current_attempt(mock_retrying_executor, mock_event): - original_task_id = mock_event.task_id - mock_retrying_executor.task_retries = mock_retrying_executor.task_retries.set( - mock_event.task_id, 2 - ) - modified_task_id = str(mock_event.task_config.uuid) + "-retry1" - modified_task_config = mock_event.task_config.set("uuid", modified_task_id) - modified_mock_event = mock_event.set("task_config", modified_task_config) - modified_mock_event = mock_event.set("task_id", modified_task_id) - - ret_value = mock_retrying_executor._is_current_attempt( - modified_mock_event, original_task_id - ) - - assert ret_value is False - - -def test_is_unknown_attempt(mock_retrying_executor, mock_event): - original_task_id = mock_event.task_id - modified_task_id = str(mock_event.task_config.uuid) + "-retry8" - modified_task_config = mock_event.task_config.set("uuid", modified_task_id) - modified_mock_event = mock_event.set("task_config", modified_task_config) - modified_mock_event = mock_event.set("task_id", modified_task_id) - - ret_value = mock_retrying_executor._is_current_attempt( - modified_mock_event, - original_task_id, - ) - - assert ret_value is True - assert mock_retrying_executor.task_retries.get(original_task_id) == 8 diff --git a/tests/unit/plugins/mesos/timeout_executor_test.py b/tests/unit/plugins/mesos/timeout_executor_test.py deleted file mode 100644 index 2c787e8e..00000000 --- a/tests/unit/plugins/mesos/timeout_executor_test.py +++ /dev/null @@ -1,209 +0,0 @@ -from queue import Queue - -import mock -import pytest - -from task_processing.interfaces.event import Event -from task_processing.plugins.mesos.task_config import MesosTaskConfig -from task_processing.plugins.mesos.timeout_executor import TaskEntry -from task_processing.plugins.mesos.timeout_executor import TimeoutExecutor - - -@pytest.fixture -def mock_Thread(): - with mock.patch("task_processing.plugins.mesos.timeout_executor.Thread"): - yield - - -@pytest.fixture -def source_queue(): - return Queue() - - -@pytest.fixture -def mock_downstream(source_queue): - executor = mock.MagicMock() - executor.get_event_queue.return_value = source_queue - return executor - - -@pytest.fixture -def mock_timeout_executor(mock_Thread, mock_downstream): - return TimeoutExecutor(downstream_executor=mock_downstream) - - -@pytest.fixture -def mock_task_config(): - return MesosTaskConfig( - uuid="mock_uuid", - name="mock_name", - image="mock_image", - cmd="mock_cmd", - timeout=1000, - ) - - -@pytest.fixture -def mock_entry(mock_task_config): - return TaskEntry( - task_id=mock_task_config.task_id, - deadline=mock_task_config.timeout + 2000, - ) - - -@pytest.fixture -def mock_event(mock_task_config): - return Event( - kind="task", - timestamp=1234.5678, - terminal=True, - 
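
The retry bookkeeping tested above hangs off one convention: attempt N runs downstream under the original task id suffixed with "-retryN". A minimal sketch of that convention (helper names are illustrative):

```python
from typing import Tuple


def with_retry_suffix(task_id: str, attempt: int) -> str:
    # Attempt N of a task runs downstream under "<task_id>-retryN".
    return f"{task_id}-retry{attempt}"


def split_retry_suffix(task_id: str) -> Tuple[str, int]:
    # Recover the original id and the attempt number from a downstream event.
    base, _, attempt = task_id.rpartition("-retry")
    return base, int(attempt)


assert with_retry_suffix("my-task", 5) == "my-task-retry5"
assert split_retry_suffix("my-task-retry5") == ("my-task", 5)
```

This is also how the "unknown attempt" recovery test works: an event for an untracked id is parsed and its suffix is adopted as the current attempt.
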
task_id=mock_task_config.task_id, - platform_type="mesos", - message="mock_message", - task_config=mock_task_config, - raw="raw_event", - ) - - -# timeout_loop ########################################################### -def test_timeout_loop_nontask( - mock_timeout_executor, - mock_event, -): - mock_event = mock_event.set("kind", "control") - mock_entry = TaskEntry("different_id", deadline=1234) - mock_timeout_executor.stopping = True - mock_timeout_executor.src_queue.put(mock_event) - mock_timeout_executor.running_tasks.append(mock_entry) - - with mock.patch("time.time", mock.Mock(return_value=0)): - mock_timeout_executor.timeout_loop() - - assert len(mock_timeout_executor.running_tasks) == 1 - - -def test_timeout_loop_terminal_task_timed_out( - mock_timeout_executor, - mock_event, - mock_entry, -): - mock_timeout_executor.stopping = True - mock_timeout_executor.src_queue.put(mock_event) - mock_timeout_executor.running_tasks.append(mock_entry) - mock_timeout_executor.killed_tasks.append(mock_entry.task_id) - mock_timeout_executor.downstream_executor.kill = mock.Mock() - - mock_timeout_executor.timeout_loop() - - assert mock_timeout_executor.downstream_executor.kill.call_count == 0 - assert len(mock_timeout_executor.running_tasks) == 0 - assert len(mock_timeout_executor.killed_tasks) == 0 - - -def test_timeout_loop_existing_nonterminal_task( - mock_timeout_executor, - mock_event, - mock_entry, -): - mock_event = mock_event.set("terminal", False) - mock_timeout_executor.stopping = True - mock_timeout_executor.src_queue.put(mock_event) - mock_timeout_executor.running_tasks.append(mock_entry) - mock_timeout_executor.downstream_executor.kill = mock.Mock() - - with mock.patch("time.time", mock.Mock(return_value=10000)): - mock_timeout_executor.timeout_loop() - - assert mock_timeout_executor.downstream_executor.kill.call_args == mock.call( - mock_entry.task_id - ) - assert len(mock_timeout_executor.running_tasks) == 0 - assert len(mock_timeout_executor.killed_tasks) == 1 - - -def test_timeout_loop_nonexistent_nonterminal_task( - mock_timeout_executor, - mock_event, - mock_entry, -): - mock_event = mock_event.set("terminal", False) - mock_timeout_executor.stopping = True - mock_timeout_executor.src_queue.put(mock_event) - mock_timeout_executor.downstream_executor.kill = mock.Mock() - - with mock.patch("time.time", mock.Mock(return_value=10000)): - mock_timeout_executor.timeout_loop() - - assert mock_timeout_executor.downstream_executor.kill.call_args == mock.call( - mock_entry.task_id - ) - assert len(mock_timeout_executor.running_tasks) == 0 - assert len(mock_timeout_executor.killed_tasks) == 1 - - -# run #################################################################### -def test_run(mock_timeout_executor, mock_downstream): - mock_config = MesosTaskConfig(image="fake", cmd="cat", timeout=60) - mock_timeout_executor.run(mock_config) - assert mock_downstream.run.call_count == 1 - - assert len(mock_timeout_executor.running_tasks) == 1 - - -# reconcile ############################################################## -def test_reconcile(mock_timeout_executor, mock_downstream): - mock_timeout_executor.reconcile("task") - assert mock_downstream.reconcile.call_args == mock.call("task") - - -# kill ################################################################### -def test_kill_existing_task(mock_timeout_executor, mock_downstream): - mock_timeout_executor.running_tasks = [TaskEntry("task", 10)] - mock_timeout_executor.downstream_executor.kill = mock.Mock(return_value=True) - - result = 
mock_timeout_executor.kill("task") - - assert result == mock_downstream.kill.return_value - assert mock_downstream.kill.call_args == mock.call("task") - assert len(mock_timeout_executor.running_tasks) == 0 - assert len(mock_timeout_executor.killed_tasks) == 1 - - -# stop ################################################################### -def test_stop(mock_timeout_executor, mock_downstream): - mock_timeout_executor.stop() - assert mock_downstream.stop.call_args == mock.call() - assert mock_timeout_executor.stopping - - -# _insert_new_running_task_entry ######################################### -def test_insert_new_running_task_entry_enumerate(mock_timeout_executor): - mock_entry_one = TaskEntry("fake_entry_one", 1) - mock_entry_two = TaskEntry("fake_entry_two", 2) - mock_entry_three = TaskEntry("fake_entry_three", 3) - mock_timeout_executor.running_tasks.append(mock_entry_one) - mock_timeout_executor.running_tasks.append(mock_entry_three) - - mock_timeout_executor._insert_new_running_task_entry(mock_entry_two) - - assert [entry.deadline for entry in mock_timeout_executor.running_tasks] == [ - 1, - 2, - 3, - ] - - -def test_insert_new_running_task_entry_append(mock_timeout_executor): - mock_entry_one = TaskEntry("fake_entry_one", 1) - mock_entry_two = TaskEntry("fake_entry_two", 2) - mock_entry_three = TaskEntry("fake_entry_three", 3) - mock_timeout_executor.running_tasks.append(mock_entry_one) - mock_timeout_executor.running_tasks.append(mock_entry_two) - - mock_timeout_executor._insert_new_running_task_entry(mock_entry_three) - - assert [entry.deadline for entry in mock_timeout_executor.running_tasks] == [ - 1, - 2, - 3, - ] diff --git a/tests/unit/plugins/mesos/translator_test.py b/tests/unit/plugins/mesos/translator_test.py deleted file mode 100644 index 6dc44fa3..00000000 --- a/tests/unit/plugins/mesos/translator_test.py +++ /dev/null @@ -1,151 +0,0 @@ -import addict -import mock -import pytest -from pyrsistent import v - -from task_processing.interfaces.event import Event -from task_processing.plugins.mesos.translator import make_mesos_task_info -from task_processing.plugins.mesos.translator import MESOS_STATUS_MAP -from task_processing.plugins.mesos.translator import mesos_update_to_event - - -@pytest.mark.parametrize( - "gpus_count,containerizer,container", - [ - ( - 1.0, - "MESOS", - addict.Dict( - type="MESOS", - volumes=[ - addict.Dict( - container_path="fake_container_path", - host_path="fake_host_path", - mode="RO", - ) - ], - mesos=addict.Dict( - image=addict.Dict( - type="DOCKER", - docker=addict.Dict(name="fake_image"), - cached=True, - ), - ), - network_infos=addict.Dict( - port_mappings=[addict.Dict(host_port=31200, container_port=8888)], - ), - ), - ), - ( - 0, - "DOCKER", - addict.Dict( - type="DOCKER", - volumes=[ - addict.Dict( - container_path="fake_container_path", - host_path="fake_host_path", - mode="RO", - ) - ], - docker=addict.Dict( - image="fake_image", - network="BRIDGE", - force_pull_image=False, - port_mappings=[addict.Dict(host_port=31200, container_port=8888)], - parameters=[], - ), - ), - ), - ], -) -def test_make_mesos_task_info( - fake_task, - fake_offer, - gpus_count, - containerizer, - container, -): - tid = fake_task.task_id - fake_task = fake_task.set( - volumes=v( - addict.Dict( - mode="RO", - container_path="fake_container_path", - host_path="fake_host_path", - ) - ), - gpus=gpus_count, - containerizer=containerizer, - ) - - task_info = make_mesos_task_info( - fake_task, - fake_offer.agent_id.value, - "fake_role", - ) - - expected_task_info = 
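
The two _insert_new_running_task_entry tests above pin one invariant: running_tasks stays sorted by deadline, so the timeout loop only ever inspects the head of the list. A minimal sketch using bisect under the same TaskEntry shape (insert_entry is an illustrative name):

```python
import bisect
from typing import List, NamedTuple


class TaskEntry(NamedTuple):
    task_id: str
    deadline: float


def insert_entry(running_tasks: List[TaskEntry], entry: TaskEntry) -> None:
    """Insert while keeping the list sorted ascending by deadline."""
    deadlines = [e.deadline for e in running_tasks]
    running_tasks.insert(bisect.bisect(deadlines, entry.deadline), entry)


tasks = [TaskEntry("one", 1), TaskEntry("three", 3)]
insert_entry(tasks, TaskEntry("two", 2))
print([e.deadline for e in tasks])  # [1, 2, 3]
```
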
addict.Dict( - task_id=addict.Dict(value=tid), - agent_id=addict.Dict(value="fake_agent_id"), - name="executor-{id}".format(id=tid), - resources=[ - addict.Dict( - name="cpus", - type="SCALAR", - role="fake_role", - scalar=addict.Dict(value=10.0), - ), - addict.Dict( - name="mem", - type="SCALAR", - role="fake_role", - scalar=addict.Dict(value=1024.0), - ), - addict.Dict( - name="disk", - type="SCALAR", - role="fake_role", - scalar=addict.Dict(value=1000.0), - ), - addict.Dict( - name="gpus", - type="SCALAR", - role="fake_role", - scalar=addict.Dict(value=gpus_count), - ), - addict.Dict( - name="ports", - type="RANGES", - role="fake_role", - ranges=addict.Dict(range=[addict.Dict(begin=31200, end=31200)]), - ), - ], - command=addict.Dict( - value='echo "fake"', - uris=[], - environment=addict.Dict( - variables=[{"name": "MESOS_TASK_ID", "value": mock.ANY}] - ), - ), - container=container, - ) - assert task_info == expected_task_info - - -@mock.patch("task_processing.plugins.mesos.translator.time") -def test_mesos_update_to_event(mock_time): - mock_time.time.return_value = 12345678.0 - for key, val in MESOS_STATUS_MAP.items(): - mesos_status = mock.MagicMock() - mesos_status.state = key - assert mesos_update_to_event(mesos_status, addict.Dict(task_id="123")) == Event( - kind="task", - raw=mesos_status, - task_id="123", - task_config={"task_id": "123"}, - timestamp=12345678.0, - terminal=val.terminal, - platform_type=val.platform_type, - success=val.get("success", None), - ) diff --git a/tests/unit/plugins/persistence/dynamo_persistence_test.py b/tests/unit/plugins/persistence/dynamo_persistence_test.py deleted file mode 100644 index dff31084..00000000 --- a/tests/unit/plugins/persistence/dynamo_persistence_test.py +++ /dev/null @@ -1,99 +0,0 @@ -import pytest -from hypothesis import given -from hypothesis import HealthCheck -from hypothesis import settings -from hypothesis import strategies as st - -from task_processing.interfaces.event import Event -from task_processing.plugins.persistence.dynamodb_persistence import DynamoDBPersister - - -@pytest.fixture -def persister(mocker): - mock_session = mocker.Mock() - mock_session.client.return_value = [] - - mock_resource = mocker.Mock() - mock_resource.Table.return_value = mocker.Mock() - mock_session.resource.return_value = mock_resource - persister = DynamoDBPersister(table_name="foo", session=mock_session) - return persister - - -@settings(suppress_health_check=(HealthCheck.function_scoped_fixture,)) -@given( - x=st.dictionaries( - keys=st.text(), values=st.decimals(allow_nan=False, allow_infinity=False) - ) -) -def test_replaces_decimals_dict(x, persister): - for k, v in persister._replace_decimals(x).items(): - assert type(v) == float - - -@settings(suppress_health_check=(HealthCheck.function_scoped_fixture,)) -@given(x=st.decimals(allow_nan=False, allow_infinity=False)) -def test_replaces_decimals_decimal(x, persister): - assert type(persister._replace_decimals(x)) is float - - -@settings(suppress_health_check=(HealthCheck.function_scoped_fixture,)) -@given(x=st.lists(st.decimals(allow_nan=False, allow_infinity=False))) -def test_replaces_decimals_list(x, persister): - assert all([type(v) == float for v in persister._replace_decimals(x)]) - - -@settings(suppress_health_check=(HealthCheck.function_scoped_fixture,)) -@given( - x=st.one_of( - st.text(), - st.booleans(), - ) -) -def test_replaces_decimals_unaffected(x, persister): - assert persister._replace_decimals(x) == x - - -texts = st.text(max_size=5) -events = st.builds( - Event, - 
-    kind=st.sampled_from(["task", "control"]),
-    task_id=texts,
-    timestamp=st.floats(min_value=0, allow_nan=False, allow_infinity=False),
-    terminal=st.booleans(),
-    success=st.booleans(),
-    task_config=st.dictionaries(
-        max_size=5,
-        keys=texts,
-        values=st.lists(
-            st.one_of(
-                texts,
-                st.dictionaries(max_size=5, keys=texts, values=texts),
-            ),
-            max_size=5,
-        ),
-    ),
-    raw=st.sampled_from([None]),
-)
-
-
-@settings(max_examples=50, suppress_health_check=(HealthCheck.function_scoped_fixture,))
-@given(x=events)
-def test_event_to_item_timestamp(x, persister):
-    res = persister._event_to_item(x)["M"]
-    assert "N" in res["timestamp"].keys()
-    assert "BOOL" in res["success"].keys()
-    assert "BOOL" in res["terminal"].keys()
-    assert "M" in res["task_config"].keys()
-
-
-@settings(max_examples=50, suppress_health_check=(HealthCheck.function_scoped_fixture,))
-@given(x=events)
-def test_event_to_item_list(x, persister):
-    res = persister._event_to_item(x)["M"]
-    for k, v in x.task_config.items():
-        if len(v) > 0:
-            assert k in res["task_config"]["M"]
-        else:
-            assert k not in res["task_config"]["M"]
-            assert all([{"S": val} in res["task_config"]["M"][k]["L"] for val in v])
diff --git a/tests/unit/plugins/stateful/stateful_executor_test.py b/tests/unit/plugins/stateful/stateful_executor_test.py
deleted file mode 100644
index 355fd172..00000000
--- a/tests/unit/plugins/stateful/stateful_executor_test.py
+++ /dev/null
@@ -1,44 +0,0 @@
-import mock
-import pytest
-
-from task_processing.plugins.stateful.stateful_executor import StatefulTaskExecutor
-
-
-@pytest.fixture
-def mock_downstream():
-    return mock.MagicMock()
-
-
-@pytest.fixture
-def mock_persister():
-    return mock.MagicMock()
-
-
-@pytest.fixture
-def mock_stateful_executor(mock_Thread, mock_downstream, mock_persister):
-    return StatefulTaskExecutor(
-        downstream_executor=mock_downstream,
-        persister=mock_persister,
-    )
-
-
-def test_run(mock_stateful_executor, mock_downstream):
-    mock_config = mock.MagicMock()
-    mock_stateful_executor.run(mock_config)
-    assert mock_downstream.run.call_count == 1
-
-
-def test_kill(mock_stateful_executor, mock_downstream):
-    result = mock_stateful_executor.kill("task")
-    assert result == mock_downstream.kill.return_value
-    assert mock_downstream.kill.call_args == mock.call("task")
-
-
-def test_reconcile(mock_stateful_executor, mock_downstream):
-    mock_stateful_executor.reconcile("task")
-    assert mock_downstream.reconcile.call_args == mock.call("task")
-
-
-def test_stop(mock_stateful_executor, mock_downstream):
-    mock_stateful_executor.stop()
-    assert mock_downstream.stop.call_args == mock.call()
diff --git a/tox.ini b/tox.ini
index ef766537..ac681e95 100644
--- a/tox.ini
+++ b/tox.ini
@@ -6,18 +6,13 @@ passenv = PIP_INDEX_URL
 deps = -rrequirements-dev.txt
 commands =
-    pip install -e .[mesos_executor,persistence,k8s]
+    pip install -e .[persistence,k8s]
-    pip install yelp-meteorite
     mypy task_processing
     pytest {posargs:tests}/unit
     pre-commit install -f --install-hooks
     pre-commit run --all-files

-[testenv:mesos]
-basepython = /usr/bin/python3.8
-commands =
-    pip install -e .[mesos_executor]
-
 [testenv:docs]
 deps = sphinx
 commands =
@@ -30,24 +25,11 @@ deps = twine
 commands =
     python setup.py sdist bdist_wheel

-[testenv:integration]
-deps =
-    docker-compose==1.7.1
-commands =
-    docker-compose -f examples/cluster/docker-compose.yaml down
-    docker-compose -f examples/cluster/docker-compose.yaml pull
-    docker-compose -f examples/cluster/docker-compose.yaml build
-    docker-compose -f examples/cluster/docker-compose.yaml \
-        up -d zookeeper mesosmaster mesosagent
-    docker-compose -f examples/cluster/docker-compose.yaml scale mesosagent=1
-    docker-compose -f examples/cluster/docker-compose.yaml \
-        run playground /src/itest
-
 [testenv:venv]
 basepython = /usr/bin/python3.8
 envdir = venv
 commands =
-    pip install -e .[mesos_executor,metrics,persistence,k8s]
+    pip install -e .[metrics,persistence,k8s]

 [flake8]
 exclude = .git,__pycache__,.tox,docs,venv