diff --git a/bayesian-optimization/{{cookiecutter.project_name}}/LICENSE b/bayesian-optimization/LICENSE
similarity index 100%
rename from bayesian-optimization/{{cookiecutter.project_name}}/LICENSE
rename to bayesian-optimization/LICENSE
diff --git a/bayesian-optimization/{{cookiecutter.project_name}}/README.md b/bayesian-optimization/README.md
similarity index 100%
rename from bayesian-optimization/{{cookiecutter.project_name}}/README.md
rename to bayesian-optimization/README.md
diff --git a/bayesian-optimization/bayesian_optimization.py b/bayesian-optimization/bayesian_optimization.py
new file mode 100644
index 0000000..e0c165a
--- /dev/null
+++ b/bayesian-optimization/bayesian_optimization.py
@@ -0,0 +1,106 @@
+from typing import Dict, List
+
+from flytekit import task, workflow, dynamic, ImageSpec
+from bayes_opt import BayesianOptimization, UtilityFunction
+
+"""
+ImageSpec is a way to specify how to build a container image without a Dockerfile. By default, the image spec is
+converted to an `Envd <https://github.com/tensorchord/envd>`__ config, and the Envd builder will build the image
+for you. However, you can also register your own builder to build the image using other tools.
+
+For every :py:class:`flytekit.PythonFunctionTask` task or a task decorated with the ``@task`` decorator, you can
+specify rules for binding container images. By default, flytekit binds a single container image, i.e., the
+default Docker image, to all tasks. To modify this behavior, use the ``container_image`` parameter available in
+the :py:func:`flytekit.task` decorator, and pass an ``ImageSpec``.
+
+Before building the image, flytekit first checks the container registry to see if the image already exists. By
+doing so, it avoids having to rebuild the image over and over again. If the image does not exist, flytekit
+builds the image before registering the workflow, and replaces the image name in the task template with the
+newly built image name.
+"""
+image_definition = ImageSpec(
+    name="flytekit",  # rename this to your docker image name
+    base_image="ghcr.io/flyteorg/flytekit:py3.10-1.6.0",  # this is the base image that flytekit will use to build your image
+    registry="ghcr.io/unionai-oss",  # this is the registry where your image will be pushed to
+    packages=["flytekit>=1.6.0", "bayesian-optimization==1.4.3"],  # these are the packages that will be installed in your image
+    python_version="3.10",  # this is the python version that will be used to build your image
+)
+
+Point = Dict[str, float]
+
+
+@task(container_image=image_definition)
+def black_box_function(point: Point) -> float:
+    # implement your function to optimize here!
+    x, y = point["x"], point["y"]
+    return -x ** 2 - (y - 1) ** 2 + 1
+
+
+@task(container_image=image_definition)
+def suggest_points(
+    optimizer: BayesianOptimization,
+    utility: UtilityFunction,
+    concurrency: int,
+) -> List[Point]:
+    points = set()
+    # make sure that points are unique
+    while len(points) < concurrency:
+        points.add(tuple([(k, float(v)) for k, v in optimizer.suggest(utility).items()]))
+    return [dict(x) for x in points]
+
+
+@task(container_image=image_definition)
+def register_targets(
+    optimizer: BayesianOptimization,
+    points: List[Point],
+    targets: List[float],
+) -> BayesianOptimization:
+    for point, target in zip(points, targets):
+        optimizer.register(params=point, target=target)
+    return optimizer
+
+
+@task(container_image=image_definition)
+def log_iteration(
+    optimizer: BayesianOptimization,
+    points: List[Point],
+    targets: List[float],
+):
+    print(f"{points=}\n{targets=}\n{optimizer.max=}\n")
+
+
+@task(container_image=image_definition)
+def return_max(optimizer: BayesianOptimization) -> Dict:
+    return optimizer.max
+
+
+@dynamic(container_image=image_definition)
+def concurrent_trials(points: List[Point]) -> List[float]:
+    return [black_box_function(point=point) for point in points]
+
+
+@dynamic(container_image=image_definition)
+def bayesopt(n_iter: int, concurrency: int) -> Dict:
+    optimizer = BayesianOptimization(
+        f=None,
+        pbounds={"x": (-2, 2), "y": (-3, 3)},
+        verbose=2,
+        random_state=1,
+    )
+    utility = UtilityFunction(kind="ucb", kappa=2.5, xi=0.0)
+    for _ in range(n_iter):
+        points = suggest_points(optimizer=optimizer, utility=utility, concurrency=concurrency)
+        targets = concurrent_trials(points=points)
+        optimizer = register_targets(optimizer=optimizer, points=points, targets=targets)
+        log_iteration(optimizer=optimizer, points=points, targets=targets)
+    # return point that maximized the target
+    return return_max(optimizer=optimizer)
+
+
+@workflow
+def wf(n_iter: int = 10, concurrency: int = 10) -> Dict:
+    return bayesopt(n_iter=n_iter, concurrency=concurrency)
+
+
+if __name__ == "__main__":
+    print(wf())
diff --git a/bayesian-optimization/cookiecutter.json b/bayesian-optimization/cookiecutter.json
deleted file mode 100644
index 295214e..0000000
--- a/bayesian-optimization/cookiecutter.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{
-    "project_name": "Bayesian Optimization"
-}
diff --git a/bayesian-optimization/{{cookiecutter.project_name}}/requirements.txt b/bayesian-optimization/requirements.txt
similarity index 100%
rename from bayesian-optimization/{{cookiecutter.project_name}}/requirements.txt
rename to bayesian-optimization/requirements.txt
diff --git a/bayesian-optimization/{{cookiecutter.project_name}}/.gitignore b/bayesian-optimization/{{cookiecutter.project_name}}/.gitignore
deleted file mode 100644
index 77d9962..0000000
--- a/bayesian-optimization/{{cookiecutter.project_name}}/.gitignore
+++ /dev/null
@@ -1,138 +0,0 @@
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-*$py.class
-
-# C extensions
-*.so
-
-# Distribution / packaging
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-pip-wheel-metadata/
-share/python-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-MANIFEST
-
-# PyInstaller
-# Usually these files are written by a python script from a template
-# before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.nox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*.cover
-*.py,cover
-.hypothesis/
-.pytest_cache/
-
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-db.sqlite3-journal
-
-# Flask stuff:
-instance/
-.webassets-cache
-
-# Scrapy stuff:
-.scrapy
-
-# Sphinx documentation
-docs/_build/
-
-# PyBuilder
-target/
-
-# Jupyter Notebook
-.ipynb_checkpoints
-
-# IPython
-profile_default/
-ipython_config.py
-
-# pyenv
-.python-version
-
-# pipenv
-# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
-# However, in case of collaboration, if having platform-specific dependencies or dependencies
-# having no cross-platform support, pipenv may install dependencies that don't work, or not
-# install all needed dependencies.
-#Pipfile.lock
-
-# PEP 582; used by e.g. github.com/David-OConnor/pyflow
-__pypackages__/
-
-# Celery stuff
-celerybeat-schedule
-celerybeat.pid
-
-# SageMath parsed files
-*.sage.py
-
-# Environments
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-
-# Pyre type checker
-.pyre/
-
-# IDE
-.idea/*
-
-# Flyte serialization artifacts
-_pb_output
-_pb_output/*
-
-flyte-package.tgz
diff --git a/bayesian-optimization/{{cookiecutter.project_name}}/Dockerfile b/bayesian-optimization/{{cookiecutter.project_name}}/Dockerfile
deleted file mode 100644
index 71425fb..0000000
--- a/bayesian-optimization/{{cookiecutter.project_name}}/Dockerfile
+++ /dev/null
@@ -1,26 +0,0 @@
-FROM python:3.8-slim-buster
-
-WORKDIR /root
-ENV VENV /opt/venv
-ENV LANG C.UTF-8
-ENV LC_ALL C.UTF-8
-ENV PYTHONPATH /root
-
-RUN apt-get update && apt-get install -y build-essential
-
-ENV VENV /opt/venv
-# Virtual environment
-RUN python3 -m venv ${VENV}
-ENV PATH="${VENV}/bin:$PATH"
-
-# Install Python dependencies
-COPY requirements.txt /root
-RUN pip install -r /root/requirements.txt
-
-# Copy the actual code
-COPY . /root
-
-# This tag is supplied by the build script and will be used to determine the version
-# when registering tasks, workflows, and launch plans
-ARG tag
-ENV FLYTE_INTERNAL_IMAGE $tag
diff --git a/bayesian-optimization/{{cookiecutter.project_name}}/docker_build.sh b/bayesian-optimization/{{cookiecutter.project_name}}/docker_build.sh
deleted file mode 100755
index 83f9069..0000000
--- a/bayesian-optimization/{{cookiecutter.project_name}}/docker_build.sh
+++ /dev/null
@@ -1,46 +0,0 @@
-#!/bin/bash
-
-set -e
-
-# SET the REGISTRY here, where the docker container should be pushed
-REGISTRY=""
-
-# SET the appname here
-PROJECT_NAME="{{ cookiecutter.project_name }}"
-
-while getopts a:r:v:h flag
-do
-    case "${flag}" in
-        p) PROJECT_NAME=${OPTARG};;
-        r) REGISTRY=${OPTARG};;
-        v) VERSION=${OPTARG};;
-        h) echo "Usage: ${0} [-h|[-p <project_name>][-r <registry>][-v <version>]]"
-           echo "  h: help (this message)"
-           echo "  p: PROJECT_NAME for your workflows. Defaults to '{{ cookiecutter.project_name }}'."
-           echo "  r: REGISTRY name where the docker container should be pushed. Defaults to none - localhost"
-           echo "  v: VERSION of the build. Defaults to using the current git head SHA"
-           exit 1;;
-        *) echo "Usage: ${0} [-h|[-a <app_name>][-r <registry>][-v <version>]]"
-           exit 1;;
-    esac
-done
-
-# If you are using git, then this will automatically use the git head as the
-# version
-if [ -z "${VERSION}" ]; then
-    echo "No version set, using git commit head sha as the version"
-    VERSION=$(git rev-parse HEAD)
-fi
-
-TAG=${PROJECT_NAME}:${VERSION}
-if [ -z "${REGISTRY}" ]; then
-    echo "No registry set, creating tag ${TAG}"
-else
-    TAG="${REGISTRY}/${TAG}"
-    echo "Registry set: creating tag ${TAG}"
-fi
-
-# Should be run in the folder that has Dockerfile
-docker build --tag ${TAG} .
-
-echo "Docker image built with tag ${TAG}. You can use this image to run pyflyte package."
diff --git a/bayesian-optimization/{{cookiecutter.project_name}}/workflows/__init__.py b/bayesian-optimization/{{cookiecutter.project_name}}/workflows/__init__.py
deleted file mode 100644
index 5313296..0000000
--- a/bayesian-optimization/{{cookiecutter.project_name}}/workflows/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .bayesian_optimization_example import wf as wf
\ No newline at end of file
diff --git a/bayesian-optimization/{{cookiecutter.project_name}}/workflows/bayesian_optimization_example.py b/bayesian-optimization/{{cookiecutter.project_name}}/workflows/bayesian_optimization_example.py
deleted file mode 100644
index 3ecce9b..0000000
--- a/bayesian-optimization/{{cookiecutter.project_name}}/workflows/bayesian_optimization_example.py
+++ /dev/null
@@ -1,77 +0,0 @@
-from typing import Dict, List
-
-from flytekit import task, workflow, dynamic
-from bayes_opt import BayesianOptimization, UtilityFunction
-
-
-Point = Dict[str, float]
-
-
-@task
-def black_box_function(point: Point) -> float:
-    # implement your function to optimize here!
-    x, y = point["x"], point["y"]
-    return -x ** 2 - (y - 1) ** 2 + 1
-
-@task
-def suggest_points(
-    optimizer: BayesianOptimization,
-    utility: UtilityFunction,
-    concurrency: int,
-) -> List[Point]:
-    points = set()
-    # make sure that points are unique
-    while len(points) < concurrency:
-        points.add(tuple([(k, float(v)) for k, v in optimizer.suggest(utility).items()]))
-    return [dict(x) for x in points]
-
-@task
-def register_targets(
-    optimizer: BayesianOptimization,
-    points: List[Point],
-    targets: List[float],
-) -> BayesianOptimization:
-    for point, target in zip(points, targets):
-        optimizer.register(params=point, target=target)
-    return optimizer
-
-@task
-def log_iteration(
-    optimizer: BayesianOptimization,
-    points: List[Point],
-    targets: List[float],
-):
-    print(f"{points=}\n{targets=}\n{optimizer.max=}\n")
-
-@task
-def return_max(optimizer: BayesianOptimization) -> Dict:
-    return optimizer.max
-
-@dynamic
-def concurrent_trials(points: List[Point]) -> List[float]:
-    return [black_box_function(point=point) for point in points]
-
-@dynamic
-def bayesopt(n_iter: int, concurrency: int) -> Dict:
-    optimizer = BayesianOptimization(
-        f=None,
-        pbounds={"x": (-2, 2), "y": (-3, 3)},
-        verbose=2,
-        random_state=1,
-    )
-    utility = UtilityFunction(kind="ucb", kappa=2.5, xi=0.0)
-    for _ in range(n_iter):
-        points = suggest_points(optimizer=optimizer, utility=utility, concurrency=concurrency)
-        targets = concurrent_trials(points=points)
-        optimizer = register_targets(optimizer=optimizer, points=points, targets=targets)
-        log_iteration(optimizer=optimizer, points=points, targets=targets)
-    # return point that maximized the target
-    return return_max(optimizer=optimizer)
-
-@workflow
-def wf(n_iter: int = 10, concurrency: int = 10) -> Dict:
-    return bayesopt(n_iter=n_iter, concurrency=concurrency)
-
-
-if __name__ == "__main__":
-    print(wf())
diff --git a/mnist-training/{{cookiecutter.project_name}}/LICENSE b/mnist-training/LICENSE
similarity index 100%
rename from mnist-training/{{cookiecutter.project_name}}/LICENSE
rename to mnist-training/LICENSE
diff --git a/mnist-training/{{cookiecutter.project_name}}/README.md b/mnist-training/README.md
similarity index 100%
rename from mnist-training/{{cookiecutter.project_name}}/README.md
rename to mnist-training/README.md
diff --git a/mnist-training/cookiecutter.json b/mnist-training/cookiecutter.json
deleted file mode 100644
index 94014b4..0000000
--- a/mnist-training/cookiecutter.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{
-    "project_name": "MNIST Pytorch Training Example"
-}
diff --git a/mnist-training/{{cookiecutter.project_name}}/workflows/mnist_training_example.py b/mnist-training/mnist_training.py
similarity index 100%
rename from mnist-training/{{cookiecutter.project_name}}/workflows/mnist_training_example.py
rename to mnist-training/mnist_training.py
diff --git a/mnist-training/{{cookiecutter.project_name}}/requirements.txt b/mnist-training/requirements.txt
similarity index 100%
rename from mnist-training/{{cookiecutter.project_name}}/requirements.txt
rename to mnist-training/requirements.txt
diff --git a/mnist-training/{{cookiecutter.project_name}}/.gitignore b/mnist-training/{{cookiecutter.project_name}}/.gitignore
deleted file mode 100644
index 77d9962..0000000
--- a/mnist-training/{{cookiecutter.project_name}}/.gitignore
+++ /dev/null
@@ -1,138 +0,0 @@
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-*$py.class
-
-# C extensions
-*.so
-
-# Distribution / packaging
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-pip-wheel-metadata/
-share/python-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-MANIFEST
-
-# PyInstaller
-# Usually these files are written by a python script from a template
-# before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.nox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*.cover
-*.py,cover
-.hypothesis/
-.pytest_cache/
-
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-db.sqlite3-journal
-
-# Flask stuff:
-instance/
-.webassets-cache
-
-# Scrapy stuff:
-.scrapy
-
-# Sphinx documentation
-docs/_build/
-
-# PyBuilder
-target/
-
-# Jupyter Notebook
-.ipynb_checkpoints
-
-# IPython
-profile_default/
-ipython_config.py
-
-# pyenv
-.python-version
-
-# pipenv
-# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
-# However, in case of collaboration, if having platform-specific dependencies or dependencies
-# having no cross-platform support, pipenv may install dependencies that don't work, or not
-# install all needed dependencies.
-#Pipfile.lock
-
-# PEP 582; used by e.g. github.com/David-OConnor/pyflow
-__pypackages__/
-
-# Celery stuff
-celerybeat-schedule
-celerybeat.pid
-
-# SageMath parsed files
-*.sage.py
-
-# Environments
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-
-# Pyre type checker
-.pyre/
-
-# IDE
-.idea/*
-
-# Flyte serialization artifacts
-_pb_output
-_pb_output/*
-
-flyte-package.tgz
diff --git a/mnist-training/{{cookiecutter.project_name}}/Dockerfile b/mnist-training/{{cookiecutter.project_name}}/Dockerfile
deleted file mode 100644
index 5b6f914..0000000
--- a/mnist-training/{{cookiecutter.project_name}}/Dockerfile
+++ /dev/null
@@ -1,28 +0,0 @@
-# Ensure that your From Image is compatible with the version of pytorch you intend to use,
-# and that the cuda version is compatible with the nvidia drivers located on the node.
-FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime
-
-WORKDIR /root
-ENV VENV /opt/venv
-ENV LANG C.UTF-8
-ENV LC_ALL C.UTF-8
-ENV PYTHONPATH /root
-
-RUN apt-get update && apt-get install -y build-essential
-
-ENV VENV /opt/venv
-# Virtual environment
-RUN python3 -m venv ${VENV}
-ENV PATH="${VENV}/bin:$PATH"
-
-# Install Python dependencies
-COPY requirements.txt /root
-RUN pip install -r /root/requirements.txt
-
-# Copy the actual code
-COPY . /root
-
-# This tag is supplied by the build script and will be used to determine the version
-# when registering tasks, workflows, and launch plans
-ARG tag
-ENV FLYTE_INTERNAL_IMAGE $tag
diff --git a/mnist-training/{{cookiecutter.project_name}}/docker_build.sh b/mnist-training/{{cookiecutter.project_name}}/docker_build.sh
deleted file mode 100644
index 83f9069..0000000
--- a/mnist-training/{{cookiecutter.project_name}}/docker_build.sh
+++ /dev/null
@@ -1,46 +0,0 @@
-#!/bin/bash
-
-set -e
-
-# SET the REGISTRY here, where the docker container should be pushed
-REGISTRY=""
-
-# SET the appname here
-PROJECT_NAME="{{ cookiecutter.project_name }}"
-
-while getopts a:r:v:h flag
-do
-    case "${flag}" in
-        p) PROJECT_NAME=${OPTARG};;
-        r) REGISTRY=${OPTARG};;
-        v) VERSION=${OPTARG};;
-        h) echo "Usage: ${0} [-h|[-p <project_name>][-r <registry>][-v <version>]]"
-           echo "  h: help (this message)"
-           echo "  p: PROJECT_NAME for your workflows. Defaults to '{{ cookiecutter.project_name }}'."
-           echo "  r: REGISTRY name where the docker container should be pushed. Defaults to none - localhost"
-           echo "  v: VERSION of the build. Defaults to using the current git head SHA"
-           exit 1;;
-        *) echo "Usage: ${0} [-h|[-a <app_name>][-r <registry>][-v <version>]]"
-           exit 1;;
-    esac
-done
-
-# If you are using git, then this will automatically use the git head as the
-# version
-if [ -z "${VERSION}" ]; then
-    echo "No version set, using git commit head sha as the version"
-    VERSION=$(git rev-parse HEAD)
-fi
-
-TAG=${PROJECT_NAME}:${VERSION}
-if [ -z "${REGISTRY}" ]; then
-    echo "No registry set, creating tag ${TAG}"
-else
-    TAG="${REGISTRY}/${TAG}"
-    echo "Registry set: creating tag ${TAG}"
-fi
-
-# Should be run in the folder that has Dockerfile
-docker build --tag ${TAG} .
-
-echo "Docker image built with tag ${TAG}. You can use this image to run pyflyte package."
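Note that, unlike the other examples in this change, mnist-training only renames
mnist_training_example.py to mnist_training.py: it loses the Dockerfile and
docker_build.sh deleted above without gaining an ImageSpec of its own. Below is a
sketch of what that could look like, following the same pattern as the other files.
The base image is taken from the deleted Dockerfile, while the registry, the
package pins, and the train_mnist stub are illustrative assumptions, not part of
this diff:

from flytekit import ImageSpec, Resources, task

# Hypothetical ImageSpec replacing the deleted Dockerfile; the PyTorch base
# image mirrors the FROM line above. Adjust the registry and pins to your setup.
mnist_image = ImageSpec(
    name="mnist-training",
    base_image="pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime",
    registry="ghcr.io/unionai-oss",
    packages=["flytekit>=1.6.0", "torchvision"],
)


# Hypothetical task stub showing how the image (and a GPU request) would be bound.
@task(container_image=mnist_image, requests=Resources(gpu="1"))
def train_mnist(n_epochs: int) -> float:
    # training loop elided; the real example returns a final metric
    return 0.0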
diff --git a/mnist-training/{{cookiecutter.project_name}}/workflows/__init__.py b/mnist-training/{{cookiecutter.project_name}}/workflows/__init__.py
deleted file mode 100644
index e09d193..0000000
--- a/mnist-training/{{cookiecutter.project_name}}/workflows/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-from .mnist_training_example import mnist_workflow_cpu as mnist_workflow_cpu
-from .mnist_training_example import mnist_workflow_gpu as mnist_workflow_gpu
\ No newline at end of file
diff --git a/simple-example/{{cookiecutter.project_name}}/LICENSE b/simple-example/LICENSE
similarity index 100%
rename from simple-example/{{cookiecutter.project_name}}/LICENSE
rename to simple-example/LICENSE
diff --git a/simple-example/{{cookiecutter.project_name}}/README.md b/simple-example/README.md
similarity index 100%
rename from simple-example/{{cookiecutter.project_name}}/README.md
rename to simple-example/README.md
diff --git a/simple-example/cookiecutter.json b/simple-example/cookiecutter.json
deleted file mode 100644
index aea629b..0000000
--- a/simple-example/cookiecutter.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{
-    "project_name": "flyte_example"
-}
diff --git a/simple-example/example.py b/simple-example/example.py
new file mode 100644
index 0000000..46dd765
--- /dev/null
+++ b/simple-example/example.py
@@ -0,0 +1,70 @@
+"""A simple Flyte example."""
+
+import typing
+from flytekit import task, workflow, ImageSpec
+
+"""
+ImageSpec is a way to specify how to build a container image without a Dockerfile. By default, the image spec is
+converted to an `Envd <https://github.com/tensorchord/envd>`__ config, and the Envd builder will build the image
+for you. However, you can also register your own builder to build the image using other tools.
+
+For every :py:class:`flytekit.PythonFunctionTask` task or a task decorated with the ``@task`` decorator, you can
+specify rules for binding container images. By default, flytekit binds a single container image, i.e., the
+default Docker image, to all tasks. To modify this behavior, use the ``container_image`` parameter available in
+the :py:func:`flytekit.task` decorator, and pass an ``ImageSpec``.
+
+Before building the image, flytekit first checks the container registry to see if the image already exists. By
+doing so, it avoids having to rebuild the image over and over again. If the image does not exist, flytekit
+builds the image before registering the workflow, and replaces the image name in the task template with the
+newly built image name.
+"""
+image_definition = ImageSpec(
+    name="flytekit",  # rename this to your docker image name
+    base_image="ghcr.io/flyteorg/flytekit:py3.10-1.6.0",  # this is the base image that flytekit will use to build your image
+    registry="ghcr.io/unionai-oss",  # this is the registry where your image will be pushed to
+    packages=["flytekit>=1.6.0"],  # these are the packages that will be installed in your image
+    python_version="3.10",  # this is the python version that will be used to build your image
+)
+
+
+@task(container_image=image_definition)
+def say_hello(name: str) -> str:
+    """A simple Flyte task to say "hello".
+
+    The @task decorator allows Flyte to use this function as a Flyte task, which
+    is executed as an isolated, containerized unit of compute.
+    """
+    return f"hello {name}!"
+
+
+@task(container_image=image_definition)
+def greeting_length(greeting: str) -> int:
+    """A task that counts the length of a greeting."""
+    return len(greeting)
+
+
+@workflow
+def wf(name: str = "union") -> typing.Tuple[str, int]:
+    """Declare a workflow called `wf`.
+
+    The @workflow decorator defines an execution graph that is composed of tasks
+    and potentially sub-workflows. In this simple example, the workflow is
+    composed of just two tasks.
+
+    There are a few important things to note about workflows:
+    - Workflows are a domain-specific language (DSL) for creating execution
+      graphs and therefore only support a subset of Python's behavior.
+    - Tasks must be invoked with keyword arguments.
+    - The output variables of tasks are Promises, which are placeholders for
+      values that are yet to be materialized, not the actual values.
+    """
+    greeting = say_hello(name=name)
+    greeting_len = greeting_length(greeting=greeting)
+    return greeting, greeting_len
+
+
+if __name__ == "__main__":
+    # Execute the workflow, simply by invoking it like a function and passing in
+    # the necessary parameters
+    print(f"Running wf() {wf(name='passengers')}")
diff --git a/simple-example/{{cookiecutter.project_name}}/requirements.txt b/simple-example/requirements.txt
similarity index 100%
rename from simple-example/{{cookiecutter.project_name}}/requirements.txt
rename to simple-example/requirements.txt
diff --git a/simple-example/{{cookiecutter.project_name}}/.gitignore b/simple-example/{{cookiecutter.project_name}}/.gitignore
deleted file mode 100644
index 77d9962..0000000
--- a/simple-example/{{cookiecutter.project_name}}/.gitignore
+++ /dev/null
@@ -1,138 +0,0 @@
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-*$py.class
-
-# C extensions
-*.so
-
-# Distribution / packaging
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-pip-wheel-metadata/
-share/python-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-MANIFEST
-
-# PyInstaller
-# Usually these files are written by a python script from a template
-# before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.nox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*.cover
-*.py,cover
-.hypothesis/
-.pytest_cache/
-
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-db.sqlite3-journal
-
-# Flask stuff:
-instance/
-.webassets-cache
-
-# Scrapy stuff:
-.scrapy
-
-# Sphinx documentation
-docs/_build/
-
-# PyBuilder
-target/
-
-# Jupyter Notebook
-.ipynb_checkpoints
-
-# IPython
-profile_default/
-ipython_config.py
-
-# pyenv
-.python-version
-
-# pipenv
-# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
-# However, in case of collaboration, if having platform-specific dependencies or dependencies
-# having no cross-platform support, pipenv may install dependencies that don't work, or not
-# install all needed dependencies.
-#Pipfile.lock
-
-# PEP 582; used by e.g. github.com/David-OConnor/pyflow
-__pypackages__/
-
-# Celery stuff
-celerybeat-schedule
-celerybeat.pid
-
-# SageMath parsed files
-*.sage.py
-
-# Environments
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-
-# Pyre type checker
-.pyre/
-
-# IDE
-.idea/*
-
-# Flyte serialization artifacts
-_pb_output
-_pb_output/*
-
-flyte-package.tgz
diff --git a/simple-example/{{cookiecutter.project_name}}/Dockerfile b/simple-example/{{cookiecutter.project_name}}/Dockerfile
deleted file mode 100644
index 71425fb..0000000
--- a/simple-example/{{cookiecutter.project_name}}/Dockerfile
+++ /dev/null
@@ -1,26 +0,0 @@
-FROM python:3.8-slim-buster
-
-WORKDIR /root
-ENV VENV /opt/venv
-ENV LANG C.UTF-8
-ENV LC_ALL C.UTF-8
-ENV PYTHONPATH /root
-
-RUN apt-get update && apt-get install -y build-essential
-
-ENV VENV /opt/venv
-# Virtual environment
-RUN python3 -m venv ${VENV}
-ENV PATH="${VENV}/bin:$PATH"
-
-# Install Python dependencies
-COPY requirements.txt /root
-RUN pip install -r /root/requirements.txt
-
-# Copy the actual code
-COPY . /root
-
-# This tag is supplied by the build script and will be used to determine the version
-# when registering tasks, workflows, and launch plans
-ARG tag
-ENV FLYTE_INTERNAL_IMAGE $tag
diff --git a/simple-example/{{cookiecutter.project_name}}/docker_build.sh b/simple-example/{{cookiecutter.project_name}}/docker_build.sh
deleted file mode 100755
index 83f9069..0000000
--- a/simple-example/{{cookiecutter.project_name}}/docker_build.sh
+++ /dev/null
@@ -1,46 +0,0 @@
-#!/bin/bash
-
-set -e
-
-# SET the REGISTRY here, where the docker container should be pushed
-REGISTRY=""
-
-# SET the appname here
-PROJECT_NAME="{{ cookiecutter.project_name }}"
-
-while getopts a:r:v:h flag
-do
-    case "${flag}" in
-        p) PROJECT_NAME=${OPTARG};;
-        r) REGISTRY=${OPTARG};;
-        v) VERSION=${OPTARG};;
-        h) echo "Usage: ${0} [-h|[-p <project_name>][-r <registry>][-v <version>]]"
-           echo "  h: help (this message)"
-           echo "  p: PROJECT_NAME for your workflows. Defaults to '{{ cookiecutter.project_name }}'."
-           echo "  r: REGISTRY name where the docker container should be pushed. Defaults to none - localhost"
-           echo "  v: VERSION of the build. Defaults to using the current git head SHA"
-           exit 1;;
-        *) echo "Usage: ${0} [-h|[-a <app_name>][-r <registry>][-v <version>]]"
-           exit 1;;
-    esac
-done
-
-# If you are using git, then this will automatically use the git head as the
-# version
-if [ -z "${VERSION}" ]; then
-    echo "No version set, using git commit head sha as the version"
-    VERSION=$(git rev-parse HEAD)
-fi
-
-TAG=${PROJECT_NAME}:${VERSION}
-if [ -z "${REGISTRY}" ]; then
-    echo "No registry set, creating tag ${TAG}"
-else
-    TAG="${REGISTRY}/${TAG}"
-    echo "Registry set: creating tag ${TAG}"
-fi
-
-# Should be run in the folder that has Dockerfile
-docker build --tag ${TAG} .
-
-echo "Docker image built with tag ${TAG}. You can use this image to run pyflyte package."
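Because flytekit tasks and workflows remain ordinary Python callables when
executed outside a cluster, the new simple-example/example.py above can be
exercised without any of the Docker tooling deleted here. A minimal local-run
sketch, assuming example.py is importable from the working directory:

from example import greeting_length, say_hello, wf

# Calling the workflow locally runs the whole graph in-process and returns
# the materialized outputs rather than Promises.
greeting, length = wf(name="flyte")
assert greeting == "hello flyte!"
assert length == len(greeting)

# Individual tasks are also plain callables in local execution.
print(say_hello(name="ad-hoc"), greeting_length(greeting="hi"))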
diff --git a/simple-example/{{cookiecutter.project_name}}/workflows/__init__.py b/simple-example/{{cookiecutter.project_name}}/workflows/__init__.py
deleted file mode 100644
index af0a508..0000000
--- a/simple-example/{{cookiecutter.project_name}}/workflows/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .example import wf as wf
\ No newline at end of file
diff --git a/simple-example/{{cookiecutter.project_name}}/workflows/example.py b/simple-example/{{cookiecutter.project_name}}/workflows/example.py
deleted file mode 100644
index 1a01fb6..0000000
--- a/simple-example/{{cookiecutter.project_name}}/workflows/example.py
+++ /dev/null
@@ -1,45 +0,0 @@
-"""A simple Flyte example."""
-
-import typing
-from flytekit import task, workflow
-
-
-@task
-def say_hello(name: str) -> str:
-    """A simple Flyte task to say "hello".
-
-    The @task decorator allows Flyte to use this function as a Flyte task, which
-    is executed as an isolated, containerized unit of compute.
-    """
-    return f"hello {name}!"
-
-
-@task
-def greeting_length(greeting: str) -> int:
-    """A task the counts the length of a greeting."""
-    return len(greeting)
-
-@workflow
-def wf(name: str = "union") -> typing.Tuple[str, int]:
-    """Declare workflow called `wf`.
-
-    The @workflow decorator defines an execution graph that is composed of tasks
-    and potentially sub-workflows. In this simple example, the workflow is
-    composed of just one task.
-
-    There are a few important things to note about workflows:
-    - Workflows are a domain-specific language (DSL) for creating execution
-      graphs and therefore only support a subset of Python's behavior.
-    - Tasks must be invoked with keyword arguments
-    - The output variables of tasks are Promises, which are placeholders for
-      values that are yet to be materialized, not the actual values.
- """ - greeting = say_hello(name=name) - greeting_len = greeting_length(greeting=greeting) - return greeting, greeting_len - - -if __name__ == "__main__": - # Execute the workflow, simply by invoking it like a function and passing in - # the necessary parameters - print(f"Running wf() { wf(name='passengers') }") diff --git a/wine-classification/{{cookiecutter.project_name}}/LICENSE b/wine-classification/LICENSE similarity index 100% rename from wine-classification/{{cookiecutter.project_name}}/LICENSE rename to wine-classification/LICENSE diff --git a/wine-classification/{{cookiecutter.project_name}}/README.md b/wine-classification/README.md similarity index 100% rename from wine-classification/{{cookiecutter.project_name}}/README.md rename to wine-classification/README.md diff --git a/wine-classification/cookiecutter.json b/wine-classification/cookiecutter.json deleted file mode 100644 index 311981b..0000000 --- a/wine-classification/cookiecutter.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "project_name": "Wine classification" -} diff --git a/wine-classification/{{cookiecutter.project_name}}/requirements.txt b/wine-classification/requirements.txt similarity index 100% rename from wine-classification/{{cookiecutter.project_name}}/requirements.txt rename to wine-classification/requirements.txt diff --git a/wine-classification/wine_classification.py b/wine-classification/wine_classification.py new file mode 100644 index 0000000..8ad04bb --- /dev/null +++ b/wine-classification/wine_classification.py @@ -0,0 +1,59 @@ +import pandas as pd + +from sklearn.datasets import load_wine +from sklearn.linear_model import LogisticRegression +from flytekit import task, workflow, ImageSpec + +""" +Image Spec is a way to specify how to build a container image without a Dockerfile. The image spec by default will be +converted to an `Envd `__ config, and the `Envd builder +`__ will build the image for you. However, you can also register your own builder to build +the image using other tools. +For every :py:class:`flytekit.PythonFunctionTask` task or a task decorated with the ``@task`` decorator, +you can specify rules for binding container images. By default, flytekit binds a single container image, i.e., +the `default Docker image `__, to all tasks. To modify this behavior, +use the ``container_image`` parameter available in the :py:func:`flytekit.task` decorator, and pass an +``ImageSpec``. +Before building the image, Flytekit checks the container registry first to see if the image already exists. By doing +so, it avoids having to rebuild the image over and over again. If the image does not exist, flytekit will build the +image before registering the workflow, and replace the image name in the task template with the newly built image name. 
+""" +image_definition = ImageSpec( + name="flytekit", # rename this to your docker image name + base_image="ghcr.io/flyteorg/flytekit:py3.10-1.6.0", + # this is the base image that flytekit will use to build your image + registry="ghcr.io/unionai-oss", # this is the registry where your image will be pushed to + packages=["flytekit>=1.6.0", "pandas==1.5.3", "scikit-learn==1.2.2"], # these are the packages that will be installed in your image + python_version="3.10", # this is the python version that will be used to build your image +) + +@task +def get_data() -> pd.DataFrame: + """Get the wine dataset.""" + return load_wine(as_frame=True).frame + +@task +def process_data(data: pd.DataFrame) -> pd.DataFrame: + """Simplify the task from a 3-class to a binary classification problem.""" + return data.assign(target=lambda x: x["target"].where(x["target"] == 0, 1)) + +@task +def train_model(data: pd.DataFrame, hyperparameters: dict) -> LogisticRegression: + """Train a model on the wine dataset.""" + features = data.drop("target", axis="columns") + target = data["target"] + return LogisticRegression(max_iter=3000, **hyperparameters).fit(features, target) + +@workflow +def training_workflow(hyperparameters: dict = {"C": 0.1}) -> LogisticRegression: + """Put all of the steps together into a single workflow.""" + data = get_data() + processed_data = process_data(data=data) + return train_model( + data=processed_data, + hyperparameters=hyperparameters, + ) + +if __name__ == "__main__": + training_workflow(hyperparameters={"C": 0.1}) diff --git a/wine-classification/{{cookiecutter.project_name}}/.gitignore b/wine-classification/{{cookiecutter.project_name}}/.gitignore deleted file mode 100644 index 77d9962..0000000 --- a/wine-classification/{{cookiecutter.project_name}}/.gitignore +++ /dev/null @@ -1,138 +0,0 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# IDE -.idea/* - -# Flyte serialization artifacts -_pb_output -_pb_output/* - -flyte-package.tgz diff --git a/wine-classification/{{cookiecutter.project_name}}/Dockerfile b/wine-classification/{{cookiecutter.project_name}}/Dockerfile deleted file mode 100644 index c848bb8..0000000 --- a/wine-classification/{{cookiecutter.project_name}}/Dockerfile +++ /dev/null @@ -1,26 +0,0 @@ -FROM python:3.10-slim-buster - -WORKDIR /root -ENV VENV /opt/venv -ENV LANG C.UTF-8 -ENV LC_ALL C.UTF-8 -ENV PYTHONPATH /root - -RUN apt-get update && apt-get install -y build-essential - -ENV VENV /opt/venv -# Virtual environment -RUN python3 -m venv ${VENV} -ENV PATH="${VENV}/bin:$PATH" - -# Install Python dependencies -COPY requirements.txt /root -RUN pip install -r /root/requirements.txt - -# Copy the actual code -COPY . /root - -# This tag is supplied by the build script and will be used to determine the version -# when registering tasks, workflows, and launch plans -ARG tag -ENV FLYTE_INTERNAL_IMAGE $tag diff --git a/wine-classification/{{cookiecutter.project_name}}/workflows/__init__.py b/wine-classification/{{cookiecutter.project_name}}/workflows/__init__.py deleted file mode 100644 index 11fb8b8..0000000 --- a/wine-classification/{{cookiecutter.project_name}}/workflows/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .wine_classification_example import training_workflow as training_workflow diff --git a/wine-classification/{{cookiecutter.project_name}}/workflows/wine_classification_example.py b/wine-classification/{{cookiecutter.project_name}}/workflows/wine_classification_example.py deleted file mode 100644 index 607930e..0000000 --- a/wine-classification/{{cookiecutter.project_name}}/workflows/wine_classification_example.py +++ /dev/null @@ -1,35 +0,0 @@ -import pandas as pd - -from sklearn.datasets import load_wine -from sklearn.linear_model import LogisticRegression -from flytekit import task, workflow - -@task -def get_data() -> pd.DataFrame: - """Get the wine dataset.""" - return load_wine(as_frame=True).frame - -@task -def process_data(data: pd.DataFrame) -> pd.DataFrame: - """Simplify the task from a 3-class to a binary classification problem.""" - return data.assign(target=lambda x: x["target"].where(x["target"] == 0, 1)) - -@task -def train_model(data: pd.DataFrame, hyperparameters: dict) -> LogisticRegression: - """Train a model on the wine dataset.""" - features = data.drop("target", axis="columns") - target = data["target"] - return LogisticRegression(max_iter=3000, **hyperparameters).fit(features, target) - -@workflow -def training_workflow(hyperparameters: dict = {"C": 0.1}) -> LogisticRegression: - """Put all of the steps together into a single workflow.""" - data = get_data() - processed_data = process_data(data=data) - return train_model( - data=processed_data, - hyperparameters=hyperparameters, - ) - -if __name__ == "__main__": - training_workflow(hyperparameters={"C": 0.1})
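As with the other examples, the new wine-classification/wine_classification.py
can be run locally by invoking the workflow directly, which is what its
__main__ block does. A short usage sketch; the accuracy check at the end is an
illustrative assumption, not part of the diff:

from wine_classification import get_data, process_data, training_workflow

# Local run: returns the fitted LogisticRegression produced by train_model.
model = training_workflow(hyperparameters={"C": 0.1})

# Score the model on the processed training frame (illustration only).
frame = process_data(data=get_data())
print("train accuracy:", model.score(frame.drop("target", axis="columns"), frame["target"]))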