Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Inference server #10

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions manifests/helm/app/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,14 @@ spec:
spec:
containers:
- env:
- name: USE_SERVING_RUNTIME
value: "{{ .Values.useServingRuntime }}"
- name: ENVIRONMENT
{{- if .Values.isProd }}
value: prod
{{- else }}
value: dev
{{- end }}
- name: MLFLOW_TRACKING_URI
value: http://mlflow.mlflow.svc.cluster.local:5000
- name: MLFLOW_S3_ENDPOINT_URL
Expand Down
38 changes: 38 additions & 0 deletions manifests/helm/app/templates/rolebinding-model-controller.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Namespaced Role for the model controller.
# Allows create/delete/get/list/patch/update on KServe InferenceServices and
# create/delete on batch Jobs within the release namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: {{ .Release.Name }}-model-controller
  namespace: {{ .Release.Namespace }}
rules:
  # KServe inference services.
  - apiGroups:
      - serving.kserve.io
    resources:
      - inferenceservices
    verbs:
      - create
      - delete
      - get
      - list
      - patch
      - update
  # Batch jobs: only creation and deletion are granted.
  - apiGroups:
      - batch
    resources:
      - jobs
    verbs:
      - create
      - delete
---
# Binds the release's "-model-controller" Role to the "model-controller"
# ServiceAccount in the release namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: {{ .Release.Name }}-model-controller
  namespace: {{ .Release.Namespace }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: {{ .Release.Name }}-model-controller
subjects:
  - kind: ServiceAccount
    name: model-controller
    namespace: {{ .Release.Namespace }}
66 changes: 66 additions & 0 deletions manifests/helm/app/templates/serving-runtime.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
{{- if .Values.useServingRuntime }}
# OpenVINO Model Server (ovms) serving runtime, rendered only when
# .Values.useServingRuntime is truthy. Named after the Helm release and
# created in the release namespace.
apiVersion: serving.kserve.io/v1alpha1
kind: ServingRuntime
metadata:
  name: {{ .Release.Name }}
  namespace: {{ .Release.Namespace }}
  labels:
    opendatahub.io/dashboard: 'true'
  annotations:
    opendatahub.io/apiProtocol: REST
    opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]'
    opendatahub.io/template-display-name: OpenVINO Model Server
    opendatahub.io/template-name: ovms
    openshift.io/display-name: {{ .Release.Name }}
spec:
  multiModel: true
  replicas: {{ .Values.replicaCount }}
  tolerations: []
  protocolVersions:
    - grpc-v1
  grpcEndpoint: 'port:8085'
  # Same port the ovms container listens on via --port below.
  grpcDataEndpoint: 'port:8001'
  supportedModelFormats:
    - name: openvino_ir
      version: opset1
      autoSelect: true
    - name: onnx
      version: '1'
      autoSelect: true
    - name: tensorflow
      version: '2'
      autoSelect: true
  builtInAdapter:
    serverType: ovms
    # Same port the ovms container exposes via --rest_port below.
    runtimeManagementPort: 8888
    memBufferBytes: 134217728
    modelLoadingTimeoutMillis: 90000
    env:
      - name: OVMS_FORCE_TARGET_DEVICE
        value: AUTO
  containers:
    - name: ovms
      # Image is pinned by digest rather than by tag.
      image: 'quay.io/modh/openvino_model_server@sha256:f1140e9d987580d1aab1ccc62519b48b1d2673308b2db496e9e505e3be788d9f'
      args:
        - '--port=8001'
        - '--rest_port=8888'
        - '--config_path=/models/model_config_list.json'
        - '--file_system_poll_wait_seconds=0'
        - '--grpc_bind_address=0.0.0.0'
        - '--rest_bind_address=0.0.0.0'
      resources:
        requests:
          cpu: '1'
          memory: 4Gi
        limits:
          cpu: '2'
          memory: 8Gi
      volumeMounts:
        - name: shm
          mountPath: /dev/shm
  volumes:
    # Memory-backed emptyDir mounted at /dev/shm in the ovms container.
    - name: shm
      emptyDir:
        medium: Memory
        sizeLimit: 2Gi
{{- end }}
10 changes: 9 additions & 1 deletion manifests/helm/app/templates/workbench.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,14 @@ spec:
secretKeyRef:
name: aws-connection-my-storage
key: AWS_S3_ENDPOINT
- name: USE_SERVING_RUNTIME
value: "{{ .Values.useServingRuntime }}"
- name: ENVIRONMENT
{{- if .Values.isProd }}
value: prod
{{- else }}
value: dev
{{- end }}
- name: PIP_CERT
value: /etc/pki/tls/custom-certs/ca-bundle.crt
- name: REQUESTS_CA_BUNDLE
Expand Down Expand Up @@ -158,7 +166,7 @@ spec:
valueFrom:
fieldRef:
fieldPath: metadata.namespace
image: registry.redhat.io/openshift4/ose-oauth-proxy@sha256:4bef31eb993feb6f1096b51b4876c65a6fb1f4401fee97fa4f4542b6b7c9bc46
image: registry.redhat.io/openshift4/ose-oauth-proxy@sha256:4f8d66597feeb32bb18699326029f9a71a5aca4a57679d636b876377c2e95695
#image: registry.redhat.io/openshift4/ose-oauth-proxy:v4.14
imagePullPolicy: Always
livenessProbe:
Expand Down
1 change: 1 addition & 0 deletions manifests/helm/app/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ image:
# Overrides the image tag whose default is the chart appVersion.

isProd: True
useServingRuntime: ${{ values.use_serving_runtime }}

route:
host: ""
Expand Down
6 changes: 6 additions & 0 deletions manifests/helm/build/templates/pipeline-training.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ spec:
- image: {{ include "image.dev-url" . }}-training:latest
name: train
env:
- name: USE_SERVING_RUNTIME
value: "{{ .Values.useServingRuntime }}"
- name: MLFLOW_TRACKING_URI
value: 'http://mlflow.mlflow.svc.cluster.local:5000'
- name: MLFLOW_S3_ENDPOINT_URL
Expand All @@ -36,6 +38,10 @@ spec:
- image: {{ include "image.dev-url" . }}-evaluation:latest
name: evaluate
env:
- name: USE_SERVING_RUNTIME
value: "{{ .Values.useServingRuntime }}"
- name: ENVIRONMENT
value: dev
- name: MLFLOW_TRACKING_URI
value: 'http://mlflow.mlflow.svc.cluster.local:5000'
- name: MLFLOW_S3_ENDPOINT_URL
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Service account named "model-controller". No namespace is set here, so it is
# created in whichever namespace the manifest is applied to.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: model-controller
3 changes: 3 additions & 0 deletions manifests/helm/build/templates/triggertemplate-training.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,8 @@ spec:
labels:
tekton.dev/pipeline: cron-job
spec:
taskRunSpecs:
- pipelineTaskName: evaluate
taskServiceAccountName: model-controller
pipelineRef:
name: train-and-evaluate
2 changes: 2 additions & 0 deletions manifests/helm/build/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ app:
namespace: ${{values.component_id}}-dev
cluster: ${{ values.cluster }}

useServingRuntime: ${{ values.use_serving_runtime }}

git:
branch: main
repo: ${{ values.source_repository }}
Expand Down
99 changes: 99 additions & 0 deletions skeleton/.che/che-editor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# Inline Che editor definition (devfile schemaVersion 2.1.0) for a PyCharm
# editor delivered through Projector.
#
# Components declared below:
#   - che-pycharm-runtime-description: the runtime container; exposes the
#     "intellij" endpoint on port 8887 plus three redirect endpoints
#     (13131-13133) and mounts the three projector volumes.
#   - projector-volume, projector-configuration, projector-java-configuration:
#     volumes mounted by the containers.
#   - che-pycharm-injector: container applied before workspace start through
#     the "on-init-command" command wired into events.preStart.
#
# NOTE(review): leading indentation is not visible in this view, so the nesting
# of "attributes"/"projects" and of "mountSources"/"sourceMapping" should be
# confirmed against the devfile 2.1.0 schema.
inline:
schemaVersion: 2.1.0
metadata:
name: {{ cookiecutter.project_name }}
commands:
# Applies the injector component; referenced from events.preStart below.
- id: on-init-command
apply:
component: che-pycharm-injector
events:
preStart:
- on-init-command
attributes:
# Project checked out into the workspace: the "origin" remote at revision main.
projects:
- name: {{ cookiecutter.project_name }}
git:
remotes:
origin: "{{ cookiecutter.git_repo }}"
checkoutFrom:
revision: main
components:
# Runtime container started through the Projector volume entrypoint.
- name: che-pycharm-runtime-description
container:
image: 'quay.io/devfile/universal-developer-image:ubi8-9436df2'
command:
- /projector/entrypoint-volume.sh
env:
- name: PROJECTOR_ASSEMBLY_DIR
value: /projector
- name: PROJECTOR_CONFIG_DIR
value: /home/user/.jetbrains
volumeMounts:
- name: projector-volume
path: /projector
- name: projector-configuration
path: /home/user/.jetbrains
- name: projector-java-configuration
path: /home/user/.java
memoryLimit: 6144Mi
memoryRequest: 2048Mi
cpuLimit: 2000m
cpuRequest: 1500m
endpoints:
# Main editor endpoint (attributes.type: main).
- name: intellij
attributes:
type: main
cookiesAuthEnabled: true
discoverable: false
urlRewriteSupported: true
targetPort: 8887
exposure: public
path: '/?backgroundColor=434343&wss'
secure: false
protocol: http
# Three additional public HTTP endpoints on ports 13131-13133.
- name: intellij-redirect-1
targetPort: 13131
exposure: public
protocol: http
attributes:
discoverable: false
urlRewriteSupported: true
- name: intellij-redirect-2
targetPort: 13132
exposure: public
protocol: http
attributes:
discoverable: false
urlRewriteSupported: true
- name: intellij-redirect-3
targetPort: 13133
exposure: public
protocol: http
attributes:
discoverable: false
urlRewriteSupported: true
attributes:
app.kubernetes.io/component: che-pycharm-injector
app.kubernetes.io/part-of: che-pycharm.eclipse.org
# Volumes referenced by the volumeMounts of the containers in this file.
- name: projector-volume
volume: { }
- name: projector-configuration
volume: { }
- name: projector-java-configuration
volume: { }
# Injector container; mounts projector-volume at /projector-volume.
- name: che-pycharm-injector
container:
image: 'quay.io/che-incubator/che-pycharm:2022.1-next'
command: ["/projector/entrypoint-init-container.sh"]
env:
- name: PROJECTOR_VOLUME_MOUNT
value: /projector-volume
- name: PROJECTOR_ASSEMBLY_DIR
value: /projector
volumeMounts:
- name: projector-volume
path: /projector-volume
memoryLimit: 2G
cpuLimit: 500m
mountSources: true
sourceMapping: /projects
2 changes: 0 additions & 2 deletions skeleton/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -163,5 +163,3 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/

.vscode/
7 changes: 7 additions & 0 deletions skeleton/.vscode/extensions.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
  // Extensions recommended for this workspace.
  // Format reference: https://go.microsoft.com/fwlink/?LinkId=827846
  "recommendations": ["ms-python.python"]
}
6 changes: 3 additions & 3 deletions skeleton/catalog-info.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ metadata:
- python
- ai
links:
- url: https://devspaces{{cookiecutter.cluster}}/#https://{{cookiecutter.host}}/{{cookiecutter.destination}}
- url: https://devspaces{{cookiecutter.cluster}}/#https://{{cookiecutter.host}}/{{cookiecutter.destination}}?che-editor=che-incubator/che-code/latest
title: OpenShift Dev Spaces (VS Code)
icon: web
- url: https://devspaces{{cookiecutter.cluster}}/#https://{{cookiecutter.host}}/{{cookiecutter.destination}}?che-editor=https://raw.githubusercontent.com/che-incubator/jetbrains-editor-images/main/devfiles/next/che-pycharm/2022.1-next.yaml
- url: https://devspaces{{cookiecutter.cluster}}/#https://{{cookiecutter.host}}/{{cookiecutter.destination}}
title: OpenShift Dev Spaces (JetBrains PyCharm)
icon: web
spec:
Expand All @@ -42,7 +42,7 @@ spec:
lifecycle: production
owner: {{cookiecutter.owner}}
definition:
$text: https://{{cookiecutter.component_id}}-prod-app-{{cookiecutter.component_id}}-prod/{{cookiecutter.cluster}}/openapi.json
$text: https://{{cookiecutter.component_id}}-prod-app-{{cookiecutter.component_id}}-prod{{cookiecutter.cluster}}/openapi.json
---
apiVersion: backstage.io/v1alpha1
kind: API
Expand Down
25 changes: 24 additions & 1 deletion skeleton/common/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,25 @@
from os import getenv

def _strtobool(val):
    """
    Convert a string representation of truth to true (1) or false (0).

    True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
    are 'n', 'no', 'f', 'false', 'off', and '0'. Matching is
    case-insensitive and ignores leading/trailing whitespace, so values
    read from the environment such as "true\n" are accepted.

    This mirrors distutils.util.strtobool, which is deprecated and removed
    in Python 3.12.

    Args:
        val: The string to interpret.

    Returns:
        1 for a recognised true value, 0 for a recognised false value.

    Raises:
        ValueError: If 'val' is not one of the recognised spellings.
    """
    normalized = val.strip().lower()
    if normalized in ('y', 'yes', 't', 'true', 'on', '1'):
        return 1
    if normalized in ('n', 'no', 'f', 'false', 'off', '0'):
        return 0
    # Report the caller's original value, not the normalized one.
    raise ValueError("invalid truth value %r" % (val,))


# Model name; the placeholder is filled in from the template's project name
# when the skeleton is rendered.
MODEL_NAME = "{{ cookiecutter.project_name }}"
MODEL_VERSION = "0.0.1"
# Parsed from the USE_SERVING_RUNTIME environment variable; an unset or empty
# variable is treated as "False". The value is the 1/0 returned by _strtobool.
USE_SERVING_RUNTIME = _strtobool(getenv("USE_SERVING_RUNTIME") or "False")

# NOTE(review): unit is not stated here — presumably seconds; confirm at the use site.
CACHE_TTL = 600
Loading