From cd3887594421c6f5b40f2ec0f49a2e3d3ffb5a9e Mon Sep 17 00:00:00 2001 From: Takeshi Yoneda Date: Thu, 12 Dec 2024 09:49:57 -0800 Subject: [PATCH] api: adds BackendRef into LLMBackendSpec (#40) This adds `backendRef` to LLMBackendSpec, which specifies the "backend": either a k8s Service or a Backend resource of Envoy Gateway. The choice of not embedding is intentional - a backend can be a target of routing in HTTPRoute and can be either of these two types, hence it is not suitable for embedding. In addition, in the implementation, we won't directly use or reference them, but simply attach the necessary logic by name, so there is basically no benefit in doing so. --------- Signed-off-by: Takeshi Yoneda --- api/v1alpha1/api.go | 17 ++++ api/v1alpha1/zz_generated.deepcopy.go | 3 +- go.mod | 1 + go.sum | 3 + .../aigateway.envoyproxy.io_llmbackends.yaml | 91 +++++++++++++++++++ tests/cel-validation/main_test.go | 1 + .../llmbackends/basic-eg-backend.yaml | 12 +++ .../testdata/llmbackends/basic.yaml | 4 + 8 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 tests/cel-validation/testdata/llmbackends/basic-eg-backend.yaml diff --git a/api/v1alpha1/api.go b/api/v1alpha1/api.go index a14caaf8c..33a9efe4e 100644 --- a/api/v1alpha1/api.go +++ b/api/v1alpha1/api.go @@ -1,6 +1,7 @@ package v1alpha1 import ( + egv1a1 "github.com/envoyproxy/gateway/api/v1alpha1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" gwapiv1 "sigs.k8s.io/gateway-api/apis/v1" ) @@ -64,6 +65,12 @@ type LLMRouteSpec struct { // LLMBackend is a resource that represents a single backend for LLMRoute. // A backend is a service that handles traffic with a concrete API specification. +// +// A LLMBackend is "attached" to a Backend which is either a k8s Service or a Backend resource of the Envoy Gateway.
+// +// When a backend with an attached LLMBackend is used as a routing target in the LLMRoute (more precisely, the +// HTTPRouteSpec defined in the LLMRoute), the ai-gateway will generate the necessary configuration to do +// the backend specific logic in the final HTTPRoute. type LLMBackend struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` @@ -88,7 +95,17 @@ type LLMBackendSpec struct { // the pair of LLMRouteSpec.APISchema and LLMBackendSpec.APISchema. // // This is required to be set. + // + // +kubebuilder:validation:Required APISchema LLMAPISchema `json:"outputSchema"` + // BackendRef is the reference to the Backend resource that this LLMBackend corresponds to. + // + // A backend can be of either k8s Service or Backend resource of Envoy Gateway. + // + // This is required to be set. + // + // +kubebuilder:validation:Required + BackendRef egv1a1.BackendRef `json:"backendRef"` } // LLMAPISchema defines the API schema of either LLMRoute (the input) or LLMBackend (the output). diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 69557e392..9063f1cbb 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -28,7 +28,7 @@ func (in *LLMBackend) DeepCopyInto(out *LLMBackend) { *out = *in out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - out.Spec = in.Spec + in.Spec.DeepCopyInto(&out.Spec) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LLMBackend. @@ -85,6 +85,7 @@ func (in *LLMBackendList) DeepCopyObject() runtime.Object { func (in *LLMBackendSpec) DeepCopyInto(out *LLMBackendSpec) { *out = *in out.APISchema = in.APISchema + in.BackendRef.DeepCopyInto(&out.BackendRef) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LLMBackendSpec. 
diff --git a/go.mod b/go.mod index e628f566a..695c05e68 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ replace github.com/imdario/mergo => github.com/imdario/mergo v0.3.16 require ( github.com/aws/aws-sdk-go v1.55.5 + github.com/envoyproxy/gateway v1.2.3 github.com/envoyproxy/go-control-plane v0.13.1 github.com/stretchr/testify v1.10.0 k8s.io/apimachinery v0.31.3 diff --git a/go.sum b/go.sum index 70499d6d7..b41b9b710 100644 --- a/go.sum +++ b/go.sum @@ -12,6 +12,8 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/emicklei/go-restful/v3 v3.12.1 h1:PJMDIM/ak7btuL8Ex0iYET9hxM3CI2sjZtzpL63nKAU= github.com/emicklei/go-restful/v3 v3.12.1/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/envoyproxy/gateway v1.2.3 h1:Qne11MOjNPmawTCFi35iuYvwA3kTqmBTFE7wDZkIgmo= +github.com/envoyproxy/gateway v1.2.3/go.mod h1:JkrLVKpgdd3D6Umr6uw1Hu98lCCpxU2pzK32qeM67U0= github.com/envoyproxy/go-control-plane v0.13.1 h1:vPfJZCkob6yTMEgS+0TwfTUfbHjfy/6vOJ8hUWX/uXE= github.com/envoyproxy/go-control-plane v0.13.1/go.mod h1:X45hY0mufo6Fd0KW3rqsGvQMw58jvjymeCzBU3mWyHw= github.com/envoyproxy/protoc-gen-validate v1.1.0 h1:tntQDh69XqOCOZsDz0lVJQez/2L6Uu2PdjCQwWCJ3bM= @@ -79,6 +81,7 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA= github.com/onsi/ginkgo/v2 v2.19.0/go.mod 
h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To= github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk= diff --git a/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_llmbackends.yaml b/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_llmbackends.yaml index 8ba4d2694..a03922dd0 100644 --- a/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_llmbackends.yaml +++ b/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_llmbackends.yaml @@ -20,6 +20,12 @@ spec: description: |- LLMBackend is a resource that represents a single backend for LLMRoute. A backend is a service that handles traffic with a concrete API specification. + + A LLMBackend is "attached" to a Backend which is either a k8s Service or a Backend resource of the Envoy Gateway. + + When a backend with an attached LLMBackend is used as a routing target in the LLMRoute (more precisely, the + HTTPRouteSpec defined in the LLMRoute), the ai-gateway will generate the necessary configuration to do + the backend specific logic in the final HTTPRoute. properties: apiVersion: description: |- @@ -41,6 +47,90 @@ spec: spec: description: Spec defines the details of the LLM policy. properties: + backendRef: + description: |- + BackendRef is the reference to the Backend resource that this LLMBackend corresponds to. + + A backend can be of either k8s Service or Backend resource of Envoy Gateway. + + This is required to be set. + properties: + fallback: + description: |- + Fallback indicates whether the backend is designated as a fallback. + Multiple fallback backends can be configured. + It is highly recommended to configure active or passive health checks to ensure that failover can be detected + when the active backends become unhealthy and to automatically readjust once the primary backends are healthy again. 
+ The overprovisioning factor is set to 1.4, meaning the fallback backends will only start receiving traffic when + the health of the active backends falls below 72%. + type: boolean + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". + + Defaults to "Service" when not specified. + + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations SHOULD NOT + support ExternalName Services. + + Support: Core (Services with a type other than ExternalName) + + Support: Implementation-specific (Services with type ExternalName) + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the backend. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + port: + description: |- + Port specifies the destination port number to use for this resource. + Port is required when the referent is a Kubernetes Service. 
In this + case, the port number is the service port number, not the target port. + For other resources, destination port might be derived from the referent + resource or this field. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - name + type: object + x-kubernetes-validations: + - message: Must have port for Service reference + rule: '(size(self.group) == 0 && self.kind == ''Service'') ? has(self.port) + : true' outputSchema: description: |- APISchema specifies the API schema of the output format of requests from @@ -63,6 +153,7 @@ spec: - schema type: object required: + - backendRef - outputSchema type: object type: object diff --git a/tests/cel-validation/main_test.go b/tests/cel-validation/main_test.go index d6e6e6c6b..763d1bcbd 100644 --- a/tests/cel-validation/main_test.go +++ b/tests/cel-validation/main_test.go @@ -111,6 +111,7 @@ func TestLLMBackends(t *testing.T) { expErr string }{ {name: "basic.yaml"}, + {name: "basic-eg-backend.yaml"}, { name: "unknown_schema.yaml", expErr: "spec.outputSchema.schema: Unsupported value: \"SomeRandomVendor\": supported values: \"OpenAI\", \"AWSBedrock\"", diff --git a/tests/cel-validation/testdata/llmbackends/basic-eg-backend.yaml b/tests/cel-validation/testdata/llmbackends/basic-eg-backend.yaml new file mode 100644 index 000000000..e9c5843a8 --- /dev/null +++ b/tests/cel-validation/testdata/llmbackends/basic-eg-backend.yaml @@ -0,0 +1,12 @@ +apiVersion: aigateway.envoyproxy.io/v1alpha1 +kind: LLMBackend +metadata: + name: eg-backend + namespace: default +spec: + outputSchema: + schema: AWSBedrock + backendRef: + name: eg-backend + kind: Backend + group: gateway.envoyproxy.io diff --git a/tests/cel-validation/testdata/llmbackends/basic.yaml b/tests/cel-validation/testdata/llmbackends/basic.yaml index 899987f63..08739e40d 100644 --- a/tests/cel-validation/testdata/llmbackends/basic.yaml +++ b/tests/cel-validation/testdata/llmbackends/basic.yaml @@ -6,3 +6,7 @@ metadata: spec: outputSchema: 
schema: AWSBedrock + backendRef: + name: dog-service + kind: Service + port: 80