api: adds BackendRef into LLMBackendSpec (#40)

This adds `backendRef` to LLMBackendSpec, which specifies the "backend": either a Service or a Backend resource of Envoy Gateway. The choice not to embed it is intentional: a backend can be a routing target in an HTTPRoute and can be either of these two types, so it is not suitable for embedding. In addition, the implementation won't directly use or reference these resources; it simply attaches the necessary logic by name, so embedding would bring basically no benefit. --------- Signed-off-by: Takeshi Yoneda <t.y.mathetake@gmail.com>
envoyproxy · Dec 12, 2024 · cd38875 · cd38875
1 parent 8299827
commit cd38875
Show file tree

Hide file tree

Showing 8 changed files with 131 additions and 1 deletion.
diff --git a/api/v1alpha1/api.go b/api/v1alpha1/api.go
@@ -1,6 +1,7 @@
 package v1alpha1
 
 import (
+	egv1a1 "github.com/envoyproxy/gateway/api/v1alpha1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	gwapiv1 "sigs.k8s.io/gateway-api/apis/v1"
 )
@@ -64,6 +65,12 @@ type LLMRouteSpec struct {
 
 // LLMBackend is a resource that represents a single backend for LLMRoute.
 // A backend is a service that handles traffic with a concrete API specification.
+//
+// An LLMBackend is "attached" to a Backend, which is either a k8s Service or a Backend resource of the Envoy Gateway.
+//
+// When a backend with an attached LLMBackend is used as a routing target in the LLMRoute (more precisely, the
+// HTTPRouteSpec defined in the LLMRoute), the ai-gateway will generate the necessary configuration to do
+// the backend specific logic in the final HTTPRoute.
 type LLMBackend struct {
 	metav1.TypeMeta   `json:",inline"`
 	metav1.ObjectMeta `json:"metadata,omitempty"`
@@ -88,7 +95,17 @@ type LLMBackendSpec struct {
 	// the pair of LLMRouteSpec.APISchema and LLMBackendSpec.APISchema.
 	//
 	// This is required to be set.
+	//
+	// +kubebuilder:validation:Required
 	APISchema LLMAPISchema `json:"outputSchema"`
+	// BackendRef is the reference to the Backend resource that this LLMBackend corresponds to.
+	//
+	// A backend can be either a k8s Service or a Backend resource of Envoy Gateway.
+	//
+	// This is required to be set.
+	//
+	// +kubebuilder:validation:Required
+	BackendRef egv1a1.BackendRef `json:"backendRef"`
 }
 
 // LLMAPISchema defines the API schema of either LLMRoute (the input) or LLMBackend (the output).

diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
diff --git a/go.mod b/go.mod
@@ -6,6 +6,7 @@ replace github.com/imdario/mergo => github.com/imdario/mergo v0.3.16
 
 require (
 	github.com/aws/aws-sdk-go v1.55.5
+	github.com/envoyproxy/gateway v1.2.3
 	github.com/envoyproxy/go-control-plane v0.13.1
 	github.com/stretchr/testify v1.10.0
 	k8s.io/apimachinery v0.31.3

diff --git a/go.sum b/go.sum
@@ -12,6 +12,8 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1
 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/emicklei/go-restful/v3 v3.12.1 h1:PJMDIM/ak7btuL8Ex0iYET9hxM3CI2sjZtzpL63nKAU=
 github.com/emicklei/go-restful/v3 v3.12.1/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
+github.com/envoyproxy/gateway v1.2.3 h1:Qne11MOjNPmawTCFi35iuYvwA3kTqmBTFE7wDZkIgmo=
+github.com/envoyproxy/gateway v1.2.3/go.mod h1:JkrLVKpgdd3D6Umr6uw1Hu98lCCpxU2pzK32qeM67U0=
 github.com/envoyproxy/go-control-plane v0.13.1 h1:vPfJZCkob6yTMEgS+0TwfTUfbHjfy/6vOJ8hUWX/uXE=
 github.com/envoyproxy/go-control-plane v0.13.1/go.mod h1:X45hY0mufo6Fd0KW3rqsGvQMw58jvjymeCzBU3mWyHw=
 github.com/envoyproxy/protoc-gen-validate v1.1.0 h1:tntQDh69XqOCOZsDz0lVJQez/2L6Uu2PdjCQwWCJ3bM=
@@ -79,6 +81,7 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G
 github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
 github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
 github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
+github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE=
 github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA=
 github.com/onsi/ginkgo/v2 v2.19.0/go.mod h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To=
 github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk=

diff --git a/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_llmbackends.yaml b/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_llmbackends.yaml
@@ -20,6 +20,12 @@ spec:
         description: |-
           LLMBackend is a resource that represents a single backend for LLMRoute.
           A backend is a service that handles traffic with a concrete API specification.
+
+          An LLMBackend is "attached" to a Backend, which is either a k8s Service or a Backend resource of the Envoy Gateway.
+
+          When a backend with an attached LLMBackend is used as a routing target in the LLMRoute (more precisely, the
+          HTTPRouteSpec defined in the LLMRoute), the ai-gateway will generate the necessary configuration to do
+          the backend specific logic in the final HTTPRoute.
         properties:
           apiVersion:
             description: |-
@@ -41,6 +47,90 @@ spec:
           spec:
             description: Spec defines the details of the LLM policy.
             properties:
+              backendRef:
+                description: |-
+                  BackendRef is the reference to the Backend resource that this LLMBackend corresponds to.
+
+                  A backend can be either a k8s Service or a Backend resource of Envoy Gateway.
+
+                  This is required to be set.
+                properties:
+                  fallback:
+                    description: |-
+                      Fallback indicates whether the backend is designated as a fallback.
+                      Multiple fallback backends can be configured.
+                      It is highly recommended to configure active or passive health checks to ensure that failover can be detected
+                      when the active backends become unhealthy and to automatically readjust once the primary backends are healthy again.
+                      The overprovisioning factor is set to 1.4, meaning the fallback backends will only start receiving traffic when
+                      the health of the active backends falls below 72%.
+                    type: boolean
+                  group:
+                    default: ""
+                    description: |-
+                      Group is the group of the referent. For example, "gateway.networking.k8s.io".
+                      When unspecified or empty string, core API group is inferred.
+                    maxLength: 253
+                    pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$
+                    type: string
+                  kind:
+                    default: Service
+                    description: |-
+                      Kind is the Kubernetes resource kind of the referent. For example
+                      "Service".
+
+                      Defaults to "Service" when not specified.
+
+                      ExternalName services can refer to CNAME DNS records that may live
+                      outside of the cluster and as such are difficult to reason about in
+                      terms of conformance. They also may not be safe to forward to (see
+                      CVE-2021-25740 for more information). Implementations SHOULD NOT
+                      support ExternalName Services.
+
+                      Support: Core (Services with a type other than ExternalName)
+
+                      Support: Implementation-specific (Services with type ExternalName)
+                    maxLength: 63
+                    minLength: 1
+                    pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$
+                    type: string
+                  name:
+                    description: Name is the name of the referent.
+                    maxLength: 253
+                    minLength: 1
+                    type: string
+                  namespace:
+                    description: |-
+                      Namespace is the namespace of the backend. When unspecified, the local
+                      namespace is inferred.
+
+                      Note that when a namespace different than the local namespace is specified,
+                      a ReferenceGrant object is required in the referent namespace to allow that
+                      namespace's owner to accept the reference. See the ReferenceGrant
+                      documentation for details.
+
+                      Support: Core
+                    maxLength: 63
+                    minLength: 1
+                    pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$
+                    type: string
+                  port:
+                    description: |-
+                      Port specifies the destination port number to use for this resource.
+                      Port is required when the referent is a Kubernetes Service. In this
+                      case, the port number is the service port number, not the target port.
+                      For other resources, destination port might be derived from the referent
+                      resource or this field.
+                    format: int32
+                    maximum: 65535
+                    minimum: 1
+                    type: integer
+                required:
+                - name
+                type: object
+                x-kubernetes-validations:
+                - message: Must have port for Service reference
+                  rule: '(size(self.group) == 0 && self.kind == ''Service'') ? has(self.port)
+                    : true'
               outputSchema:
                 description: |-
                   APISchema specifies the API schema of the output format of requests from
@@ -63,6 +153,7 @@ spec:
                 - schema
                 type: object
             required:
+            - backendRef
             - outputSchema
             type: object
         type: object

diff --git a/tests/cel-validation/main_test.go b/tests/cel-validation/main_test.go
@@ -111,6 +111,7 @@ func TestLLMBackends(t *testing.T) {
 		expErr string
 	}{
 		{name: "basic.yaml"},
+		{name: "basic-eg-backend.yaml"},
 		{
 			name:   "unknown_schema.yaml",
 			expErr: "spec.outputSchema.schema: Unsupported value: \"SomeRandomVendor\": supported values: \"OpenAI\", \"AWSBedrock\"",

diff --git a/tests/cel-validation/testdata/llmbackends/basic-eg-backend.yaml b/tests/cel-validation/testdata/llmbackends/basic-eg-backend.yaml
@@ -0,0 +1,12 @@
+apiVersion: aigateway.envoyproxy.io/v1alpha1
+kind: LLMBackend
+metadata:
+  name: eg-backend
+  namespace: default
+spec:
+  outputSchema:
+    schema: AWSBedrock
+  backendRef:
+    name: eg-backend
+    kind: Backend
+    group: gateway.envoyproxy.io
diff --git a/tests/cel-validation/testdata/llmbackends/basic.yaml b/tests/cel-validation/testdata/llmbackends/basic.yaml
@@ -6,3 +6,7 @@ metadata:
 spec:
   outputSchema:
     schema: AWSBedrock
+  backendRef:
+    name: dog-service
+    kind: Service
+    port: 80