Skip to content

Commit

Permalink
api: make LLMRoute reference HTTPRoute (#39)
Browse files Browse the repository at this point in the history
This commit is a follow-up to #20. Basically, this makes LLMRoute
a pure "addition" to the existing standardized HTTPRoute.
This makes it possible to configure something like 
```
kind: LLMRoute
metadata:
  name: llm-route
spec:
  inputSchema: OpenAI
  httpRouteRef:
    name: my-llm-route
---
kind: HTTPRoute
metadata:
  name: my-llm-route
spec:
  matches:
     - headers:
         key: x-envoy-ai-gateway-llm-model
         value: llama3-70b 
       backendRefs: 
       - kserve:
         weight: 20
       - aws-bedrock:
         weight: 80
```

where LLMRoute purely references HTTPRoute, and
users can configure whatever routing conditions they need in a standardized way
via HTTPRoute while leveraging LLM-specific information, in this
case the `x-envoy-ai-gateway-llm-model` header.

In the implementation, though it's not merged yet, we have to do the
routing calculation in the extproc by actually analyzing the referenced
HTTPRoute and emulating its behavior in order to do the transformation.
The reason is that, in general, the routing decision is made at the very end
of the filter chain, so by the time we invoke extproc, we don't have that info.
Furthermore, `x-envoy-ai-gateway-llm-model` is not available before
extproc.


As a bonus, we no longer need TargetRef at the LLMRoute level since
that now lives within the HTTPRoute resources. This will really simplify
the PoC implementation.

---------

Signed-off-by: Takeshi Yoneda <t.y.mathetake@gmail.com>
  • Loading branch information
mathetake authored Dec 10, 2024
1 parent 683c3a2 commit 8299827
Show file tree
Hide file tree
Showing 8 changed files with 2,842 additions and 171 deletions.
36 changes: 20 additions & 16 deletions api/v1alpha1/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package v1alpha1

import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
gwapiv1a2 "sigs.k8s.io/gateway-api/apis/v1alpha2"
gwapiv1 "sigs.k8s.io/gateway-api/apis/v1"
)

// +kubebuilder:object:root=true
Expand All @@ -16,6 +16,9 @@ import (
// receive. And then the Gateway will route the traffic to the appropriate LLMBackend based
// on the output schema of the LLMBackend while doing the other necessary jobs like
// upstream authentication, rate limit, etc.
//
// LLMRoute generates an HTTPRoute resource based on the configuration as the basis for routing the traffic.
// The generated HTTPRoute has the owner reference set to this LLMRoute.
type LLMRoute struct {
metav1.TypeMeta `json:",inline"`
metav1.ObjectMeta `json:"metadata,omitempty"`
Expand Down Expand Up @@ -43,23 +46,18 @@ type LLMRouteSpec struct {
// +kubebuilder:validation:Required
// +kubebuilder:validation:XValidation:rule="self.schema == 'OpenAI'"
APISchema LLMAPISchema `json:"inputSchema"`
// TargetRefs are the names of the Gateway resources this policy is being attached to.
// The namespace is "local", i.e. the same namespace as the LLMRoute.
// HTTPRoute is the base HTTPRouteSpec (https://gateway-api.sigs.k8s.io/api-types/httproute/) in
// the Gateway API on which this LLMRoute will be implemented. AI Gateway controller will generate a HTTPRoute based
// on the configuration given here with the additional modifications to achieve the necessary jobs,
// notably inserting the AI Gateway external processor filter.
//
// +optional
// +kubebuilder:validation:MaxItems=128
TargetRefs []gwapiv1a2.LocalPolicyTargetReferenceWithSectionName `json:"targetRefs"`
// BackendRefs lists the LLMBackends that this LLMRoute will route traffic to.
// The namespace is "local", i.e. the same namespace as the LLMRoute.
// In the matching rules in the HTTPRoute here, `x-envoy-ai-gateway-llm-model` header
// can be used to describe the routing behavior.
//
// +kubebuilder:validation:MaxItems=128
BackendRefs []LLMBackendLocalRef `json:"backendRefs,omitempty"`
}

// LLMBackendLocalRef is a reference to a LLMBackend resource in the "local" namespace.
type LLMBackendLocalRef struct {
// Name is the name of the LLMBackend in the same namespace as the LLMRoute.
Name string `json:"name"`
// Currently, only the exact header matching is supported, otherwise the configuration will be rejected.
//
// +kubebuilder:validation:Required
HTTPRoute gwapiv1.HTTPRouteSpec `json:"httpRoute"`
}

// +kubebuilder:object:root=true
Expand Down Expand Up @@ -123,3 +121,9 @@ const (
// https://docs.aws.amazon.com/bedrock/latest/APIReference/API_Operations_Amazon_Bedrock_Runtime.html
APISchemaAWSBedrock APISchema = "AWSBedrock"
)

// LLMModelHeaderKey names the request header whose value the ai-gateway
// extracts from the incoming request. Matching rules in the HTTPRoute
// associated with an LLMRoute can use this header to describe the routing behavior.
const LLMModelHeaderKey = "x-envoy-ai-gateway-llm-model"
29 changes: 1 addition & 28 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 19 additions & 15 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ module github.com/envoyproxy/ai-gateway

go 1.23.2

replace github.com/imdario/mergo => github.com/imdario/mergo v0.3.16

require (
github.com/aws/aws-sdk-go v1.55.5
github.com/envoyproxy/go-control-plane v0.13.1
Expand All @@ -14,10 +16,11 @@ require (
require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/cncf/xds/go v0.0.0-20240423153145-555b57ec207b // indirect
github.com/cncf/xds/go v0.0.0-20240723142845-024c85f92f20 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/emicklei/go-restful/v3 v3.12.0 // indirect
github.com/emicklei/go-restful/v3 v3.12.1 // indirect
github.com/envoyproxy/protoc-gen-validate v1.1.0 // indirect
github.com/evanphx/json-patch v5.9.0+incompatible // indirect
github.com/evanphx/json-patch/v5 v5.9.0 // indirect
github.com/fxamacker/cbor/v2 v2.7.0 // indirect
github.com/go-logr/logr v1.4.2 // indirect
Expand All @@ -32,43 +35,44 @@ require (
github.com/google/go-cmp v0.6.0 // indirect
github.com/google/gofuzz v1.2.0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/imdario/mergo v0.3.16 // indirect
github.com/imdario/mergo v1.0.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/compress v1.17.9 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/client_golang v1.19.1 // indirect
github.com/prometheus/client_golang v1.20.5 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.55.0 // indirect
github.com/prometheus/common v0.60.1 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/x448/float16 v0.8.4 // indirect
go.uber.org/multierr v1.11.0 // indirect
go.uber.org/zap v1.26.0 // indirect
golang.org/x/exp v0.0.0-20240416160154-fe59bbe5cc7f // indirect
go.uber.org/zap v1.27.0 // indirect
golang.org/x/exp v0.0.0-20240904232852-e7e105dedf7e // indirect
golang.org/x/net v0.31.0 // indirect
golang.org/x/oauth2 v0.21.0 // indirect
golang.org/x/oauth2 v0.23.0 // indirect
golang.org/x/sys v0.27.0 // indirect
golang.org/x/term v0.26.0 // indirect
golang.org/x/text v0.20.0 // indirect
golang.org/x/time v0.5.0 // indirect
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094 // indirect
google.golang.org/grpc v1.66.2 // indirect
google.golang.org/protobuf v1.34.2 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20241007155032-5fefd90f89a9 // indirect
google.golang.org/grpc v1.67.1 // indirect
google.golang.org/protobuf v1.35.1 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/api v0.31.1 // indirect
k8s.io/apiextensions-apiserver v0.31.1 // indirect
k8s.io/client-go v0.31.1 // indirect
k8s.io/api v0.31.2 // indirect
k8s.io/apiextensions-apiserver v0.31.2 // indirect
k8s.io/client-go v0.31.2 // indirect
k8s.io/klog/v2 v2.130.1 // indirect
k8s.io/kube-openapi v0.0.0-20240423202451-8948a665c108 // indirect
k8s.io/kube-openapi v0.0.0-20240521193020-835d969ad83a // indirect
k8s.io/utils v0.0.0-20241104163129-6fe5fd82f078 // indirect
sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.4.3 // indirect
Expand Down
Loading

0 comments on commit 8299827

Please sign in to comment.