Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add named vector support #42

Merged
merged 1 commit into from
Dec 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 43 additions & 7 deletions benchmarker/cmd/ann_benchmark.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,16 @@ func writeChunk(chunk *Batch, client *weaviategrpc.WeaviateClient, cfg *Config)
if cfg.Tenant != "" {
objects[i].Tenant = cfg.Tenant
}
if cfg.NamedVector != "" {
vectors := make([]*weaviategrpc.Vectors, 1)
vectors[0] = &weaviategrpc.Vectors{
VectorBytes: encodeVector(vector),
Name: cfg.NamedVector,
}
objects[i].Vectors = vectors
} else {
objects[i].VectorBytes = encodeVector(vector)
}
if cfg.Filter {
nonRefProperties, err := structpb.NewStruct(map[string]interface{}{
"category": strconv.Itoa(chunk.Filters[i]),
Expand Down Expand Up @@ -186,9 +196,8 @@ func createSchema(cfg *Config, client *weaviate.Client) {
}

var classObj = &models.Class{
Class: cfg.ClassName,
Description: fmt.Sprintf("Created by the Weaviate Benchmarker at %s", time.Now().String()),
VectorIndexType: cfg.IndexType,
Class: cfg.ClassName,
Description: fmt.Sprintf("Created by the Weaviate Benchmarker at %s", time.Now().String()),
MultiTenancyConfig: &models.MultiTenancyConfig{
Enabled: multiTenancyEnabled,
},
Expand Down Expand Up @@ -295,7 +304,18 @@ func createSchema(cfg *Config, client *weaviate.Client) {

vectorIndexConfig["filterStrategy"] = cfg.FilterStrategy

classObj.VectorIndexConfig = vectorIndexConfig
if cfg.NamedVector != "" {
vectorConfig := make(map[string]models.VectorConfig)
vectorConfig[cfg.NamedVector] = models.VectorConfig{
Vectorizer: map[string]interface{}{"none": nil},
VectorIndexType: cfg.IndexType,
VectorIndexConfig: vectorIndexConfig,
}
classObj.VectorConfig = vectorConfig
} else {
classObj.VectorIndexType = cfg.IndexType
classObj.VectorIndexConfig = vectorIndexConfig
}

err = client.Schema().ClassCreator().WithClass(classObj).Do(context.Background())
if err != nil {
Expand Down Expand Up @@ -357,7 +377,14 @@ func updateEf(ef int, cfg *Config, client *weaviate.Client) {
panic(err)
}

vectorIndexConfig := classConfig.VectorIndexConfig.(map[string]interface{})
var vectorIndexConfig map[string]interface{}

if cfg.NamedVector != "" {
vectorIndexConfig = classConfig.VectorConfig[cfg.NamedVector].VectorIndexConfig.(map[string]interface{})
} else {
vectorIndexConfig = classConfig.VectorIndexConfig.(map[string]interface{})
}

switch cfg.IndexType {
case "hnsw":
vectorIndexConfig["ef"] = ef
Expand All @@ -368,7 +395,14 @@ func updateEf(ef int, cfg *Config, client *weaviate.Client) {
hnswConfig := vectorIndexConfig["hnsw"].(map[string]interface{})
hnswConfig["ef"] = ef
}
classConfig.VectorIndexConfig = vectorIndexConfig

if cfg.NamedVector != "" {
vectorConfig := classConfig.VectorConfig[cfg.NamedVector]
vectorConfig.VectorIndexConfig = vectorIndexConfig
classConfig.VectorConfig[cfg.NamedVector] = vectorConfig
} else {
classConfig.VectorIndexConfig = vectorIndexConfig
}

err = client.Schema().ClassUpdater().WithClass(classConfig).Do(context.Background())

Expand Down Expand Up @@ -1066,6 +1100,8 @@ func initAnnBenchmark() {
"vectors", "v", "", "Path to the hdf5 file containing the vectors")
annBenchmarkCommand.PersistentFlags().StringVarP(&globalConfig.ClassName,
"className", "c", "Vector", "Class name for testing")
annBenchmarkCommand.PersistentFlags().StringVar(&globalConfig.NamedVector,
"namedVector", "", "Named vector")
annBenchmarkCommand.PersistentFlags().StringVarP(&globalConfig.DistanceMetric,
"distance", "d", "", "Set distance metric (mandatory)")
annBenchmarkCommand.PersistentFlags().BoolVarP(&globalConfig.QueryOnly,
Expand Down Expand Up @@ -1171,7 +1207,7 @@ func benchmarkANN(cfg Config, queries Queries, neighbors Neighbors, filters []in
}

return QueryWithNeighbors{
Query: nearVectorQueryGrpc(cfg.ClassName, queries[i], cfg.Limit, tenant, filter),
Query: nearVectorQueryGrpc(&cfg, queries[i], tenant, filter),
Neighbors: neighbors[i],
}

Expand Down
1 change: 1 addition & 0 deletions benchmarker/cmd/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ type Config struct {
Parallel int
Limit int
ClassName string
NamedVector string
IndexType string
ReplicationFactor int
API string
Expand Down
2 changes: 1 addition & 1 deletion benchmarker/cmd/dataset.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ func benchmarkDataset(cfg Config, queries Queries) Results {

if cfg.API == "grpc" {
return QueryWithNeighbors{
Query: nearVectorQueryGrpc(cfg.ClassName, queries[i], cfg.Limit, cfg.Tenant, 0),
Query: nearVectorQueryGrpc(&cfg, queries[i], cfg.Tenant, 0),
}
}

Expand Down
19 changes: 15 additions & 4 deletions benchmarker/cmd/random_vectors.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,11 +134,11 @@ func encodeVector(fs []float32) []byte {
return buf
}

func nearVectorQueryGrpc(className string, vec []float32, limit int, tenant string, filter int) []byte {
func nearVectorQueryGrpc(cfg *Config, vec []float32, tenant string, filter int) []byte {

searchRequest := &weaviategrpc.SearchRequest{
Collection: className,
Limit: uint32(limit),
Collection: cfg.ClassName,
Limit: uint32(cfg.Limit),
NearVector: &weaviategrpc.NearVector{
VectorBytes: encodeVector(vec),
},
Expand All @@ -153,6 +153,17 @@ func nearVectorQueryGrpc(className string, vec []float32, limit int, tenant stri
searchRequest.Tenant = tenant
}

if cfg.NamedVector != "" {
searchRequest.NearVector = &weaviategrpc.NearVector{
Targets: &weaviategrpc.Targets{
TargetVectors: []string{cfg.NamedVector},
},
VectorPerTarget: map[string][]byte{
cfg.NamedVector: encodeVector(vec),
},
}
}

if filter >= 0 {
searchRequest.Filters = &weaviategrpc.Filters{
TestValue: &weaviategrpc.Filters_ValueText{
Expand Down Expand Up @@ -188,7 +199,7 @@ func benchmarkNearVector(cfg Config) Results {

if cfg.API == "grpc" {
return QueryWithNeighbors{
Query: nearVectorQueryGrpc(cfg.ClassName, randomVector(cfg.Dimensions), cfg.Limit, cfg.Tenant, 0),
Query: nearVectorQueryGrpc(&cfg, randomVector(cfg.Dimensions), cfg.Tenant, 0),
}
}

Expand Down
31 changes: 16 additions & 15 deletions benchmarker/go.mod
Original file line number Diff line number Diff line change
@@ -1,33 +1,33 @@
module github.com/semi-technologies/weaviate-benchmarking/benchmarker

go 1.21
go 1.22.0

toolchain go1.23.3

require (
github.com/google/uuid v1.6.0
github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.0.1
github.com/hashicorp/go-retryablehttp v0.7.5
github.com/pkg/errors v0.9.1
github.com/prometheus/common v0.48.0
github.com/prometheus/common v0.60.0
github.com/sirupsen/logrus v1.9.3
github.com/spf13/cobra v1.0.0
github.com/stretchr/testify v1.9.0
github.com/weaviate/hdf5 v0.0.0-20230911114900-3cd888ffadcd
github.com/weaviate/weaviate v1.26.0-rc.0
github.com/weaviate/weaviate-go-client/v4 v4.14.2
github.com/weaviate/weaviate v1.28.0
github.com/weaviate/weaviate-go-client/v4 v4.16.1
golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1
google.golang.org/grpc v1.64.0
google.golang.org/protobuf v1.34.1
google.golang.org/grpc v1.66.2
google.golang.org/protobuf v1.34.2
)

require (
github.com/PuerkitoBio/purell v1.1.1 // indirect
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/go-openapi/analysis v0.21.2 // indirect
github.com/go-openapi/errors v0.22.0 // indirect
github.com/go-openapi/jsonpointer v0.19.5 // indirect
github.com/go-openapi/jsonreference v0.19.6 // indirect
github.com/go-openapi/jsonreference v0.20.0 // indirect
github.com/go-openapi/loads v0.21.1 // indirect
github.com/go-openapi/spec v0.20.4 // indirect
github.com/go-openapi/strfmt v0.23.0 // indirect
Expand All @@ -38,15 +38,16 @@ require (
github.com/josharian/intern v1.0.0 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/oklog/ulid v1.3.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/client_model v0.5.0 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/spf13/pflag v1.0.3 // indirect
go.mongodb.org/mongo-driver v1.14.0 // indirect
golang.org/x/net v0.26.0 // indirect
golang.org/x/oauth2 v0.20.0 // indirect
golang.org/x/sys v0.21.0 // indirect
golang.org/x/text v0.16.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240429193739-8cf5692501f6 // indirect
golang.org/x/net v0.29.0 // indirect
golang.org/x/oauth2 v0.23.0 // indirect
golang.org/x/sys v0.25.0 // indirect
golang.org/x/text v0.18.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
Loading