Skip to content

Commit

Permalink
Add named vector support (#42)
Browse files: browse the repository at this point in the history
  • Loading branch information
trengrj authored Dec 13, 2024
1 parent 8f6a1c1 commit 2489a6b
Show file tree
Hide file tree
Showing 6 changed files with 106 additions and 56 deletions.
50 changes: 43 additions & 7 deletions benchmarker/cmd/ann_benchmark.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -109,6 +109,16 @@ func writeChunk(chunk *Batch, client *weaviategrpc.WeaviateClient, cfg *Config)
if cfg.Tenant != "" {
objects[i].Tenant = cfg.Tenant
}
if cfg.NamedVector != "" {
vectors := make([]*weaviategrpc.Vectors, 1)
vectors[0] = &weaviategrpc.Vectors{
VectorBytes: encodeVector(vector),
Name: cfg.NamedVector,
}
objects[i].Vectors = vectors
} else {
objects[i].VectorBytes = encodeVector(vector)
}
if cfg.Filter {
nonRefProperties, err := structpb.NewStruct(map[string]interface{}{
"category": strconv.Itoa(chunk.Filters[i]),
Expand Down Expand Up @@ -186,9 +196,8 @@ func createSchema(cfg *Config, client *weaviate.Client) {
}

var classObj = &models.Class{
Class: cfg.ClassName,
Description: fmt.Sprintf("Created by the Weaviate Benchmarker at %s", time.Now().String()),
VectorIndexType: cfg.IndexType,
Class: cfg.ClassName,
Description: fmt.Sprintf("Created by the Weaviate Benchmarker at %s", time.Now().String()),
MultiTenancyConfig: &models.MultiTenancyConfig{
Enabled: multiTenancyEnabled,
},
Expand Down Expand Up @@ -295,7 +304,18 @@ func createSchema(cfg *Config, client *weaviate.Client) {

vectorIndexConfig["filterStrategy"] = cfg.FilterStrategy

classObj.VectorIndexConfig = vectorIndexConfig
if cfg.NamedVector != "" {
vectorConfig := make(map[string]models.VectorConfig)
vectorConfig[cfg.NamedVector] = models.VectorConfig{
Vectorizer: map[string]interface{}{"none": nil},
VectorIndexType: cfg.IndexType,
VectorIndexConfig: vectorIndexConfig,
}
classObj.VectorConfig = vectorConfig
} else {
classObj.VectorIndexType = cfg.IndexType
classObj.VectorIndexConfig = vectorIndexConfig
}

err = client.Schema().ClassCreator().WithClass(classObj).Do(context.Background())
if err != nil {
Expand Down Expand Up @@ -357,7 +377,14 @@ func updateEf(ef int, cfg *Config, client *weaviate.Client) {
panic(err)
}

vectorIndexConfig := classConfig.VectorIndexConfig.(map[string]interface{})
var vectorIndexConfig map[string]interface{}

if cfg.NamedVector != "" {
vectorIndexConfig = classConfig.VectorConfig[cfg.NamedVector].VectorIndexConfig.(map[string]interface{})
} else {
vectorIndexConfig = classConfig.VectorIndexConfig.(map[string]interface{})
}

switch cfg.IndexType {
case "hnsw":
vectorIndexConfig["ef"] = ef
Expand All @@ -368,7 +395,14 @@ func updateEf(ef int, cfg *Config, client *weaviate.Client) {
hnswConfig := vectorIndexConfig["hnsw"].(map[string]interface{})
hnswConfig["ef"] = ef
}
classConfig.VectorIndexConfig = vectorIndexConfig

if cfg.NamedVector != "" {
vectorConfig := classConfig.VectorConfig[cfg.NamedVector]
vectorConfig.VectorIndexConfig = vectorIndexConfig
classConfig.VectorConfig[cfg.NamedVector] = vectorConfig
} else {
classConfig.VectorIndexConfig = vectorIndexConfig
}

err = client.Schema().ClassUpdater().WithClass(classConfig).Do(context.Background())

Expand Down Expand Up @@ -1066,6 +1100,8 @@ func initAnnBenchmark() {
"vectors", "v", "", "Path to the hdf5 file containing the vectors")
annBenchmarkCommand.PersistentFlags().StringVarP(&globalConfig.ClassName,
"className", "c", "Vector", "Class name for testing")
annBenchmarkCommand.PersistentFlags().StringVar(&globalConfig.NamedVector,
"namedVector", "", "Named vector")
annBenchmarkCommand.PersistentFlags().StringVarP(&globalConfig.DistanceMetric,
"distance", "d", "", "Set distance metric (mandatory)")
annBenchmarkCommand.PersistentFlags().BoolVarP(&globalConfig.QueryOnly,
Expand Down Expand Up @@ -1171,7 +1207,7 @@ func benchmarkANN(cfg Config, queries Queries, neighbors Neighbors, filters []in
}

return QueryWithNeighbors{
Query: nearVectorQueryGrpc(cfg.ClassName, queries[i], cfg.Limit, tenant, filter),
Query: nearVectorQueryGrpc(&cfg, queries[i], tenant, filter),
Neighbors: neighbors[i],
}

Expand Down
1 change: 1 addition & 0 deletions benchmarker/cmd/config.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -15,6 +15,7 @@ type Config struct {
Parallel int
Limit int
ClassName string
NamedVector string
IndexType string
ReplicationFactor int
API string
Expand Down
2 changes: 1 addition & 1 deletion benchmarker/cmd/dataset.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -120,7 +120,7 @@ func benchmarkDataset(cfg Config, queries Queries) Results {

if cfg.API == "grpc" {
return QueryWithNeighbors{
Query: nearVectorQueryGrpc(cfg.ClassName, queries[i], cfg.Limit, cfg.Tenant, 0),
Query: nearVectorQueryGrpc(&cfg, queries[i], cfg.Tenant, 0),
}
}

Expand Down
19 changes: 15 additions & 4 deletions benchmarker/cmd/random_vectors.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -134,11 +134,11 @@ func encodeVector(fs []float32) []byte {
return buf
}

func nearVectorQueryGrpc(className string, vec []float32, limit int, tenant string, filter int) []byte {
func nearVectorQueryGrpc(cfg *Config, vec []float32, tenant string, filter int) []byte {

searchRequest := &weaviategrpc.SearchRequest{
Collection: className,
Limit: uint32(limit),
Collection: cfg.ClassName,
Limit: uint32(cfg.Limit),
NearVector: &weaviategrpc.NearVector{
VectorBytes: encodeVector(vec),
},
Expand All @@ -153,6 +153,17 @@ func nearVectorQueryGrpc(className string, vec []float32, limit int, tenant stri
searchRequest.Tenant = tenant
}

if cfg.NamedVector != "" {
searchRequest.NearVector = &weaviategrpc.NearVector{
Targets: &weaviategrpc.Targets{
TargetVectors: []string{cfg.NamedVector},
},
VectorPerTarget: map[string][]byte{
cfg.NamedVector: encodeVector(vec),
},
}
}

if filter >= 0 {
searchRequest.Filters = &weaviategrpc.Filters{
TestValue: &weaviategrpc.Filters_ValueText{
Expand Down Expand Up @@ -188,7 +199,7 @@ func benchmarkNearVector(cfg Config) Results {

if cfg.API == "grpc" {
return QueryWithNeighbors{
Query: nearVectorQueryGrpc(cfg.ClassName, randomVector(cfg.Dimensions), cfg.Limit, cfg.Tenant, 0),
Query: nearVectorQueryGrpc(&cfg, randomVector(cfg.Dimensions), cfg.Tenant, 0),
}
}

Expand Down
31 changes: 16 additions & 15 deletions benchmarker/go.mod
Original file line number | Diff line number | Diff line change
@@ -1,33 +1,33 @@
module github.com/semi-technologies/weaviate-benchmarking/benchmarker

go 1.21
go 1.22.0

toolchain go1.23.3

require (
github.com/google/uuid v1.6.0
github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.0.1
github.com/hashicorp/go-retryablehttp v0.7.5
github.com/pkg/errors v0.9.1
github.com/prometheus/common v0.48.0
github.com/prometheus/common v0.60.0
github.com/sirupsen/logrus v1.9.3
github.com/spf13/cobra v1.0.0
github.com/stretchr/testify v1.9.0
github.com/weaviate/hdf5 v0.0.0-20230911114900-3cd888ffadcd
github.com/weaviate/weaviate v1.26.0-rc.0
github.com/weaviate/weaviate-go-client/v4 v4.14.2
github.com/weaviate/weaviate v1.28.0
github.com/weaviate/weaviate-go-client/v4 v4.16.1
golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1
google.golang.org/grpc v1.64.0
google.golang.org/protobuf v1.34.1
google.golang.org/grpc v1.66.2
google.golang.org/protobuf v1.34.2
)

require (
github.com/PuerkitoBio/purell v1.1.1 // indirect
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/go-openapi/analysis v0.21.2 // indirect
github.com/go-openapi/errors v0.22.0 // indirect
github.com/go-openapi/jsonpointer v0.19.5 // indirect
github.com/go-openapi/jsonreference v0.19.6 // indirect
github.com/go-openapi/jsonreference v0.20.0 // indirect
github.com/go-openapi/loads v0.21.1 // indirect
github.com/go-openapi/spec v0.20.4 // indirect
github.com/go-openapi/strfmt v0.23.0 // indirect
Expand All @@ -38,15 +38,16 @@ require (
github.com/josharian/intern v1.0.0 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/oklog/ulid v1.3.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/client_model v0.5.0 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/spf13/pflag v1.0.3 // indirect
go.mongodb.org/mongo-driver v1.14.0 // indirect
golang.org/x/net v0.26.0 // indirect
golang.org/x/oauth2 v0.20.0 // indirect
golang.org/x/sys v0.21.0 // indirect
golang.org/x/text v0.16.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240429193739-8cf5692501f6 // indirect
golang.org/x/net v0.29.0 // indirect
golang.org/x/oauth2 v0.23.0 // indirect
golang.org/x/sys v0.25.0 // indirect
golang.org/x/text v0.18.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
Loading

0 comments on commit 2489a6b

Please sign in to comment.