From 1ab8343147c19774eb2064335563e5302fc26cf2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Abdel=20Rodr=C3=ADguez?= Date: Fri, 25 Oct 2024 12:55:19 +0200 Subject: [PATCH] Adding support for lasq (#37) --- benchmarker/cmd/ann_benchmark.go | 33 ++++++++++++++++++++++++++++++-- benchmarker/cmd/config.go | 1 + 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/benchmarker/cmd/ann_benchmark.go b/benchmarker/cmd/ann_benchmark.go index dc9dcc1..b132768 100644 --- a/benchmarker/cmd/ann_benchmark.go +++ b/benchmarker/cmd/ann_benchmark.go @@ -39,8 +39,9 @@ import ( type CompressionType byte const ( - CompressionTypePQ CompressionType = 0 - CompressionTypeSQ CompressionType = 1 + CompressionTypePQ CompressionType = 0 + CompressionTypeSQ CompressionType = 1 + CompressionTypeLASQ CompressionType = 2 ) // Batch of vectors and offset for writing to Weaviate @@ -239,6 +240,17 @@ func createSchema(cfg *Config, client *weaviate.Client) { "trainingLimit": cfg.TrainingLimit, }, } + } else if cfg.LASQ == "auto" { + vectorIndexConfig = map[string]interface{}{ + "distance": cfg.DistanceMetric, + "efConstruction": float64(cfg.EfConstruction), + "maxConnections": float64(cfg.MaxConnections), + "cleanupIntervalSeconds": cfg.CleanupIntervalSeconds, + "lasq": map[string]interface{}{ + "enabled": true, + "trainingLimit": cfg.TrainingLimit, + }, + } } } else if cfg.IndexType == "flat" { vectorIndexConfig = map[string]interface{}{ @@ -425,6 +437,11 @@ func enableCompression(cfg *Config, client *weaviate.Client, dimensions uint, co "trainingLimit": cfg.TrainingLimit, "rescoreLimit": cfg.RescoreLimit, } + case CompressionTypeLASQ: + vectorIndexConfig["lasq"] = map[string]interface{}{ + "enabled": true, + "trainingLimit": cfg.TrainingLimit, + } } classConfig.VectorIndexConfig = vectorIndexConfig @@ -439,6 +456,8 @@ func enableCompression(cfg *Config, client *weaviate.Client, dimensions uint, co log.WithFields(log.Fields{"segments": segments, "dimensions": dimensions}).Printf("Enabled PQ. Waiting for shard ready.\n") case CompressionTypeSQ: log.Printf("Enabled SQ. Waiting for shard ready.\n") + case CompressionTypeLASQ: + log.Printf("Enabled LASQ. Waiting for shard ready.\n") } start := time.Now() @@ -475,6 +494,8 @@ func enableCompression(cfg *Config, client *weaviate.Client, dimensions uint, co log.WithFields(log.Fields{"segments": segments, "dimensions": dimensions}).Printf("PQ Completed in %v\n", endTime.Sub(start)) case CompressionTypeSQ: log.Printf("SQ Completed in %v\n", endTime.Sub(start)) + case CompressionTypeLASQ: + log.Printf("LASQ Completed in %v\n", endTime.Sub(start)) } } @@ -803,6 +824,12 @@ func loadANNBenchmarksFile(file *hdf5.File, cfg *Config, client *weaviate.Client enableCompression(cfg, client, dimensions, CompressionTypeSQ) loadHdf5Train(file, cfg, uint(cfg.TrainingLimit), 0, 0) + } else if cfg.LASQ == "enabled" { + dimensions := loadHdf5Train(file, cfg, 0, uint(cfg.TrainingLimit), 0) + log.Printf("Pausing to enable LASQ.") + enableCompression(cfg, client, dimensions, CompressionTypeLASQ) + loadHdf5Train(file, cfg, uint(cfg.TrainingLimit), 0, 0) + } else { loadHdf5Train(file, cfg, 0, maxRows, 0) } @@ -1055,6 +1082,8 @@ func initAnnBenchmark() { "pq", "disabled", "Set PQ (disabled, auto, or enabled) (default disabled)") annBenchmarkCommand.PersistentFlags().StringVar(&globalConfig.SQ, "sq", "disabled", "Set SQ (disabled, auto, or enabled) (default disabled)") + annBenchmarkCommand.PersistentFlags().StringVar(&globalConfig.LASQ, + "lasq", "disabled", "Set LASQ (disabled, auto, or enabled) (default disabled)") annBenchmarkCommand.PersistentFlags().UintVar(&globalConfig.PQRatio, "pqRatio", 4, "Set PQ segments = dimensions / ratio (must divide evenly default 4)") annBenchmarkCommand.PersistentFlags().UintVar(&globalConfig.PQSegments, diff --git a/benchmarker/cmd/config.go b/benchmarker/cmd/config.go index 0d23224..bbfb223 100644 --- a/benchmarker/cmd/config.go +++ b/benchmarker/cmd/config.go @@ -40,6 +40,7 @@ type Config struct { RescoreLimit int PQ string SQ string + LASQ string SkipAsyncReady bool SkipTombstonesEmpty bool SkipMemoryStats bool