Skip to content

Commit

Permalink
feat: vec-381 vec-396 add vector records, vertices, index size, and …
Browse files Browse the repository at this point in the history
…unmerged percent to index list output (#20)

* vec-396 move vector records index list output out of verbose and into default

* feat: vec-381 add index size and percent unmerged to index ls output
  • Loading branch information
dwelch-spike authored Oct 29, 2024
1 parent 622fdd2 commit ca564e1
Show file tree
Hide file tree
Showing 7 changed files with 262 additions and 55 deletions.
32 changes: 15 additions & 17 deletions .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,20 +76,18 @@ run:
# skip-files:
# - sample

# issues:
# exclude-rules:
# - path: info/as_parser_test\.go
# linters:
# - lll # Test code is allowed to have long lines
# - path: asconfig/generate_test\.go
# linters:
# - dupl # Test code is allowed to have duplicate code
# - path: asconfig/asconfig_test\.go
# linters:
# - dupl # Test code is allowed to have duplicate code
# - path: '(.+)test\.go'
# linters:
# - govet # Test code field alignment for sake of space is not a concern
# - linters:
# - lll
# source: "// "
issues:
exclude-rules:
- path: info/as_parser_test\.go
linters:
- lll # Test code is allowed to have long lines
- path: asconfig/generate_test\.go
linters:
- dupl # Test code is allowed to have duplicate code
- path: asconfig/asconfig_test\.go
linters:
- dupl # Test code is allowed to have duplicate code
- path: '(.+)test\.go'
linters:
- govet # Test code field alignment for sake of space is not a concern
- wsl # Auto generated tests cuddle assignments
4 changes: 2 additions & 2 deletions cmd/flags/hnsw.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ func NewHnswIndexCachingFlags() *IndexCachingFlags {
func (cf *IndexCachingFlags) NewFlagSet() *pflag.FlagSet {
flagSet := &pflag.FlagSet{}
flagSet.Var(&cf.MaxEntries, HnswIndexCacheMaxEntries, "Maximum number of entries to cache.")
flagSet.Var(&cf.Expiry, HnswIndexCacheExpiry, "A cache entry will expire after this amount of time has passed since the entry was added to cache, or 'inf' to never expire.")
flagSet.Var(&cf.Expiry, HnswIndexCacheExpiry, "A cache entry will expire after this amount of time has passed since the entry was added to cache, or -1 to never expire.")

return flagSet
}
Expand Down Expand Up @@ -85,7 +85,7 @@ func NewHnswRecordCachingFlags() *RecordCachingFlags {
func (cf *RecordCachingFlags) NewFlagSet() *pflag.FlagSet {
flagSet := &pflag.FlagSet{}
flagSet.Var(&cf.MaxEntries, HnswRecordCacheMaxEntries, "Maximum number of entries to cache.")
flagSet.Var(&cf.Expiry, HnswRecordCacheExpiry, "A cache entry will expire after this amount of time has passed since the entry was added to cache, or 'inf' to never expire.")
flagSet.Var(&cf.Expiry, HnswRecordCacheExpiry, "A cache entry will expire after this amount of time has passed since the entry was added to cache, or -1 to never expire.")

return flagSet
}
Expand Down
8 changes: 4 additions & 4 deletions cmd/flags/optionals.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,8 @@ func (f *DurationOptionalFlag) Int64() *int64 {
return &milli
}

// InfDurationOptionalFlag is a flag that can be either a time.duration or infinity.
// It is used for flags like --hnsw-index-cache-expiry which can be set to "infinity"
// InfDurationOptionalFlag is a flag that can be either a time.duration or -1 (never expire).
// It is used for flags like --hnsw-index-cache-expiry which can be set to never expire (-1)
type InfDurationOptionalFlag struct {
duration DurationOptionalFlag
isInfinite bool
Expand All @@ -224,7 +224,7 @@ func (f *InfDurationOptionalFlag) Set(val string) error {

val = strings.ToLower(val)

if val == "inf" || val == "infinity" || val == "-1" {
if val == strconv.Itoa(Infinity) {
f.isInfinite = true
} else {
return fmt.Errorf("invalid duration %s", val)
Expand All @@ -239,7 +239,7 @@ func (f *InfDurationOptionalFlag) Type() string {

func (f *InfDurationOptionalFlag) String() string {
if f.isInfinite {
return "infinity"
return "-1"
}

if f.duration.Val != nil {
Expand Down
22 changes: 2 additions & 20 deletions cmd/flags/optionals_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,30 +114,12 @@ func (suite *OptionalFlagSuite) TestDurationOptionalFlag() {
func (suite *OptionalFlagSuite) TestInfDurationOptionalFlag() {
f := &InfDurationOptionalFlag{}

err := f.Set("inf")
err := f.Set("-1")
if err != nil {
suite.T().Errorf("Unexpected error: %v", err)
}

suite.Equal("infinity", f.String())
suite.Equal(int64(-1), *f.Int64())
f = &InfDurationOptionalFlag{}

err = f.Set("infinity")
if err != nil {
suite.T().Errorf("Unexpected error: %v", err)
}

suite.Equal("infinity", f.String())
suite.Equal(int64(-1), *f.Int64())
f = &InfDurationOptionalFlag{}

err = f.Set("-1")
if err != nil {
suite.T().Errorf("Unexpected error: %v", err)
}

suite.Equal("infinity", f.String())
suite.Equal("-1", f.String())
suite.Equal(int64(-1), *f.Int64())
f = &InfDurationOptionalFlag{}

Expand Down
56 changes: 54 additions & 2 deletions cmd/writers/indexList.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,13 @@ func NewIndexTableWriter(writer io.Writer, verbose bool, logger *slog.Logger) *I
"Dimensions",
"Distance Metric",
"Unmerged",
"Vector Records",
"Size",
"Unmerged %",
}
verboseHeadings := append(table.Row{}, headings...)
verboseHeadings = append(
verboseHeadings,
"Vector Records",
"Vertices",
"Labels*",
"Storage",
Expand Down Expand Up @@ -79,11 +81,13 @@ func (itw *IndexTableWriter) AppendIndexRow(
index.Dimensions,
index.VectorDistanceMetric,
status.GetUnmergedRecordCount(),
status.GetIndexHealerVectorRecordsIndexed(),
formatBytes(calculateIndexSize(index, status)),
getPercentUnmerged(status),
}

if itw.verbose {
row = append(row,
status.GetIndexHealerVectorRecordsIndexed(),
status.GetIndexHealerVerticesValid(),
index.Labels,
)
Expand Down Expand Up @@ -145,3 +149,51 @@ func convertMillisecondToDuration[T int64 | uint64 | uint32](m T) time.Duration
func convertFloatToPercentStr(f float32) string {
return fmt.Sprintf("%.2f%%", f)
}

// calculateIndexSize returns an estimate, in bytes, of the space used by an
// index. The estimate is (per-record overhead + vector size) multiplied by the
// number of valid vertices reported in status.
func calculateIndexSize(index *protos.IndexDefinition, status *protos.IndexStatusResponse) int64 {
	const (
		// Every dimension of a vector is stored as a float32.
		bytesPerDimension = 4
		// Approximate fixed overhead carried by each index record.
		recordOverheadBytes = 500
	)

	perRecord := recordOverheadBytes + int64(index.Dimensions)*bytesPerDimension

	return status.GetIndexHealerVerticesValid() * perRecord
}

// formatBytes renders a byte count as a human readable string using binary
// (1024-based) units. Values of at least 1 KB are printed with two decimal
// places in the largest unit that fits (KB, MB, GB, TB or PB); anything
// smaller is printed as a whole number of bytes.
func formatBytes(bytes int64) string {
	// Ordered from largest to smallest so the first match is the best fit.
	units := [...]struct {
		size   int64
		suffix string
	}{
		{1 << 50, "PB"},
		{1 << 40, "TB"},
		{1 << 30, "GB"},
		{1 << 20, "MB"},
		{1 << 10, "KB"},
	}

	for _, unit := range units {
		if bytes >= unit.size {
			return fmt.Sprintf("%.2f %s", float64(bytes)/float64(unit.size), unit.suffix)
		}
	}

	return fmt.Sprintf("%d B", bytes)
}

// getPercentUnmerged reports the unmerged record count as a percentage of the
// valid vertex count, formatted with two decimal places. When there are no
// valid vertices it returns "0%" rather than dividing by zero.
func getPercentUnmerged(status *protos.IndexStatusResponse) string {
	validVertices := status.GetIndexHealerVerticesValid()
	if validVertices == 0 {
		return "0%"
	}

	ratio := float64(status.GetUnmergedRecordCount()) / float64(validVertices)

	return fmt.Sprintf("%.2f%%", ratio*100)
}
175 changes: 175 additions & 0 deletions cmd/writers/indexList_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
package writers

import (
"testing"

"github.com/aerospike/avs-client-go/protos"
)

// Test_calculateIndexSize checks the estimated index size against
// hand-computed values.
func Test_calculateIndexSize(t *testing.T) {
	cases := []struct {
		name   string
		index  *protos.IndexDefinition
		status *protos.IndexStatusResponse
		want   int64
	}{
		{
			name:   "positive simple",
			index:  &protos.IndexDefinition{Dimensions: 100},
			status: &protos.IndexStatusResponse{IndexHealerVerticesValid: 10},
			want:   9000, // (500 + 100*4) * 10
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := calculateIndexSize(tc.index, tc.status)
			if got != tc.want {
				t.Errorf("calculateIndexSize() = %v, want %v", got, tc.want)
			}
		})
	}
}

// Test_formatBytes checks one value at the lower boundary of every unit, plus
// plain byte counts below 1 KB.
func Test_formatBytes(t *testing.T) {
	cases := []struct {
		name  string
		bytes int64
		want  string
	}{
		{name: "petabytes", bytes: 1 << 50, want: "1.00 PB"},
		{name: "terabytes", bytes: 1 << 40, want: "1.00 TB"},
		{name: "gigabytes", bytes: 1 << 30, want: "1.00 GB"},
		{name: "megabytes", bytes: 1 << 20, want: "1.00 MB"},
		{name: "kilobytes", bytes: 1 << 10, want: "1.00 KB"},
		{name: "bytes", bytes: 512, want: "512 B"},
		{name: "zero bytes", bytes: 0, want: "0 B"},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := formatBytes(tc.bytes)
			if got != tc.want {
				t.Errorf("formatBytes() = %v, want %v", got, tc.want)
			}
		})
	}
}
// Test_getPercentUnmerged covers the zero-vertex guard as well as whole and
// fractional percentages.
func Test_getPercentUnmerged(t *testing.T) {
	cases := []struct {
		name     string
		vertices int64
		unmerged int64
		want     string
	}{
		{name: "zero vertices", vertices: 0, unmerged: 10, want: "0%"},
		{name: "zero unmerged records", vertices: 100, unmerged: 0, want: "0.00%"},
		{name: "50 percent unmerged", vertices: 100, unmerged: 50, want: "50.00%"},
		{name: "100 percent unmerged", vertices: 100, unmerged: 100, want: "100.00%"},
		{name: "33.33 percent unmerged", vertices: 300, unmerged: 100, want: "33.33%"},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			status := &protos.IndexStatusResponse{
				IndexHealerVerticesValid: tc.vertices,
				UnmergedRecordCount:      tc.unmerged,
			}

			got := getPercentUnmerged(status)
			if got != tc.want {
				t.Errorf("getPercentUnmerged() = %v, want %v", got, tc.want)
			}
		})
	}
}
Loading

0 comments on commit ca564e1

Please sign in to comment.