internal/units: create replacement for github.com/alecthomas/units pa…

…ckage This commit creates a replacement for the github.com/alecthomas/units package. A replacement is desirable because the upstream package doesn't work very well with the Alloy syntax: * We use the UnmarshalText implementation of Base2Bytes, which treats metric (MB) and IEC (MiB) suffixes as the same. * Base2Bytes always reports values as IEC suffixes when marshaling them back, which can confuse users into thinking a unit conversion has occurred somewhere. I think it's potentially confusing to users that setting a limit of 4MB actually sets a limit of 4MiB, which is ~4% more than what the user intended. The new implementation supports parsing the same input as the old package, including complex byte sizes such as `4MiB3KiB`, though simplified byte sizes are preferred (`4099KiB`), and the simplified forms are returned when marshaling back into a string.
grafana · Apr 29, 2024 · e7bc2fb · e7bc2fb
1 parent fafbaa3
commit e7bc2fb
Show file tree

Hide file tree

Showing 9 changed files with 465 additions and 0 deletions.
diff --git a/internal/units/scanner.go b/internal/units/scanner.go
@@ -0,0 +1,37 @@
+package units
+
+// scanner is a cursor over a string that is consumed one byte at a time.
+type scanner struct {
+	text   string // Input being scanned.
+	offset int    // Index into text of the next unread byte.
+}
+
+// newScanner creates a scanner positioned at the first byte of in.
+func newScanner(in string) *scanner {
+	sc := new(scanner)
+	sc.text = in
+	return sc
+}
+
+// Next reports whether at least one unread byte remains. The scanner is not
+// advanced.
+func (s *scanner) Next() bool {
+	return s.Rem() > 0
+}
+
+// Scan consumes the byte at the current offset and returns it. Callers must
+// check Next first: reading past the end of the input panics.
+func (s *scanner) Scan() byte {
+	ch := s.Peek()
+	s.offset++
+	return ch
+}
+
+// String returns the part of the input that has already been consumed.
+func (s *scanner) String() string {
+	consumed := s.text[:s.offset]
+	return consumed
+}
+
+// Peek returns the byte at the current offset and leaves the scanner where it
+// is. Callers must check Next first: reading past the end of the input panics.
+func (s *scanner) Peek() byte {
+	return s.text[s.offset]
+}
+
+// Rem returns how many bytes of the input have not been consumed yet.
+func (s *scanner) Rem() int {
+	return len(s.text) - s.offset
+}
diff --git a/internal/units/testdata/fuzz/Fuzz_Compare/20c62d19f67262c0 b/internal/units/testdata/fuzz/Fuzz_Compare/20c62d19f67262c0
@@ -0,0 +1,2 @@
+go test fuzz v1
+string("8EiB")
diff --git a/internal/units/testdata/fuzz/Fuzz_Compare/47e2335362c157c8 b/internal/units/testdata/fuzz/Fuzz_Compare/47e2335362c157c8
@@ -0,0 +1,2 @@
+go test fuzz v1
+string("10.B")
diff --git a/internal/units/testdata/fuzz/Fuzz_Compare/bb9c4a861dbff943 b/internal/units/testdata/fuzz/Fuzz_Compare/bb9c4a861dbff943
@@ -0,0 +1,2 @@
+go test fuzz v1
+string("0B00B")
diff --git a/internal/units/testdata/fuzz/Fuzz_Compare/cb083f134466af85 b/internal/units/testdata/fuzz/Fuzz_Compare/cb083f134466af85
@@ -0,0 +1,2 @@
+go test fuzz v1
+string("00B")
diff --git a/internal/units/testdata/fuzz/Fuzz_Compare/fee633b7369dbe46 b/internal/units/testdata/fuzz/Fuzz_Compare/fee633b7369dbe46
@@ -0,0 +1,2 @@
+go test fuzz v1
+string("00B0B")
diff --git a/internal/units/testdata/fuzz/Fuzz_UnmarshalText/e9e3ffbe3b3a072c b/internal/units/testdata/fuzz/Fuzz_UnmarshalText/e9e3ffbe3b3a072c
@@ -0,0 +1,2 @@
+go test fuzz v1
+string("A")
diff --git a/internal/units/units.go b/internal/units/units.go
@@ -0,0 +1,214 @@
+// Package units provides functionality for parsing and displaying multiples of
+// bytes.
+package units
+
+import (
+	"encoding"
+	"errors"
+	"fmt"
+	"strconv"
+)
+
+var (
+	// ErrInvalidSyntax is returned when a byte string cannot be parsed, such
+	// as when the input is empty, a number is missing, or a unit suffix is not
+	// recognized.
+	ErrInvalidSyntax = errors.New("invalid syntax")
+
+	// ErrOverflow is returned when a byte string is too large to be
+	// represented as an int64.
+	ErrOverflow = errors.New("byte size overflows int64")
+)
+
+// Bytes is a size expressed as a signed 64-bit count of bytes.
+type Bytes int64
+
+// Compile-time checks that Bytes implements the text encoding interfaces:
+// unmarshaling through a pointer, marshaling through a value.
+var (
+	_ encoding.TextUnmarshaler = (*Bytes)(nil)
+	_ encoding.TextMarshaler   = Bytes(0)
+)
+
+// Byte multiples. The metric constants are successive powers of 1000 and the
+// IEC constants are successive powers of 1024.
+const (
+	// Byte is the base unit that every other constant is a multiple of.
+	Byte Bytes = 1
+
+	// Metric (decimal) multiples.
+	Kilobyte = 1000 * Byte
+	Megabyte = 1000 * Kilobyte
+	Gigabyte = 1000 * Megabyte
+	Terabyte = 1000 * Gigabyte
+	Petabyte = 1000 * Terabyte
+	Exabyte  = 1000 * Petabyte
+
+	// IEC (binary) multiples.
+	Kibibyte = 1024 * Byte
+	Mebibyte = 1024 * Kibibyte
+	Gibibyte = 1024 * Mebibyte
+	Tebibyte = 1024 * Gibibyte
+	Pebibyte = 1024 * Tebibyte
+	Exbibyte = 1024 * Pebibyte
+)
+
+// unitMap maps the unit suffixes accepted when parsing to their size in
+// bytes. Lookups are case-sensitive. The empty suffix lets a bare number be
+// read as a count of bytes.
+var unitMap = map[string]Bytes{
+	"":   Byte,
+	"b":  Byte,
+	"B":  Byte,
+	"kB": Kilobyte, // Both spellings of the kilobyte suffix are accepted.
+	"KB": Kilobyte,
+	"MB": Megabyte,
+	"GB": Gigabyte,
+	"TB": Terabyte,
+	"PB": Petabyte,
+	"EB": Exabyte,
+
+	"KiB": Kibibyte,
+	"MiB": Mebibyte,
+	"GiB": Gibibyte,
+	"TiB": Tebibyte,
+	"PiB": Pebibyte,
+	"EiB": Exbibyte,
+}
+
+// UnmarshalText parses a byte size from text and stores it in b. Byte sizes
+// are represented as sequences of number and unit pairs with no whitespace.
+// Units are represented either as IEC units (KiB, MiB, GiB, etc) or metric
+// units (kB or KB, MB, GB, etc). A number with no unit is a count of bytes.
+//
+// Multiple sequences of byte sizes can be provided in a single string, such as
+// "4MB2KB". The sum across all sequences is returned.
+//
+// The input may start with a single '+' or '-' sign, which applies to the
+// whole sum. The magnitude is accumulated before the sign is applied, so it
+// must itself fit in an int64.
+//
+// UnmarshalText returns ErrInvalidSyntax if text is empty, consists only of a
+// sign, is missing a number, or uses an unknown unit. It returns ErrOverflow
+// if a number, a number and unit pair, or the running sum does not fit in an
+// int64. b is left unmodified when an error is returned.
+func (b *Bytes) UnmarshalText(text []byte) error {
+	if len(text) == 0 {
+		return ErrInvalidSyntax
+	}
+
+	s := newScanner(string(text))
+
+	// Parse optional leading sign.
+	sign := Bytes(1)
+	switch s.Peek() {
+	case '-':
+		sign = -1
+		s.Scan() // Advance past the sign.
+	case '+':
+		s.Scan() // Advance past the sign.
+	}
+
+	// A sign has to be followed by at least one number and unit pair. Without
+	// this check the loop below would never run and a lone "-" or "+" would
+	// silently parse as zero.
+	if !s.Next() {
+		return ErrInvalidSyntax
+	}
+
+	var sum Bytes
+
+	for s.Next() {
+		numberText, err := scanNumberString(s)
+		if err != nil {
+			return err
+		}
+		number, err := strconv.ParseInt(numberText, 10, 64)
+		if err != nil {
+			// numberText is a non-empty run of ASCII digits, so the expected
+			// failure here is a value that does not fit in an int64.
+			if errors.Is(err, strconv.ErrRange) {
+				return ErrOverflow
+			}
+			return ErrInvalidSyntax
+		}
+
+		unit, ok := unitMap[scanUnitString(s)]
+		if !ok {
+			return ErrInvalidSyntax
+		}
+
+		// Detect wrap-around in the multiplication by dividing back, and in
+		// the addition by checking that the non-negative sum did not shrink.
+		newBytes := Bytes(number) * unit
+		if newBytes/unit != Bytes(number) {
+			return ErrOverflow
+		}
+		if sum+newBytes < sum {
+			return ErrOverflow
+		}
+
+		sum += newBytes
+	}
+
+	*b = sign * sum
+	return nil
+}
+
+func scanNumberString(s *scanner) (string, error) {
+	var str string
+
+	for s.Next() {
+		ch := s.Peek()
+
+		if '0' <= ch && ch <= '9' {
+			str += string(ch)
+			_ = s.Scan() // Advance the scanner.
+			continue
+		}
+
+		break
+	}
+
+	if len(str) == 0 {
+		return "", ErrInvalidSyntax
+	}
+	return str, nil
+}
+
+func scanUnitString(s *scanner) string {
+	var str string
+
+	// Scan until a non-number character.
+	for s.Next() {
+		ch := s.Peek()
+		if ch < '0' || ch > '9' {
+			str += string(ch)
+			_ = s.Scan() // Advance the scanner.
+			continue
+		}
+
+		break
+	}
+
+	return str
+}
+
+// MarshalText encodes b as text using the same representation that
+// [Bytes.String] produces. The returned error is always nil.
+func (b Bytes) MarshalText() ([]byte, error) {
+	text := b.String()
+	return []byte(text), nil
+}
+
+// String returns a string representing the bytes in human-readable form.
+//
+// Byte sizes are always displayed as whole numbers, and are represented in the
+// highest possible prefix that preserves precision:
+//
+//   - If b is a multiple of 1000, the SI decimal prefixes are used (kB, MB,
+//     GB, etc). This takes priority, so 128000 bytes is "128kB" even though it
+//     is also a multiple of 1024.
+//   - Otherwise, if b is a multiple of 1024, the IEC binary prefixes are used
+//     (KiB, MiB, GiB, etc), so 1024 bytes is "1KiB".
+//   - Otherwise b is printed as a plain number with no suffix, so 1025 bytes
+//     is "1025".
+//
+// Zero is always represented as "0".
+func (b Bytes) String() string {
+	if b == 0 {
+		return "0"
+	}
+
+	var metricSuffixes = []string{"", "kB", "MB", "GB", "TB", "PB", "EB"}
+	var iecSuffixes = []string{"", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB"}
+
+	// Pick the unit family once; both families are reduced the same way.
+	base, suffixes := Bytes(1024), iecSuffixes
+	if b%1000 == 0 {
+		base, suffixes = 1000, metricSuffixes
+	}
+
+	// Divide by the base for as long as no precision is lost. The bound keeps
+	// suffixOffset a valid index into suffixes.
+	var suffixOffset int
+	for b%base == 0 && suffixOffset < len(suffixes)-1 {
+		b /= base
+		suffixOffset++
+	}
+
+	// suffixes[0] is empty, so values that could not be reduced are printed as
+	// a plain number.
+	return fmt.Sprintf("%d%s", b, suffixes[suffixOffset])
+}