-
Notifications
You must be signed in to change notification settings - Fork 245
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
internal/units: create replacement for github.com/alecthomas/units package
This commit creates a replacement for the github.com/alecthomas/units package. A replacement is desirable because the upstream package doesn't work very well with the Alloy syntax: * We use the UnmarshalText implementation of Base2Bytes, which treats metric (MB) and IEC (MiB) suffixes as the same. * Base2Bytes always reports values with IEC suffixes when marshalling them back, which can confuse users into thinking a unit conversion has occurred somewhere. I think it's potentially confusing to users that setting a limit of 4MB actually sets a limit of 4MiB, which is ~4% more than what the user intended. The new implementation supports parsing the same input as the old package, including complex byte sizes such as `4MiB3KiB`, though simplified byte sizes are preferred (`4099KiB`), and the simplified forms are returned when marshaling back into a string.
- Loading branch information
Showing
9 changed files
with
465 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
package units | ||
|
||
// scanner is a minimal byte-oriented cursor over an input string.
type scanner struct {
	text   string // Full input being scanned.
	offset int    // Index of the next unread byte in text.
}

// newScanner returns a scanner positioned at the first byte of in.
func newScanner(in string) *scanner {
	return &scanner{text: in, offset: 0}
}

// Next returns true if there are more bytes to scan. It does not advance the
// scanner.
func (s *scanner) Next() bool {
	return s.offset < len(s.text)
}

// Scan returns the next byte and advances the scanner.
//
// If the scanner is exhausted, Scan returns 0 and leaves the offset unchanged
// rather than panicking with an out-of-range index. Because 0 is also a valid
// input byte, callers that need to distinguish the two must check Next first.
func (s *scanner) Scan() byte {
	if !s.Next() {
		return 0
	}
	ch := s.text[s.offset]
	s.offset++
	return ch
}

// String returns the substring up to the current offset, i.e. everything that
// has been consumed so far.
func (s *scanner) String() string {
	return s.text[:s.offset]
}

// Peek returns the byte at the current offset without advancing the scanner.
//
// If the scanner is exhausted, Peek returns 0 rather than panicking. Use Next
// to tell end of input apart from a literal zero byte.
func (s *scanner) Peek() byte {
	if !s.Next() {
		return 0
	}
	return s.text[s.offset]
}

// Rem returns the number of bytes remaining in the scanner.
func (s *scanner) Rem() int {
	return len(s.text) - s.offset
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
go test fuzz v1 | ||
string("8EiB") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
go test fuzz v1 | ||
string("10.B") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
go test fuzz v1 | ||
string("0B00B") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
go test fuzz v1 | ||
string("00B") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
go test fuzz v1 | ||
string("00B0B") |
2 changes: 2 additions & 0 deletions
2
internal/units/testdata/fuzz/Fuzz_UnmarshalText/e9e3ffbe3b3a072c
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
go test fuzz v1 | ||
string("A") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,214 @@ | ||
// Package units provides functionality for parsing and displaying multiples of | ||
// bytes. | ||
package units | ||
|
||
import ( | ||
"encoding" | ||
"errors" | ||
"fmt" | ||
"strconv" | ||
) | ||
|
||
// ErrInvalidSyntax is returned when a byte string cannot be parsed.
var ErrInvalidSyntax = errors.New("invalid syntax")

// ErrOverflow is returned when a byte string is too large to be represented.
var ErrOverflow = errors.New("byte size overflows int64")
|
||
type Bytes int64 | ||
|
||
var ( | ||
_ encoding.TextUnmarshaler = (*Bytes)(nil) | ||
_ encoding.TextMarshaler = Bytes(0) | ||
) | ||
|
||
const ( | ||
Byte Bytes = 1 | ||
|
||
Kilobyte = 1000 * Byte | ||
Megabyte = 1000 * Kilobyte | ||
Gigabyte = 1000 * Megabyte | ||
Terabyte = 1000 * Gigabyte | ||
Petabyte = 1000 * Terabyte | ||
Exabyte = 1000 * Petabyte | ||
|
||
Kibibyte = 1024 * Byte | ||
Mebibyte = 1024 * Kibibyte | ||
Gibibyte = 1024 * Mebibyte | ||
Tebibyte = 1024 * Gibibyte | ||
Pebibyte = 1024 * Tebibyte | ||
Exbibyte = 1024 * Pebibyte | ||
) | ||
|
||
var unitMap = map[string]Bytes{ | ||
"": Byte, | ||
"b": Byte, | ||
"B": Byte, | ||
"kB": Kilobyte, | ||
"KB": Kilobyte, | ||
"MB": Megabyte, | ||
"GB": Gigabyte, | ||
"TB": Terabyte, | ||
"PB": Petabyte, | ||
"EB": Exabyte, | ||
|
||
"KiB": Kibibyte, | ||
"MiB": Mebibyte, | ||
"GiB": Gibibyte, | ||
"TiB": Tebibyte, | ||
"PiB": Pebibyte, | ||
"EiB": Exbibyte, | ||
} | ||
|
||
// UnmarshalText parses a byte size from a string. Byte sizes are represented | ||
// as sequences of number and unit pairs with no whitespace. Units are | ||
// represented either as IEC units (KiB, MiB, GiB, etc) or metric units (kB or | ||
// KB, MB, GB, etc). | ||
// | ||
// Multiple sequences of byte sizes can be provided in a single string, such as | ||
// "4MB2KB". The sum of across all sequences is returned. | ||
func (b *Bytes) UnmarshalText(text []byte) error { | ||
if len(text) == 0 { | ||
return ErrInvalidSyntax | ||
} | ||
|
||
// Byte offset while scanning through input. | ||
s := newScanner(string(text)) | ||
|
||
// Parse optional leading sign. | ||
sign := 1 | ||
switch s.Peek() { | ||
case '-': | ||
sign = -1 | ||
s.Scan() // Advance scanner | ||
case '+': | ||
sign = 1 // This is redundant, but added for clarity. | ||
s.Scan() // Advance scanner | ||
} | ||
|
||
var sum Bytes | ||
|
||
for s.Next() { | ||
// Find digit components. | ||
numberText, err := scanNumberString(s) | ||
if err != nil { | ||
return err | ||
} | ||
number, err := strconv.ParseInt(numberText, 10, 64) | ||
if err != nil { | ||
return ErrInvalidSyntax | ||
} | ||
|
||
unit, ok := unitMap[scanUnitString(s)] | ||
if !ok { | ||
return ErrInvalidSyntax | ||
} | ||
|
||
newBytes := Bytes(number * int64(unit)) | ||
if newBytes/unit != Bytes(number) { | ||
return ErrOverflow | ||
} else if sum+newBytes < sum { | ||
return ErrOverflow | ||
} | ||
|
||
sum += newBytes | ||
} | ||
|
||
*b = Bytes(sign) * sum | ||
return nil | ||
} | ||
|
||
func scanNumberString(s *scanner) (string, error) { | ||
var str string | ||
|
||
for s.Next() { | ||
ch := s.Peek() | ||
|
||
if '0' <= ch && ch <= '9' { | ||
str += string(ch) | ||
_ = s.Scan() // Advance the scanner. | ||
continue | ||
} | ||
|
||
break | ||
} | ||
|
||
if len(str) == 0 { | ||
return "", ErrInvalidSyntax | ||
} | ||
return str, nil | ||
} | ||
|
||
func scanUnitString(s *scanner) string { | ||
var str string | ||
|
||
// Scan until a non-number character. | ||
for s.Next() { | ||
ch := s.Peek() | ||
if ch < '0' || ch > '9' { | ||
str += string(ch) | ||
_ = s.Scan() // Advance the scanner. | ||
continue | ||
} | ||
|
||
break | ||
} | ||
|
||
return str | ||
} | ||
|
||
// MarshalText returns the string representation of b. See [Bytes.String] for | ||
// more information. | ||
func (b Bytes) MarshalText() ([]byte, error) { | ||
return []byte(b.String()), nil | ||
} | ||
|
||
// String returns a string representing the bytes in human-readable form. Bytes | ||
// are returned in the highest possible unit that retains accuracy. If b is a | ||
// multiple of 1024, the IEC binary prefixes are used (KiB, MiB, GiB, etc). | ||
// Otherwise, the SI decimal prefixes are used (kB, MB, GB, etc). | ||
// | ||
// Byte sizes are always displayed as whole numbers, and are represented in the | ||
// highest possible prefix that preserves precision. For example, 1024 bytes | ||
// would be represented as "1KiB", while 1025 bytes would be represented as | ||
// "1025". | ||
func (b Bytes) String() string { | ||
if b == 0 { | ||
return "0" | ||
} | ||
|
||
var metricSuffixes = []string{"", "kB", "MB", "GB", "TB", "PB", "EB"} | ||
var iecSuffixes = []string{"", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB"} | ||
|
||
var suffixOffset int | ||
|
||
isMetric := b%1000 == 0 | ||
|
||
switch { | ||
case isMetric: | ||
for b%1000 == 0 && suffixOffset <= len(metricSuffixes)-1 { | ||
// Divide by 1000, increase suffix offset. | ||
b /= 1000 | ||
suffixOffset++ | ||
} | ||
|
||
if suffixOffset == 0 { | ||
return fmt.Sprintf("%d", b) | ||
} | ||
return fmt.Sprintf("%d%s", b, metricSuffixes[suffixOffset]) | ||
|
||
default: | ||
for b%1024 == 0 && suffixOffset < len(iecSuffixes)-1 { | ||
// Divide by 1024, increase suffix offset. | ||
b /= 1024 | ||
suffixOffset++ | ||
} | ||
|
||
if suffixOffset == 0 { | ||
return fmt.Sprintf("%d", b) | ||
} | ||
return fmt.Sprintf("%d%s", b, iecSuffixes[suffixOffset]) | ||
} | ||
} |
Oops, something went wrong.