From f2fa0e4ba5226ab0644b05bb04099c30b144a1d5 Mon Sep 17 00:00:00 2001 From: Pavel Gabriel Date: Wed, 8 Nov 2023 18:17:03 +0100 Subject: [PATCH 1/4] use runes --- field/string.go | 5 +++- field/string_test.go | 54 ++++++++++++++++++++++++++++++++++++++++++++ padding/left.go | 5 ++-- 3 files changed, 61 insertions(+), 3 deletions(-) diff --git a/field/string.go b/field/string.go index c8ce11c7..a68f56e2 100644 --- a/field/string.go +++ b/field/string.go @@ -6,6 +6,7 @@ import ( "reflect" "strconv" "strings" + "unicode/utf8" "github.com/moov-io/iso8583/utils" ) @@ -81,7 +82,9 @@ func (f *String) Pack() ([]byte, error) { return nil, fmt.Errorf("failed to encode content: %w", err) } - packedLength, err := f.spec.Pref.EncodeLength(f.spec.Length, len(data)) + runeCount := utf8.RuneCountInString(string(data)) + + packedLength, err := f.spec.Pref.EncodeLength(f.spec.Length, runeCount) if err != nil { return nil, fmt.Errorf("failed to encode length: %w", err) } diff --git a/field/string_test.go b/field/string_test.go index 01d1b092..cca71059 100644 --- a/field/string_test.go +++ b/field/string_test.go @@ -59,6 +59,60 @@ func TestStringField(t *testing.T) { require.Equal(t, "hello", str.Value()) } +func TestStringWithNonUTF8Encoding(t *testing.T) { + spec := &Spec{ + Length: 10, + Description: "Field", + Enc: encoding.EBCDIC1047, + Pref: prefix.EBCDIC1047.Fixed, + Pad: padding.Left(' '), + } + str := NewString(spec) + + hullo := []byte{0x88, 0xDC, 0x93, 0x93, 0x96} + olluh := []byte{0x96, 0x93, 0x93, 0xDC, 0x88} + + // SetBytes takes UTF-8 encoded bytes + str.SetBytes([]byte("hüllo")) + require.Equal(t, "hüllo", str.Value()) + + packed, err := str.Pack() + require.NoError(t, err) + require.Equal(t, append([]byte{0x40, 0x40, 0x40, 0x40, 0x40}, hullo...), packed) + + length, err := str.Unpack(append([]byte{0x40, 0x40, 0x40, 0x40, 0x40}, olluh...)) + require.NoError(t, err) + require.Equal(t, 10, length) + + // Bytes returns the UTF-8 encoding of the value + b, err := str.Bytes() + require.NoError(t, err) + require.Equal(t, []byte("ollüh"), b) + + require.Equal(t, "ollüh", str.Value()) + + str = NewString(spec) + str.Marshal(NewStringValue("hüllo")) + packed, err = str.Pack() + require.NoError(t, err) + require.Equal(t, append([]byte{0x40, 0x40, 0x40, 0x40, 0x40}, hullo...), packed) + + str = NewString(spec) + length, err = str.Unpack(append([]byte{0x40, 0x40, 0x40, 0x40, 0x40}, olluh...)) + require.NoError(t, err) + require.Equal(t, 10, length) + require.Equal(t, "ollüh", str.Value()) + + str = NewString(spec) + err = str.SetBytes([]byte("hüllo")) + require.NoError(t, err) + require.Equal(t, "hüllo", str.Value()) + + str = NewString(spec) + str.SetValue("hüllo") + require.Equal(t, "hüllo", str.Value()) +} + func TestStringNil(t *testing.T) { var str *String = nil diff --git a/padding/left.go b/padding/left.go index 6480f7ba..7b9df213 100644 --- a/padding/left.go +++ b/padding/left.go @@ -19,11 +19,12 @@ func NewLeftPadder(pad rune) Padder { } func (p *leftPadder) Pad(data []byte, length int) []byte { - if len(data) >= length { + runeCount := utf8.RuneCount(data) + if runeCount >= length { return data } - padding := bytes.Repeat(p.pad, length-len(data)) + padding := bytes.Repeat(p.pad, length-runeCount) return append(padding, data...) } From 9226221a4c4ec5cd415d8c8be821c840d84eccef Mon Sep 17 00:00:00 2001 From: Pavel Gabriel Date: Fri, 10 Nov 2023 13:15:30 +0100 Subject: [PATCH 2/4] avoid double memory allocation --- field/string.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/field/string.go b/field/string.go index a68f56e2..487d46d9 100644 --- a/field/string.go +++ b/field/string.go @@ -82,7 +82,7 @@ func (f *String) Pack() ([]byte, error) { return nil, fmt.Errorf("failed to encode content: %w", err) } - runeCount := utf8.RuneCountInString(string(data)) + runeCount := utf8.RuneCount(data) packedLength, err := f.spec.Pref.EncodeLength(f.spec.Length, runeCount) if err != nil { From 2885c7cb705e7750d4ef8f01b28f0912c4f7cebb Mon Sep 17 00:00:00 2001 From: Pavel Gabriel Date: Tue, 14 Nov 2023 14:34:20 +0100 Subject: [PATCH 3/4] utf8 example --- field/string_test.go | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/field/string_test.go b/field/string_test.go index cca71059..00b16146 100644 --- a/field/string_test.go +++ b/field/string_test.go @@ -7,6 +7,7 @@ import ( "github.com/moov-io/iso8583/encoding" "github.com/moov-io/iso8583/padding" "github.com/moov-io/iso8583/prefix" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -59,6 +60,28 @@ func TestStringField(t *testing.T) { require.Equal(t, "hello", str.Value()) } +func TestStringWithUTF8Encoding(t *testing.T) { + spec := &Spec{ + Length: 10, + Description: "Field", + Enc: encoding.Binary, + Pref: prefix.Binary.Fixed, + Pad: padding.Left(' '), + } + str := NewStringValue("hüllo") + str.SetSpec(spec) + packed, err := str.Pack() + require.NoError(t, err) + + assert.Len(t, packed, 10) + + str2 := NewString(spec) + _, err = str2.Unpack(packed) + require.NoError(t, err) + + assert.Equal(t, "hüllo", str2.Value()) +} + func TestStringWithNonUTF8Encoding(t *testing.T) { spec := &Spec{ Length: 10, From 04062978e98a195d27fe973daea05ef0907bc303 Mon Sep 17 00:00:00 2001 From: Pavel Gabriel Date: Mon, 25 Mar 2024 20:25:19 +0100 Subject: [PATCH 4/4] update test --- field/string_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/field/string_test.go b/field/string_test.go index 00b16146..c0b7e9e2 100644 --- a/field/string_test.go +++ b/field/string_test.go @@ -64,8 +64,8 @@ func TestStringWithUTF8Encoding(t *testing.T) { spec := &Spec{ Length: 10, Description: "Field", - Enc: encoding.Binary, - Pref: prefix.Binary.Fixed, + Enc: encoding.EBCDIC1047, + Pref: prefix.EBCDIC1047.Fixed, Pad: padding.Left(' '), } str := NewStringValue("hüllo")