diff --git a/go.mod b/go.mod index 3ab6e85..bc6086c 100644 --- a/go.mod +++ b/go.mod @@ -5,5 +5,5 @@ go 1.13 require ( github.com/longbridgeapp/assert v0.1.0 github.com/pkg/errors v0.8.1 - github.com/tdewolff/parse/v2 v2.4.3 + github.com/tdewolff/parse/v2 v2.6.5 ) diff --git a/go.sum b/go.sum index df6cc39..47b04b1 100644 --- a/go.sum +++ b/go.sum @@ -9,9 +9,11 @@ github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZN github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/tdewolff/parse/v2 v2.4.3 h1:k24zHgTRGm7LkvbTEreuavyZTf0k8a/lIenggv62OiU= -github.com/tdewolff/parse/v2 v2.4.3/go.mod h1:WzaJpRSbwq++EIQHYIRTpbYKNA3gn9it1Ik++q4zyho= -github.com/tdewolff/test v1.0.6/go.mod h1:6DAvZliBAAnD7rhVgwaM7DE5/d9NMOAJ09SqYqeK4QE= +github.com/tdewolff/parse/v2 v2.6.5 h1:lYvWBk55GkqKl0JJenGpmrgu/cPHQQ6/Mm1hBGswoGQ= +github.com/tdewolff/parse/v2 v2.6.5/go.mod h1:woz0cgbLwFdtbjJu8PIKxhW05KplTFQkOdX78o+Jgrs= +github.com/tdewolff/test v1.0.7 h1:8Vs0142DmPFW/bQeHRP3MV19m1gvndjUb1sn8yy74LM= +github.com/tdewolff/test v1.0.7/go.mod h1:6DAvZliBAAnD7rhVgwaM7DE5/d9NMOAJ09SqYqeK4QE= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/html.go b/html.go index 7183d43..4261067 100644 --- a/html.go +++ b/html.go @@ -7,6 +7,7 @@ import ( "strings" "github.com/pkg/errors" + "github.com/tdewolff/parse/v2" "github.com/tdewolff/parse/v2/html" // "golang.org/x/net/html" ) @@ -31,8 +32,9 @@ func UnformatHTML(body string, options ...UnformatOption) (out string, err error func processHTML(body string, fn func(plainText string) string) (out string, err error) { w := &bytes.Buffer{} - lex := html.NewLexer(strings.NewReader(body)) - defer lex.Restore() + i := parse.NewInput(strings.NewReader(body)) + lex := html.NewLexer(i) + out = body ignoreTag := false @@ -46,7 +48,7 @@ func processHTML(body string, fn func(plainText string) string) (out string, err return w.String(), nil } - err = errors.Errorf("Error on line %d, %v", lex.Offset(), lex.Err()) + err = errors.Errorf("Error on line %d, %v", i.Offset(), lex.Err()) return case html.TextToken: if ignoreTag {