From 558be3b0bc9197ab732defe14f40cf2b98e353d7 Mon Sep 17 00:00:00 2001 From: ningfei Date: Wed, 19 Jun 2024 11:57:46 +0800 Subject: [PATCH] Optimize template replacement speed --- ImageProcesser.go | 4 +- Template.go | 2 +- Template.helpers.go | 32 +++++++++ Template.stage.funcs.go | 139 ++++++++++++++++++++++++---------------- t.docx_bench_test.go | 68 ++++++++++++++++++++ xml.node.go | 70 +++++++++++++++----- 6 files changed, 241 insertions(+), 74 deletions(-) create mode 100644 t.docx_bench_test.go diff --git a/ImageProcesser.go b/ImageProcesser.go index 2dd76e6..1c677e7 100644 --- a/ImageProcesser.go +++ b/ImageProcesser.go @@ -52,7 +52,7 @@ func processImage(img *Image) (imgXMLStr string, err error) { Space: "", Local: "Default", }, - Attrs: []xml.Attr{ + Attrs: []*xml.Attr{ {Name: xml.Name{Space: "", Local: "Extension"}, Value: imgExt}, {Name: xml.Name{Space: "", Local: "ContentType"}, Value: "image/" + imgExt}, }, @@ -76,7 +76,7 @@ func processImage(img *Image) (imgXMLStr string, err error) { Space: "", Local: "Relationship", }, - Attrs: []xml.Attr{ + Attrs: []*xml.Attr{ {Name: xml.Name{Space: "", Local: "Id"}, Value: rid}, {Name: xml.Name{Space: "", Local: "Type"}, Value: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"}, {Name: xml.Name{Space: "", Local: "Target"}, Value: "media/" + imgPath}, diff --git a/Template.go b/Template.go index 02faeb5..23b66f9 100644 --- a/Template.go +++ b/Template.go @@ -134,7 +134,7 @@ func OpenTemplateWithURL(docurl string) (tpl *Template, err error) { // Params - replace template placeholders with params // "Hello {{ Name }}!"" --> "Hello World!"" -func (t *Template) Params(v any) { +func (t *Template) Params(v interface{}) { // t.params = collectParams("", v) switch val := v.(type) { case map[string]interface{}: diff --git a/Template.helpers.go b/Template.helpers.go index e929201..713d294 100644 --- a/Template.helpers.go +++ b/Template.helpers.go @@ -4,6 +4,7 @@ import ( "bytes" "encoding/xml" "log" + "strings" ) // Convert given bytes to struct of xml nodes @@ -119,3 +120,34 @@ func (t *Template) matchBrokenLeftPlaceholder(content string) bool { func (t *Template) matchBrokenRightPlaceholder(content string) bool { return t.matchBrokenPlaceholder(content, false) } + +func (t Template) GetContentPrefixList(content []byte) []string { + var ret []string + var record strings.Builder + start := false + length := len(content) + for i, v := range content { + if i == 0 { + continue + } + + if v == '{' && content[i-1] == '{' { + start = true + continue + } + if start { + if v == ' ' || (v == '}' && length-1 > i && content[i+1] == '}') { + ret = append(ret, record.String()) + record.Reset() + start = false + } + if v == '.' { + ret = append(ret, record.String()) + record.Reset() + continue + } + record.WriteByte(v) + } + } + return ret +} diff --git a/Template.stage.funcs.go b/Template.stage.funcs.go index d4d78da..e4201fa 100644 --- a/Template.stage.funcs.go +++ b/Template.stage.funcs.go @@ -44,31 +44,50 @@ func (t *Template) triggerMissingParams(xnode *xmlNode) { // Expand complex placeholders func (t *Template) expandPlaceholders(xnode *xmlNode) { + type xmlNodeContent struct { + node *xmlNode + contents []byte + } + prefixNodeMap := map[string][]xmlNodeContent{} + xnode.WalkWithEnd(func(nrow *xmlNode) bool { + if nrow.isNew { + return false + } + if !nrow.isRowElement() { + return false + } + contents := nrow.AllContents() + prefixList := t.GetContentPrefixList(contents) + for _, prefix := range prefixList { + prefixNodeMap[prefix] = append(prefixNodeMap[prefix], xmlNodeContent{ + contents: contents, + node: nrow, + }) + } + return true + }) t.params.Walk(func(p *Param) { if p.Type != SliceParam { return } prefixes := []string{ - p.PlaceholderPrefix(), - p.ToCompact(p.PlaceholderPrefix()), + p.AbsoluteKey, + p.ToCompact(p.AbsoluteKey), + } + if prefixes[0] == prefixes[1] { + prefixes = prefixes[:1] } - var max int for _, prefix := range prefixes { - xnode.Walk(func(nrow *xmlNode) { - if nrow.isNew { - return - } - if !nrow.isRowElement() { - return - } - if !nrow.AnyChildContains([]byte(prefix)) { - return - } - - contents := nrow.AllContents() - rowParams := rowParams(contents) + nodeList, ok := prefixNodeMap[prefix] + if !ok { + continue + } + for i := range nodeList { + node := nodeList[i] + nrow := node.node + rowParams := rowParams(node.contents) rowPlaceholders := make(map[string]*placeholder) // Collect placeholder that for expansion for _, rowParam := range rowParams { @@ -138,7 +157,7 @@ func (t *Template) expandPlaceholders(xnode *xmlNode) { } } } - }) + } } }) @@ -151,51 +170,59 @@ func (t *Template) expandPlaceholders(xnode *xmlNode) { // Replace single params by type func (t *Template) replaceSingleParams(xnode *xmlNode, triggerParamOnly bool) { + replaceAttr := []*xml.Attr{} + xnodeList := []*xmlNode{} xnode.Walk(func(n *xmlNode) { if n == nil || n.isDeleted { return } - - // node params - t.params.Walk(func(p *Param) { - for i, attr := range n.Attrs { - if strings.Contains(attr.Value, "{{") { - n.Attrs[i].Value = string(p.replaceIn([]byte(attr.Value))) - } + for _, attr := range n.Attrs { + if strings.Contains(attr.Value, "{{") { + replaceAttr = append(replaceAttr, attr) } - }) - - // node contentt - if bytes.Contains(n.Content, []byte("{{")) { - // Try to replace on node that contains possible placeholder - t.params.Walk(func(p *Param) { - // Only string and image param to replace - if p.Type != StringParam && p.Type != ImageParam { - return - } - // Prefix check - if !n.ContentHasPrefix(p.PlaceholderPrefix()) { - return - } - // Trigger: does placeholder have trigger - if p.Trigger = p.extractTriggerFrom(n.Content); p.Trigger != nil { - defer func() { - p.RunTrigger(n) - }() - } - if triggerParamOnly { - return - } - // Repalce by type - switch p.Type { - case StringParam: - t.replaceTextParam(n, p) - case ImageParam: - t.replaceImageParams(n, p) - } - }) } + xnodeList = append(xnodeList, n) + }) + paramAbsoluteKeyMap := map[string]*Param{} + t.params.Walk(func(p *Param) { + for _, v := range replaceAttr { + v.Value = string(p.replaceIn([]byte(v.Value))) + } + if p.Type != StringParam && p.Type != ImageParam { + return + } + paramAbsoluteKeyMap[p.AbsoluteKey] = p }) + for i := range xnodeList { + n := xnodeList[i] + for _, key := range n.GetContentPrefixList() { + p, ok := paramAbsoluteKeyMap[key] + if !ok { + continue + } + t.replaceAndRunTrigger(p, n, triggerParamOnly) + } + } +} + +func (t *Template) replaceAndRunTrigger(p *Param, n *xmlNode, triggerParamOnly bool) { + // Trigger: does placeholder have trigger + if p.Trigger = p.extractTriggerFrom(n.Content); p.Trigger != nil { + // if + defer func() { + p.RunTrigger(n) + }() + } + if triggerParamOnly { + return + } + // Repalce by type + switch p.Type { + case StringParam: + t.replaceTextParam(n, p) + case ImageParam: + t.replaceImageParams(n, p) + } } // Enhance some markup (removed when building XML in the end) @@ -215,7 +242,7 @@ func (t *Template) enhanceMarkup(xnode *xmlNode) { } // n.XMLName.Local = "w-item" - n.Attrs = append(n.Attrs, xml.Attr{ + n.Attrs = append(n.Attrs, &xml.Attr{ Name: xml.Name{Local: "list-id"}, Value: listID, }) diff --git a/t.docx_bench_test.go b/t.docx_bench_test.go new file mode 100644 index 0000000..ad181f3 --- /dev/null +++ b/t.docx_bench_test.go @@ -0,0 +1,68 @@ +package docxplate_test + +import ( + "log" + "testing" + + "github.com/bobiverse/docxplate" +) + +func BenchmarkLists100(b *testing.B) { + var user = User{ + Name: "Walter", + } + for i := 0; i < 100; i++ { + user.Friends = append(user.Friends, &User{Name: "Bob", Age: 28}) + } + + tdoc, _ := docxplate.OpenTemplate("test-data/lists.docx") + tdoc.Params(user) + if err := tdoc.ExportDocx("test-data/~test-lists.docx"); err != nil { + log.Fatal(err) + } +} + +func BenchmarkLists200(b *testing.B) { + var user = User{ + Name: "Walter", + } + for i := 0; i < 200; i++ { + user.Friends = append(user.Friends, &User{Name: "Bob", Age: 28}) + } + + tdoc, _ := docxplate.OpenTemplate("test-data/lists.docx") + tdoc.Params(user) + if err := tdoc.ExportDocx("test-data/~test-lists.docx"); err != nil { + log.Fatal(err) + } +} + +func BenchmarkLists400(b *testing.B) { + var user = User{ + Name: "Walter", + } + for i := 0; i < 400; i++ { + user.Friends = append(user.Friends, &User{Name: "Bob", Age: 28}) + } + + tdoc, _ := docxplate.OpenTemplate("test-data/lists.docx") + tdoc.Params(user) + if err := tdoc.ExportDocx("test-data/~test-lists.docx"); err != nil { + log.Fatal(err) + } +} + +func BenchmarkLists1000(b *testing.B) { + var user = User{ + Name: "Walter", + } + for i := 0; i < 1000; i++ { + user.Friends = append(user.Friends, &User{Name: "Bob", Age: 28}) + } + + tdoc, _ := docxplate.OpenTemplate("test-data/lists.docx") + tdoc.Params(user) + if err := tdoc.ExportDocx("test-data/~test-lists.docx"); err != nil { + log.Fatal(err) + } +} diff --git a/xml.node.go b/xml.node.go index a8af602..023d81a 100644 --- a/xml.node.go +++ b/xml.node.go @@ -25,15 +25,40 @@ var NodeSectionTypes = []string{"w-tbl", "w-p"} type xmlNode struct { XMLName xml.Name - Attrs []xml.Attr `xml:",any,attr"` - Content []byte `xml:",chardata"` - Nodes []*xmlNode `xml:",any"` + Attrs []*xml.Attr `xml:",any,attr"` + Content []byte `xml:",chardata"` + Nodes []*xmlNode `xml:",any"` parent *xmlNode isNew bool // added recently isDeleted bool } +func (xnode xmlNode) GetContentPrefixList() (ret []string) { + var record strings.Builder + start := false + length := len(xnode.Content) + for i, v := range xnode.Content { + if i == 0 { + continue + } + + if v == '{' && xnode.Content[i-1] == '{' { + start = true + continue + } + if start && (v == ' ' || (v == '}' && length-1 > i && xnode.Content[i+1] == '}')) { + ret = append(ret, record.String()) + record.Reset() + start = false + } + if start { + record.WriteByte(v) + } + } + return +} + func (xnode xmlNode) ContentHasPrefix(str string) bool { splitContent := bytes.Split(xnode.Content, []byte(str)) if len(splitContent) == 1 { @@ -69,6 +94,29 @@ func (xnode *xmlNode) Walk(fn func(*xmlNode)) { } } +// fn return true ,end walk +func (xnode *xmlNode) WalkWithEnd(fn func(*xmlNode) bool) { + // Using index to iterate nodes instead of for-range to process dynamic nodes + for i := 0; i < len(xnode.Nodes); i++ { + n := xnode.Nodes[i] + + if n == nil { + continue + } + + end := fn(n) // do your custom stuff + + if end { + continue + } + + if n.Nodes != nil { + // continue only if have deeper nodes + n.WalkWithEnd(fn) + } + } +} + // Walk down all nodes and do custom stuff with given function func (xnode *xmlNode) WalkTree(depth int, fn func(int, *xmlNode)) { for _, n := range xnode.Nodes { @@ -197,7 +245,7 @@ func (xnode *xmlNode) cloneAndAppend() *xmlNode { parent := xnode.parent // new copy node - nnew := xnode.clone() // parent cleaned + nnew := xnode.clone(parent) //set parent nnew.isDeleted = false nnew.isNew = true @@ -212,21 +260,12 @@ func (xnode *xmlNode) cloneAndAppend() *xmlNode { // Insert into specific index parent.Nodes = append(parent.Nodes[:i], append([]*xmlNode{nnew}, parent.Nodes[i:]...)...) - // cloned element have incorrect parents - so fixing it here - nnew.parent.Walk(func(nnew *xmlNode) { - for _, n := range nnew.Nodes { - if n != nil { - n.parent = nnew - } - } - }) - return nnew } // Copy node as new and all childs as new too // no shared addresses as it would be by only copying it -func (xnode *xmlNode) clone() *xmlNode { +func (xnode *xmlNode) clone(parent *xmlNode) *xmlNode { if xnode == nil { return nil } @@ -236,9 +275,10 @@ func (xnode *xmlNode) clone() *xmlNode { xnodeCopy.Nodes = nil xnodeCopy.isDeleted = false xnodeCopy.isNew = true + xnodeCopy.parent = parent for _, n := range xnode.Nodes { - xnodeCopy.Nodes = append(xnodeCopy.Nodes, n.clone()) + xnodeCopy.Nodes = append(xnodeCopy.Nodes, n.clone(xnodeCopy)) } return xnodeCopy