From ed1973551b80f5fdfb90f6a950196c26e773117a Mon Sep 17 00:00:00 2001 From: Jason Bedard Date: Sat, 9 Nov 2024 23:35:45 -0800 Subject: [PATCH] refactor: change pathTrie per-file to per-directory (#1931) Now the `pathTrie` aligns with the directory tree, not full fs tree which includes files. Aligning with the directory tree makes more sense to me, and I think will make it simpler to do things like load BUILDs concurrently while doing the fs-traversal (and persisting that in the `pathTrie`) etc. **What type of PR is this?** > Other **What package or component does this PR mostly affect?** > all **What does this PR do? Why is it needed?** Performance + simplifying things for future changes. --- walk/walk.go | 64 +++++++++++++++++++++++++--------------------------- 1 file changed, 31 insertions(+), 33 deletions(-) diff --git a/walk/walk.go b/walk/walk.go index 48cc835a3..797aaa943 100644 --- a/walk/walk.go +++ b/walk/walk.go @@ -23,7 +23,6 @@ import ( "os" "path" "path/filepath" - "sort" "strings" "github.com/bazelbuild/bazel-gazelle/config" @@ -135,19 +134,10 @@ func Walk(c *config.Config, cexts []config.Configurer, dirs []string, mode Mode, func visit(c *config.Config, cexts []config.Configurer, knownDirectives map[string]bool, updateRels *UpdateFilter, trie *pathTrie, wf WalkFunc, rel string, updateParent bool) { haveError := false - ents := make([]fs.DirEntry, 0, len(trie.children)) - for _, node := range trie.children { - ents = append(ents, *node.entry) - } - - sort.Slice(ents, func(i, j int) bool { - return ents[i].Name() < ents[j].Name() - }) - // Absolute path to the directory being visited dir := filepath.Join(c.RepoRoot, rel) - f, err := loadBuildFile(c, rel, dir, ents) + f, err := loadBuildFile(c, rel, dir, trie.files) if err != nil { log.Print(err) if c.Strict { @@ -165,28 +155,35 @@ func visit(c *config.Config, cexts []config.Configurer, knownDirectives map[stri return } - var subdirs, regularFiles []string - for _, ent := range ents { + // Filter and collect files + var regularFiles []string + for _, ent := range trie.files { base := ent.Name() entRel := path.Join(rel, base) if wc.isExcluded(entRel) { continue } - ent := resolveFileInfo(wc, dir, entRel, ent) - switch { - case ent == nil: - continue - case ent.IsDir(): - subdirs = append(subdirs, base) - default: + if ent := resolveFileInfo(wc, dir, entRel, ent); ent != nil { regularFiles = append(regularFiles, base) } } shouldUpdate := updateRels.shouldUpdate(rel, updateParent) - for _, sub := range subdirs { - if subRel := path.Join(rel, sub); updateRels.shouldVisit(subRel, shouldUpdate) { - visit(c, cexts, knownDirectives, updateRels, trie.children[sub], wf, subRel, shouldUpdate) + + // Filter and collect subdirectories + var subdirs []string + for _, t := range trie.children { + base := t.entry.Name() + entRel := path.Join(rel, base) + if wc.isExcluded(entRel) { + continue + } + if ent := resolveFileInfo(wc, dir, entRel, t.entry); ent != nil { + subdirs = append(subdirs, base) + + if updateRels.shouldVisit(entRel, shouldUpdate) { + visit(c, cexts, knownDirectives, updateRels, t, wf, entRel, shouldUpdate) + } } } @@ -361,19 +358,20 @@ func resolveFileInfo(wc *walkConfig, dir, rel string, ent fs.DirEntry) fs.DirEnt } type pathTrie struct { - children map[string]*pathTrie - entry *fs.DirEntry + entry fs.DirEntry + files []fs.DirEntry + children []*pathTrie } // Basic factory method to ensure the entry is properly copied func newTrie(entry fs.DirEntry) *pathTrie { - return &pathTrie{entry: &entry} + return &pathTrie{ + entry: entry, + } } func buildTrie(c *config.Config, isIgnored isIgnoredFunc) (*pathTrie, error) { - trie := &pathTrie{ - children: map[string]*pathTrie{}, - } + trie := &pathTrie{} // A channel to limit the number of concurrent goroutines limitCh := make(chan struct{}, 100) @@ -406,14 +404,14 @@ func walkDir(root, rel string, eg *errgroup.Group, limitCh chan struct{}, isIgno continue } - entryTrie := newTrie(entry) - trie.children[entry.Name()] = entryTrie - if entry.IsDir() { - entryTrie.children = map[string]*pathTrie{} + entryTrie := newTrie(entry) + trie.children = append(trie.children, entryTrie) eg.Go(func() error { return walkDir(root, entryPath, eg, limitCh, isIgnored, entryTrie) }) + } else { + trie.files = append(trie.files, entry) } } return nil