diff --git a/modules/actions/workflows.go b/modules/actions/workflows.go
index 0d2b0dd9194d9..28be0fbf1aeae 100644
--- a/modules/actions/workflows.go
+++ b/modules/actions/workflows.go
@@ -55,17 +55,16 @@ func ListWorkflows(commit *git.Commit) (git.Entries, error) {
 		return nil, err
 	}
 
-	entries, err := tree.ListEntriesRecursiveFast()
-	if err != nil {
-		return nil, err
-	}
-
-	ret := make(git.Entries, 0, len(entries))
-	for _, entry := range entries {
+	ret := make(git.Entries, 0, 5)
+	if err := tree.IterateEntriesRecursive(func(entry *git.TreeEntry) error {
 		if strings.HasSuffix(entry.Name(), ".yml") || strings.HasSuffix(entry.Name(), ".yaml") {
 			ret = append(ret, entry)
 		}
+		return nil
+	}, nil); err != nil {
+		return nil, err
 	}
+
 	return ret, nil
 }
 
diff --git a/modules/git/parse_nogogit.go b/modules/git/parse_nogogit.go
index 676bb3c76c09f..848c2076908eb 100644
--- a/modules/git/parse_nogogit.go
+++ b/modules/git/parse_nogogit.go
@@ -22,6 +22,17 @@ func ParseTreeEntries(data []byte) ([]*TreeEntry, error) {
 // parseTreeEntries FIXME this function's design is not right, it should make the caller read all data into memory
 func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
 	entries := make([]*TreeEntry, 0, bytes.Count(data, []byte{'\n'})+1)
+	if err := iterateTreeEntries(data, ptree, func(entry *TreeEntry) error {
+		entries = append(entries, entry)
+		return nil
+	}); err != nil {
+		return nil, err
+	}
+	return entries, nil
+}
+
+// iterateTreeEntries splits data on newlines, parses each line with parseLsTreeLine and calls f for every entry, stopping at the first error
+func iterateTreeEntries(data []byte, ptree *Tree, f func(entry *TreeEntry) error) error {
 	for pos := 0; pos < len(data); {
 		posEnd := bytes.IndexByte(data[pos:], '\n')
 		if posEnd == -1 {
@@ -33,7 +44,7 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
 		line := data[pos:posEnd]
 		lsTreeLine, err := parseLsTreeLine(line)
 		if err != nil {
-			return nil, err
+			return err
 		}
 		entry := &TreeEntry{
 			ptree:     ptree,
@@ -44,9 +55,11 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
 			sized:     lsTreeLine.Size.Has(),
 		}
 		pos = posEnd + 1
-		entries = append(entries, entry)
+		if err := f(entry); err != nil {
+			return err
+		}
 	}
-	return entries, nil
+	return nil
 }
 
 func catBatchParseTreeEntries(objectFormat ObjectFormat, ptree *Tree, rd *bufio.Reader, sz int64) ([]*TreeEntry, error) {
diff --git a/modules/git/repo_language_stats_nogogit.go b/modules/git/repo_language_stats_nogogit.go
index de7707bd6cd8b..098f29707cf07 100644
--- a/modules/git/repo_language_stats_nogogit.go
+++ b/modules/git/repo_language_stats_nogogit.go
@@ -57,11 +57,6 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
 
 	tree := commit.Tree
 
-	entries, err := tree.ListEntriesRecursiveWithSize()
-	if err != nil {
-		return nil, err
-	}
-
 	checker, deferable := repo.CheckAttributeReader(commitID)
 	defer deferable()
 
@@ -77,18 +72,12 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
 	firstExcludedLanguage := ""
 	firstExcludedLanguageSize := int64(0)
 
-	for _, f := range entries {
-		select {
-		case <-repo.Ctx.Done():
-			return sizes, repo.Ctx.Err()
-		default:
-		}
-
+	if err := tree.IterateEntriesRecursive(func(f *TreeEntry) error {
 		contentBuf.Reset()
 		content = contentBuf.Bytes()
 
 		if f.Size() == 0 {
-			continue
+			return nil
 		}
 
 		isVendored := optional.None[bool]()
@@ -101,22 +90,22 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
 			if err == nil {
 				isVendored = AttributeToBool(attrs, AttributeLinguistVendored)
 				if isVendored.ValueOrDefault(false) {
-					continue
+					return nil
 				}
 
 				isGenerated = AttributeToBool(attrs, AttributeLinguistGenerated)
 				if isGenerated.ValueOrDefault(false) {
-					continue
+					return nil
 				}
 
 				isDocumentation = AttributeToBool(attrs, AttributeLinguistDocumentation)
 				if isDocumentation.ValueOrDefault(false) {
-					continue
+					return nil
 				}
 
 				isDetectable = AttributeToBool(attrs, AttributeLinguistDetectable)
 				if !isDetectable.ValueOrDefault(true) {
-					continue
+					return nil
 				}
 
 				hasLanguage := TryReadLanguageAttribute(attrs)
@@ -131,7 +120,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
 
 					// this language will always be added to the size
 					sizes[language] += f.Size()
-					continue
+					return nil
 				}
 			}
 		}
@@ -140,19 +129,19 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
 			enry.IsDotFile(f.Name()) ||
 			(!isDocumentation.Has() && enry.IsDocumentation(f.Name())) ||
 			enry.IsConfiguration(f.Name()) {
-			continue
+			return nil
 		}
 
 		// If content can not be read or file is too big just do detection by filename
 
 		if f.Size() <= bigFileSize {
 			if err := writeID(f.ID.String()); err != nil {
-				return nil, err
+				return err
 			}
 			_, _, size, err := ReadBatchLine(batchReader)
 			if err != nil {
 				log.Debug("Error reading blob: %s Err: %v", f.ID.String(), err)
-				return nil, err
+				return err
 			}
 
 			sizeToRead := size
@@ -164,22 +153,22 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
 
 			_, err = contentBuf.ReadFrom(io.LimitReader(batchReader, sizeToRead))
 			if err != nil {
-				return nil, err
+				return err
 			}
 			content = contentBuf.Bytes()
 			if err := DiscardFull(batchReader, discard); err != nil {
-				return nil, err
+				return err
 			}
 		}
 		if !isGenerated.Has() && enry.IsGenerated(f.Name(), content) {
-			continue
+			return nil
 		}
 
 		// FIXME: Why can't we split this and the IsGenerated tests to avoid reading the blob unless absolutely necessary?
 		// - eg. do the all the detection tests using filename first before reading content.
 		language := analyze.GetCodeLanguage(f.Name(), content)
 		if language == "" {
-			continue
+			return nil
 		}
 
 		// group languages, such as Pug -> HTML; SCSS -> CSS
@@ -200,6 +189,9 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
 			firstExcludedLanguage = language
 			firstExcludedLanguageSize += f.Size()
 		}
+		return nil
+	}, TrustedCmdArgs{"--long"}); err != nil { // add --long to get size
+		return sizes, err
 	}
 
 	// If there are no included languages add the first excluded language
diff --git a/modules/git/tree_gogit.go b/modules/git/tree_gogit.go
index 421b0ecb0f0f9..59f6129b2abb9 100644
--- a/modules/git/tree_gogit.go
+++ b/modules/git/tree_gogit.go
@@ -58,6 +58,26 @@ func (t *Tree) ListEntries() (Entries, error) {
 
 // ListEntriesRecursiveWithSize returns all entries of current tree recursively including all subtrees
 func (t *Tree) ListEntriesRecursiveWithSize() (Entries, error) {
+	var entries []*TreeEntry
+	if err := t.IterateEntriesRecursive(func(entry *TreeEntry) error {
+		entries = append(entries, entry)
+		return nil
+	}, nil); err != nil {
+		return nil, err
+	}
+	return entries, nil
+}
+
+// ListEntriesRecursiveFast is the alias of ListEntriesRecursiveWithSize for the gogit version
+func (t *Tree) ListEntriesRecursiveFast() (Entries, error) {
+	return t.ListEntriesRecursiveWithSize()
+}
+
+// IterateEntriesRecursive walks the current tree recursively, including all subtrees, and calls f for every entry.
+// Iteration stops at the first error returned by f, which is passed back to the caller.
+// extraArgs is not used by the gogit version; it only mirrors the signature of the nogogit version.
+// NOTE(review): unchanged lines of this body outside the patch hunks may still "return nil, err" and would need "return err" - TODO confirm
+func (t *Tree) IterateEntriesRecursive(f func(entry *TreeEntry) error, extraArgs TrustedCmdArgs) error {
 	if t.gogitTree == nil {
 		err := t.loadTreeObject()
 		if err != nil {
@@ -65,7 +85,6 @@ func (t *Tree) ListEntriesRecursiveWithSize() (Entries, error) {
 		}
 	}
 
-	var entries []*TreeEntry
 	seen := map[plumbing.Hash]bool{}
 	walker := object.NewTreeWalker(t.gogitTree, true, seen)
 	for {
@@ -86,13 +105,10 @@ func (t *Tree) ListEntriesRecursiveWithSize() (Entries, error) {
 			ptree:          t,
 			fullName:       fullName,
 		}
-		entries = append(entries, convertedEntry)
+		if err := f(convertedEntry); err != nil {
+			return err
+		}
 	}
 
-	return entries, nil
-}
-
-// ListEntriesRecursiveFast is the alias of ListEntriesRecursiveWithSize for the gogit version
-func (t *Tree) ListEntriesRecursiveFast() (Entries, error) {
-	return t.ListEntriesRecursiveWithSize()
+	return nil
 }
diff --git a/modules/git/tree_nogogit.go b/modules/git/tree_nogogit.go
index 993b98edc2994..89454df047407 100644
--- a/modules/git/tree_nogogit.go
+++ b/modules/git/tree_nogogit.go
@@ -6,6 +6,7 @@
 package git
 
 import (
+	"bufio"
 	"io"
 	"strings"
 )
@@ -96,21 +97,17 @@ func (t *Tree) listEntriesRecursive(extraArgs TrustedCmdArgs) (Entries, error) {
 		return t.entriesRecursive, nil
 	}
 
-	stdout, _, runErr := NewCommand(t.repo.Ctx, "ls-tree", "-t", "-r").
-		AddArguments(extraArgs...).
-		AddDynamicArguments(t.ID.String()).
-		RunStdBytes(&RunOpts{Dir: t.repo.Path})
-	if runErr != nil {
-		return nil, runErr
-	}
-
-	var err error
-	t.entriesRecursive, err = parseTreeEntries(stdout, t)
-	if err == nil {
-		t.entriesRecursiveParsed = true
+	t.entriesRecursive = make([]*TreeEntry, 0)
+	if err := t.IterateEntriesRecursive(func(entry *TreeEntry) error {
+		t.entriesRecursive = append(t.entriesRecursive, entry)
+		return nil
+	}, extraArgs); err != nil {
+		t.entriesRecursive = nil
+		return nil, err
 	}
 
-	return t.entriesRecursive, err
+	t.entriesRecursiveParsed = true
+	return t.entriesRecursive, nil
 }
 
 // ListEntriesRecursiveFast returns all entries of current tree recursively including all subtrees, no size
@@ -122,3 +119,47 @@ func (t *Tree) ListEntriesRecursiveFast() (Entries, error) {
 func (t *Tree) ListEntriesRecursiveWithSize() (Entries, error) {
 	return t.listEntriesRecursive(TrustedCmdArgs{"--long"})
 }
+
+// IterateEntriesRecursive runs "git ls-tree -t -r" on the current tree and calls f for every entry,
+// recursively including all subtrees, reading the command output line by line through a pipe.
+// extraArgs could be "--long" (or "-l") to get the size, which is slower.
+// It returns the first error from f, from parsing, from the repository context or from the git command.
+func (t *Tree) IterateEntriesRecursive(f func(entry *TreeEntry) error, extraArgs TrustedCmdArgs) error {
+	reader, writer := io.Pipe()
+	// Closing the read side makes later writes fail, so the writing side is not left blocked when we return early.
+	defer reader.Close()
+
+	// Buffered so that the goroutine can always deliver its result and exit, even if nobody receives it.
+	done := make(chan error, 1)
+	go func() {
+		runErr := NewCommand(t.repo.Ctx, "ls-tree", "-t", "-r").
+			AddArguments(extraArgs...).
+			AddDynamicArguments(t.ID.String()).
+			Run(&RunOpts{
+				Dir:    t.repo.Path,
+				Stdout: writer,
+			})
+		// Hand the command error (or plain EOF when runErr is nil) to the reading side.
+		_ = writer.CloseWithError(runErr)
+		done <- runErr
+	}()
+
+	scanner := bufio.NewScanner(reader)
+	for scanner.Scan() {
+		select {
+		case <-t.repo.Ctx.Done():
+			return t.repo.Ctx.Err()
+		default:
+		}
+
+		if err := iterateTreeEntries(scanner.Bytes(), t, f); err != nil {
+			return err
+		}
+	}
+	// Err is only meaningful once Scan has returned false.
+	if err := scanner.Err(); err != nil {
+		return err
+	}
+	// The pipe reached EOF, so the goroutine has closed the writer and is about to report how the command ended.
+	return <-done
+}