From d17784e361cc21cfc9671b55f0d7309a9ba5f762 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Tue, 18 Mar 2025 15:53:23 -0700 Subject: [PATCH 01/10] Introduce a new batch cat file method for preparing to replace the old one --- modules/git/batch_cat_file.go | 117 ++++++++++++++++ modules/git/batch_cat_file_test.go | 148 +++++++++++++++++++++ modules/git/batch_reader.go | 1 + modules/git/command.go | 13 +- modules/git/commit_info_nogogit.go | 17 +-- modules/git/repo_language_stats_nogogit.go | 14 +- 6 files changed, 287 insertions(+), 23 deletions(-) create mode 100644 modules/git/batch_cat_file.go create mode 100644 modules/git/batch_cat_file_test.go diff --git a/modules/git/batch_cat_file.go b/modules/git/batch_cat_file.go new file mode 100644 index 0000000000000..921629a339e3f --- /dev/null +++ b/modules/git/batch_cat_file.go @@ -0,0 +1,117 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package git + +import ( + "bufio" + "bytes" + "context" + "fmt" + "io" + "os" + "os/exec" + "strings" + "time" + + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/process" + "code.gitea.io/gitea/modules/util" +) + +type BatchCatFile struct { + cmd *exec.Cmd + startTime time.Time + stdin io.WriteCloser + stdout io.ReadCloser + cancel context.CancelFunc + finished process.FinishedFunc +} + +func NewBatchCatFile(ctx context.Context, repoPath string) (*BatchCatFile, error) { + callerInfo := util.CallerFuncName(1 /* util */ + 1 /* this */ + 1 /* parent */) + if pos := strings.LastIndex(callerInfo, "/"); pos >= 0 { + callerInfo = callerInfo[pos+1:] + } + + a := make([]string, 0, 4) + a = append(a, debugQuote(GitExecutable)) + if len(globalCommandArgs) > 0 { + a = append(a, "...global...") + } + a = append(a, "cat-file", "--batch") + cmdLogString := strings.Join(a, " ") + + // these logs are for debugging purposes only, so no guarantee of correctness or stability + desc := fmt.Sprintf("git.Run(by:%s, repo:%s): %s", callerInfo, logArgSanitize(repoPath), cmdLogString) + log.Debug("git.BatchCatFile: %s", desc) + + ctx, cancel, finished := process.GetManager().AddContext(ctx, desc) + + args := make([]string, 0, len(globalCommandArgs)+2) + for _, arg := range globalCommandArgs { + args = append(args, string(arg)) + } + args = append(args, "cat-file", "--batch") + cmd := exec.CommandContext(ctx, GitExecutable, args...) + cmd.Env = append(os.Environ(), CommonGitCmdEnvs()...) + cmd.Dir = repoPath + process.SetSysProcAttribute(cmd) + + stdin, err := cmd.StdinPipe() + if err != nil { + return nil, err + } + stdout, err := cmd.StdoutPipe() + if err != nil { + return nil, err + } + + if err := cmd.Start(); err != nil { + return nil, err + } + + return &BatchCatFile{ + cmd: cmd, + startTime: time.Now(), + stdin: stdin, + stdout: stdout, + cancel: cancel, + finished: finished, + }, nil +} + +func (b *BatchCatFile) Input(refs ...string) error { + var buf bytes.Buffer + for _, ref := range refs { + if _, err := buf.WriteString(ref + "\n"); err != nil { + return err + } + } + + _, err := b.stdin.Write(buf.Bytes()) + if err != nil { + return err + } + + return nil +} + +func (b *BatchCatFile) Reader() *bufio.Reader { + return bufio.NewReader(b.stdout) +} + +func (b *BatchCatFile) Escaped() time.Duration { + return time.Since(b.startTime) +} + +func (b *BatchCatFile) Cancel() { + b.cancel() +} + +func (b *BatchCatFile) Close() error { + b.finished() + _ = b.stdin.Close() + log.Debug("git.BatchCatFile: %v", b.Escaped()) + return b.cmd.Wait() +} diff --git a/modules/git/batch_cat_file_test.go b/modules/git/batch_cat_file_test.go new file mode 100644 index 0000000000000..1287c41958cbb --- /dev/null +++ b/modules/git/batch_cat_file_test.go @@ -0,0 +1,148 @@ +// Copyright 2025 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package git + +import ( + "context" + "io" + "path/filepath" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func Test_GitBatchOperatorsNormal(t *testing.T) { + bareRepo1Path := filepath.Join(testReposDir, "repo1_bare") + batch, err := NewBatchCatFile(context.Background(), bareRepo1Path) + assert.NoError(t, err) + assert.NotNil(t, batch) + defer batch.Close() + + err = batch.Input("refs/heads/master") + assert.NoError(t, err) + rd := batch.Reader() + assert.NotNil(t, rd) + + _, typ, size, err := ReadBatchLine(rd) + assert.NoError(t, err) + assert.Equal(t, "commit", typ) + assert.Equal(t, int64(1075), size) + + // this step is very important, otherwise the next read will be wrong + s, err := rd.Discard(int(size)) + assert.NoError(t, err) + assert.EqualValues(t, size, s) + + err = batch.Input("ce064814f4a0d337b333e646ece456cd39fab612") + assert.NoError(t, err) + assert.NotNil(t, rd) + + _, typ, size, err = ReadBatchLine(rd) + assert.NoError(t, err) + assert.Equal(t, "commit", typ) + assert.Equal(t, int64(1075), size) + + s, err = rd.Discard(int(size)) + assert.NoError(t, err) + assert.EqualValues(t, size, s) + + kases := []struct { + refname string + size int64 + }{ + {"refs/heads/master", 1075}, + {"feaf4ba6bc635fec442f46ddd4512416ec43c2c2", 1074}, + {"37991dec2c8e592043f47155ce4808d4580f9123", 239}, + } + + var inputs []string + for _, kase := range kases { + inputs = append(inputs, kase.refname) + } + + // input once for 3 refs + err = batch.Input(inputs...) + assert.NoError(t, err) + assert.NotNil(t, rd) + + for i := 0; i < 3; i++ { + _, typ, size, err = ReadBatchLine(rd) + assert.NoError(t, err) + assert.Equal(t, "commit", typ) + assert.Equal(t, kases[i].size, size) + + s, err := rd.Discard(int(size)) + assert.NoError(t, err) + assert.EqualValues(t, size, s) + } + + // input 3 times + for _, input := range inputs { + err = batch.Input(input) + assert.NoError(t, err) + assert.NotNil(t, rd) + } + + for i := 0; i < 3; i++ { + _, typ, size, err = ReadBatchLine(rd) + assert.NoError(t, err) + assert.Equal(t, "commit", typ) + assert.Equal(t, kases[i].size, size) + + s, err := rd.Discard(int(size)) + assert.NoError(t, err) + assert.EqualValues(t, size, s) + } +} + +func Test_GitBatchOperatorsCancel(t *testing.T) { + bareRepo1Path := filepath.Join(testReposDir, "repo1_bare") + batch, err := NewBatchCatFile(context.Background(), bareRepo1Path) + assert.NoError(t, err) + assert.NotNil(t, batch) + defer batch.Close() + + err = batch.Input("refs/heads/master") + assert.NoError(t, err) + rd := batch.Reader() + assert.NotNil(t, rd) + + _, typ, size, err := ReadBatchLine(rd) + assert.NoError(t, err) + assert.Equal(t, "commit", typ) + assert.Equal(t, int64(1075), size) + + go func() { + time.Sleep(time.Second) + batch.Cancel() + }() + // block here to wait cancel + _, err = io.ReadAll(rd) + assert.NoError(t, err) +} + +func Test_GitBatchOperatorsTimeout(t *testing.T) { + bareRepo1Path := filepath.Join(testReposDir, "repo1_bare") + + ctx, _ := context.WithTimeout(context.Background(), 1*time.Second) + + batch, err := NewBatchCatFile(ctx, bareRepo1Path) + assert.NoError(t, err) + assert.NotNil(t, batch) + defer batch.Close() + + err = batch.Input("refs/heads/master") + assert.NoError(t, err) + rd := batch.Reader() + assert.NotNil(t, rd) + + _, typ, size, err := ReadBatchLine(rd) + assert.NoError(t, err) + assert.Equal(t, "commit", typ) + assert.Equal(t, int64(1075), size) + // block here until timeout + _, err = io.ReadAll(rd) + assert.NoError(t, err) +} diff --git a/modules/git/batch_reader.go b/modules/git/batch_reader.go index 7bbab76bb821c..7216a2f7f6d57 100644 --- a/modules/git/batch_reader.go +++ b/modules/git/batch_reader.go @@ -165,6 +165,7 @@ func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err er typ = typ[:idx] size, err = strconv.ParseInt(sizeStr, 10, 64) + return sha, typ, size, err } diff --git a/modules/git/command.go b/modules/git/command.go index d85a91804aac8..db4032962a1c7 100644 --- a/modules/git/command.go +++ b/modules/git/command.go @@ -60,15 +60,16 @@ func logArgSanitize(arg string) string { return arg } +var debugQuote = func(s string) string { + if strings.ContainsAny(s, " `'\"\t\r\n") { + return fmt.Sprintf("%q", s) + } + return s +} + func (c *Command) LogString() string { // WARNING: this function is for debugging purposes only. It's much better than old code (which only joins args with space), // It's impossible to make a simple and 100% correct implementation of argument quoting for different platforms here. - debugQuote := func(s string) string { - if strings.ContainsAny(s, " `'\"\t\r\n") { - return fmt.Sprintf("%q", s) - } - return s - } a := make([]string, 0, len(c.args)+1) a = append(a, debugQuote(c.prog)) if c.globalArgsLength > 0 { diff --git a/modules/git/commit_info_nogogit.go b/modules/git/commit_info_nogogit.go index 7a6af0410bbc9..fb1f6a6085d9c 100644 --- a/modules/git/commit_info_nogogit.go +++ b/modules/git/commit_info_nogogit.go @@ -124,11 +124,13 @@ func GetLastCommitForPaths(ctx context.Context, commit *Commit, treePath string, return nil, err } - batchStdinWriter, batchReader, cancel, err := commit.repo.CatFileBatch(ctx) + batch, err := NewBatchCatFile(ctx, commit.repo.Path) if err != nil { return nil, err } - defer cancel() + defer batch.Close() + + rd := batch.Reader() commitsMap := map[string]*Commit{} commitsMap[commit.ID.String()] = commit @@ -145,25 +147,24 @@ func GetLastCommitForPaths(ctx context.Context, commit *Commit, treePath string, continue } - _, err := batchStdinWriter.Write([]byte(commitID + "\n")) - if err != nil { + if err := batch.Input(commitID); err != nil { return nil, err } - _, typ, size, err := ReadBatchLine(batchReader) + _, typ, size, err := ReadBatchLine(rd) if err != nil { return nil, err } if typ != "commit" { - if err := DiscardFull(batchReader, size+1); err != nil { + if err := DiscardFull(rd, size+1); err != nil { return nil, err } return nil, fmt.Errorf("unexpected type: %s for commit id: %s", typ, commitID) } - c, err = CommitFromReader(commit.repo, MustIDFromString(commitID), io.LimitReader(batchReader, size)) + c, err = CommitFromReader(commit.repo, MustIDFromString(commitID), io.LimitReader(rd, size)) if err != nil { return nil, err } - if _, err := batchReader.Discard(1); err != nil { + if _, err := rd.Discard(1); err != nil { return nil, err } commitCommits[path] = c diff --git a/modules/git/repo_language_stats_nogogit.go b/modules/git/repo_language_stats_nogogit.go index de7707bd6cd8b..2b1380d4266df 100644 --- a/modules/git/repo_language_stats_nogogit.go +++ b/modules/git/repo_language_stats_nogogit.go @@ -20,20 +20,16 @@ import ( func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) { // We will feed the commit IDs in order into cat-file --batch, followed by blobs as necessary. // so let's create a batch stdin and stdout - batchStdinWriter, batchReader, cancel, err := repo.CatFileBatch(repo.Ctx) + batch, err := NewBatchCatFile(repo.Ctx, repo.Path) if err != nil { return nil, err } - defer cancel() + defer batch.Close() - writeID := func(id string) error { - _, err := batchStdinWriter.Write([]byte(id + "\n")) - return err - } - - if err := writeID(commitID); err != nil { + if err := batch.Input(commitID); err != nil { return nil, err } + batchReader := batch.Reader() shaBytes, typ, size, err := ReadBatchLine(batchReader) if typ != "commit" { log.Debug("Unable to get commit for: %s. Err: %v", commitID, err) @@ -146,7 +142,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err // If content can not be read or file is too big just do detection by filename if f.Size() <= bigFileSize { - if err := writeID(f.ID.String()); err != nil { + if err := batch.Input(f.ID.String()); err != nil { return nil, err } _, _, size, err := ReadBatchLine(batchReader) From 77c9340cc9492a91609841c28a08e075435801ab Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Tue, 18 Mar 2025 19:31:27 -0700 Subject: [PATCH 02/10] don't create a new sub process --- modules/git/batch.go | 46 --------- modules/git/batch_cat_file.go | 15 ++- modules/git/batch_cat_file_test.go | 9 +- modules/git/batch_reader.go | 97 ------------------- modules/git/blob_nogogit.go | 14 +-- modules/git/commit_info_nogogit.go | 4 +- modules/git/pipeline/lfs_nogogit.go | 27 ++---- modules/git/repo_base_nogogit.go | 51 +++++----- modules/git/repo_branch_nogogit.go | 14 ++- modules/git/repo_commit_nogogit.go | 22 ++--- modules/git/repo_language_stats_nogogit.go | 4 +- modules/git/repo_tag_nogogit.go | 19 ++-- modules/git/repo_tree_nogogit.go | 8 +- modules/git/tree_entry_nogogit.go | 7 +- modules/git/tree_nogogit.go | 12 ++- modules/indexer/code/bleve/bleve.go | 19 ++-- .../code/elasticsearch/elasticsearch.go | 16 +-- 17 files changed, 122 insertions(+), 262 deletions(-) delete mode 100644 modules/git/batch.go diff --git a/modules/git/batch.go b/modules/git/batch.go deleted file mode 100644 index 3ec4f1ddccf84..0000000000000 --- a/modules/git/batch.go +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2024 The Gitea Authors. All rights reserved. -// SPDX-License-Identifier: MIT - -package git - -import ( - "bufio" - "context" -) - -type Batch struct { - cancel context.CancelFunc - Reader *bufio.Reader - Writer WriteCloserError -} - -func (repo *Repository) NewBatch(ctx context.Context) (*Batch, error) { - // Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first! - if err := ensureValidGitRepository(ctx, repo.Path); err != nil { - return nil, err - } - - var batch Batch - batch.Writer, batch.Reader, batch.cancel = catFileBatch(ctx, repo.Path) - return &batch, nil -} - -func (repo *Repository) NewBatchCheck(ctx context.Context) (*Batch, error) { - // Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first! - if err := ensureValidGitRepository(ctx, repo.Path); err != nil { - return nil, err - } - - var check Batch - check.Writer, check.Reader, check.cancel = catFileBatchCheck(ctx, repo.Path) - return &check, nil -} - -func (b *Batch) Close() { - if b.cancel != nil { - b.cancel() - b.Reader = nil - b.Writer = nil - b.cancel = nil - } -} diff --git a/modules/git/batch_cat_file.go b/modules/git/batch_cat_file.go index 921629a339e3f..d4974a13c4901 100644 --- a/modules/git/batch_cat_file.go +++ b/modules/git/batch_cat_file.go @@ -28,18 +28,27 @@ type BatchCatFile struct { finished process.FinishedFunc } -func NewBatchCatFile(ctx context.Context, repoPath string) (*BatchCatFile, error) { +// NewBatchCatFile opens git cat-file --batch or --batch-check in the provided repo and returns a stdin pipe, a stdout reader and cancel function +// isCheck is true for --batch-check, false for --batch isCheck will only get metadata, --batch will get metadata and content +func NewBatchCatFile(ctx context.Context, repoPath string, isCheck bool) (*BatchCatFile, error) { + // Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first! + if err := ensureValidGitRepository(ctx, repoPath); err != nil { + return nil, err + } + callerInfo := util.CallerFuncName(1 /* util */ + 1 /* this */ + 1 /* parent */) if pos := strings.LastIndex(callerInfo, "/"); pos >= 0 { callerInfo = callerInfo[pos+1:] } + batchArg := util.Iif(isCheck, "--batch-check", "--batch") + a := make([]string, 0, 4) a = append(a, debugQuote(GitExecutable)) if len(globalCommandArgs) > 0 { a = append(a, "...global...") } - a = append(a, "cat-file", "--batch") + a = append(a, "cat-file", batchArg) cmdLogString := strings.Join(a, " ") // these logs are for debugging purposes only, so no guarantee of correctness or stability @@ -52,7 +61,7 @@ func NewBatchCatFile(ctx context.Context, repoPath string) (*BatchCatFile, error for _, arg := range globalCommandArgs { args = append(args, string(arg)) } - args = append(args, "cat-file", "--batch") + args = append(args, "cat-file", batchArg) cmd := exec.CommandContext(ctx, GitExecutable, args...) cmd.Env = append(os.Environ(), CommonGitCmdEnvs()...) cmd.Dir = repoPath diff --git a/modules/git/batch_cat_file_test.go b/modules/git/batch_cat_file_test.go index 1287c41958cbb..f1734e9d56853 100644 --- a/modules/git/batch_cat_file_test.go +++ b/modules/git/batch_cat_file_test.go @@ -15,7 +15,7 @@ import ( func Test_GitBatchOperatorsNormal(t *testing.T) { bareRepo1Path := filepath.Join(testReposDir, "repo1_bare") - batch, err := NewBatchCatFile(context.Background(), bareRepo1Path) + batch, err := NewBatchCatFile(t.Context(), bareRepo1Path, false) assert.NoError(t, err) assert.NotNil(t, batch) defer batch.Close() @@ -99,7 +99,7 @@ func Test_GitBatchOperatorsNormal(t *testing.T) { func Test_GitBatchOperatorsCancel(t *testing.T) { bareRepo1Path := filepath.Join(testReposDir, "repo1_bare") - batch, err := NewBatchCatFile(context.Background(), bareRepo1Path) + batch, err := NewBatchCatFile(t.Context(), bareRepo1Path, false) assert.NoError(t, err) assert.NotNil(t, batch) defer batch.Close() @@ -126,9 +126,10 @@ func Test_GitBatchOperatorsCancel(t *testing.T) { func Test_GitBatchOperatorsTimeout(t *testing.T) { bareRepo1Path := filepath.Join(testReposDir, "repo1_bare") - ctx, _ := context.WithTimeout(context.Background(), 1*time.Second) + ctx, cancel := context.WithTimeout(t.Context(), 1*time.Second) + defer cancel() - batch, err := NewBatchCatFile(ctx, bareRepo1Path) + batch, err := NewBatchCatFile(ctx, bareRepo1Path, false) assert.NoError(t, err) assert.NotNil(t, batch) defer batch.Close() diff --git a/modules/git/batch_reader.go b/modules/git/batch_reader.go index 7216a2f7f6d57..8b41deeb61627 100644 --- a/modules/git/batch_reader.go +++ b/modules/git/batch_reader.go @@ -13,9 +13,6 @@ import ( "strings" "code.gitea.io/gitea/modules/log" - - "github.com/djherbis/buffer" - "github.com/djherbis/nio/v3" ) // WriteCloserError wraps an io.WriteCloser with an additional CloseWithError function @@ -40,100 +37,6 @@ func ensureValidGitRepository(ctx context.Context, repoPath string) error { return nil } -// catFileBatchCheck opens git cat-file --batch-check in the provided repo and returns a stdin pipe, a stdout reader and cancel function -func catFileBatchCheck(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func()) { - batchStdinReader, batchStdinWriter := io.Pipe() - batchStdoutReader, batchStdoutWriter := io.Pipe() - ctx, ctxCancel := context.WithCancel(ctx) - closed := make(chan struct{}) - cancel := func() { - ctxCancel() - _ = batchStdoutReader.Close() - _ = batchStdinWriter.Close() - <-closed - } - - // Ensure cancel is called as soon as the provided context is cancelled - go func() { - <-ctx.Done() - cancel() - }() - - go func() { - stderr := strings.Builder{} - err := NewCommand("cat-file", "--batch-check"). - Run(ctx, &RunOpts{ - Dir: repoPath, - Stdin: batchStdinReader, - Stdout: batchStdoutWriter, - Stderr: &stderr, - - UseContextTimeout: true, - }) - if err != nil { - _ = batchStdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String())) - _ = batchStdinReader.CloseWithError(ConcatenateError(err, (&stderr).String())) - } else { - _ = batchStdoutWriter.Close() - _ = batchStdinReader.Close() - } - close(closed) - }() - - // For simplicities sake we'll use a buffered reader to read from the cat-file --batch-check - batchReader := bufio.NewReader(batchStdoutReader) - - return batchStdinWriter, batchReader, cancel -} - -// catFileBatch opens git cat-file --batch in the provided repo and returns a stdin pipe, a stdout reader and cancel function -func catFileBatch(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func()) { - // We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary. - // so let's create a batch stdin and stdout - batchStdinReader, batchStdinWriter := io.Pipe() - batchStdoutReader, batchStdoutWriter := nio.Pipe(buffer.New(32 * 1024)) - ctx, ctxCancel := context.WithCancel(ctx) - closed := make(chan struct{}) - cancel := func() { - ctxCancel() - _ = batchStdinWriter.Close() - _ = batchStdoutReader.Close() - <-closed - } - - // Ensure cancel is called as soon as the provided context is cancelled - go func() { - <-ctx.Done() - cancel() - }() - - go func() { - stderr := strings.Builder{} - err := NewCommand("cat-file", "--batch"). - Run(ctx, &RunOpts{ - Dir: repoPath, - Stdin: batchStdinReader, - Stdout: batchStdoutWriter, - Stderr: &stderr, - - UseContextTimeout: true, - }) - if err != nil { - _ = batchStdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String())) - _ = batchStdinReader.CloseWithError(ConcatenateError(err, (&stderr).String())) - } else { - _ = batchStdoutWriter.Close() - _ = batchStdinReader.Close() - } - close(closed) - }() - - // For simplicities sake we'll us a buffered reader to read from the cat-file --batch - batchReader := bufio.NewReaderSize(batchStdoutReader, 32*1024) - - return batchStdinWriter, batchReader, cancel -} - // ReadBatchLine reads the header line from cat-file --batch // We expect: SP SP LF // then leaving the rest of the stream " LF" to be read diff --git a/modules/git/blob_nogogit.go b/modules/git/blob_nogogit.go index af3ce376d6a46..f4200c1e5ddce 100644 --- a/modules/git/blob_nogogit.go +++ b/modules/git/blob_nogogit.go @@ -26,13 +26,14 @@ type Blob struct { // DataAsync gets a ReadCloser for the contents of a blob without reading it all. // Calling the Close function on the result will discard all unread output. func (b *Blob) DataAsync() (io.ReadCloser, error) { - wr, rd, cancel, err := b.repo.CatFileBatch(b.repo.Ctx) + batch, cancel, err := b.repo.CatFileBatch(b.repo.Ctx) if err != nil { return nil, err } - _, err = wr.Write([]byte(b.ID.String() + "\n")) - if err != nil { + rd := batch.Reader() + + if err = batch.Input(b.ID.String()); err != nil { cancel() return nil, err } @@ -67,18 +68,17 @@ func (b *Blob) Size() int64 { return b.size } - wr, rd, cancel, err := b.repo.CatFileBatchCheck(b.repo.Ctx) + batch, cancel, err := b.repo.CatFileBatchCheck(b.repo.Ctx) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err) return 0 } defer cancel() - _, err = wr.Write([]byte(b.ID.String() + "\n")) - if err != nil { + if err = batch.Input(b.ID.String()); err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err) return 0 } - _, _, b.size, err = ReadBatchLine(rd) + _, _, b.size, err = ReadBatchLine(batch.Reader()) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err) return 0 diff --git a/modules/git/commit_info_nogogit.go b/modules/git/commit_info_nogogit.go index fb1f6a6085d9c..b96350004e221 100644 --- a/modules/git/commit_info_nogogit.go +++ b/modules/git/commit_info_nogogit.go @@ -124,11 +124,11 @@ func GetLastCommitForPaths(ctx context.Context, commit *Commit, treePath string, return nil, err } - batch, err := NewBatchCatFile(ctx, commit.repo.Path) + batch, cancel, err := commit.repo.CatFileBatch(ctx) if err != nil { return nil, err } - defer batch.Close() + defer cancel() rd := batch.Reader() diff --git a/modules/git/pipeline/lfs_nogogit.go b/modules/git/pipeline/lfs_nogogit.go index c5eed737011a6..7824ccc230c88 100644 --- a/modules/git/pipeline/lfs_nogogit.go +++ b/modules/git/pipeline/lfs_nogogit.go @@ -46,12 +46,13 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err // Next feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary. // so let's create a batch stdin and stdout - batchStdinWriter, batchReader, cancel, err := repo.CatFileBatch(repo.Ctx) + batch, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { return nil, err } defer cancel() + batchReader := batch.Reader() // We'll use a scanner for the revList because it's simpler than a bufio.Reader scan := bufio.NewScanner(revListReader) trees := [][]byte{} @@ -63,15 +64,10 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err for scan.Scan() { // Get the next commit ID - commitID := scan.Bytes() + commitID := scan.Text() // push the commit to the cat-file --batch process - _, err := batchStdinWriter.Write(commitID) - if err != nil { - return nil, err - } - _, err = batchStdinWriter.Write([]byte{'\n'}) - if err != nil { + if err := batch.Input(commitID); err != nil { return nil, err } @@ -92,14 +88,13 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err if err != nil { return nil, err } - _, err = batchStdinWriter.Write([]byte(id + "\n")) - if err != nil { + if err = batch.Input(id); err != nil { return nil, err } continue case "commit": // Read in the commit to get its tree and in case this is one of the last used commits - curCommit, err = git.CommitFromReader(repo, git.MustIDFromString(string(commitID)), io.LimitReader(batchReader, size)) + curCommit, err = git.CommitFromReader(repo, git.MustIDFromString(commitID), io.LimitReader(batchReader, size)) if err != nil { return nil, err } @@ -107,7 +102,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err return nil, err } - if _, err := batchStdinWriter.Write([]byte(curCommit.Tree.ID.String() + "\n")); err != nil { + if err := batch.Input(curCommit.Tree.ID.String()); err != nil { return nil, err } curPath = "" @@ -139,14 +134,10 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err return nil, err } if len(trees) > 0 { - _, err := batchStdinWriter.Write(trees[len(trees)-1]) - if err != nil { - return nil, err - } - _, err = batchStdinWriter.Write([]byte("\n")) - if err != nil { + if err := batch.Input(string(trees[len(trees)-1])); err != nil { return nil, err } + curPath = paths[len(paths)-1] trees = trees[:len(trees)-1] paths = paths[:len(paths)-1] diff --git a/modules/git/repo_base_nogogit.go b/modules/git/repo_base_nogogit.go index 477e3b8742976..f0a909cf215f1 100644 --- a/modules/git/repo_base_nogogit.go +++ b/modules/git/repo_base_nogogit.go @@ -7,11 +7,10 @@ package git import ( - "bufio" "context" "path/filepath" - "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/util" ) @@ -26,10 +25,10 @@ type Repository struct { gpgSettings *GPGSettings batchInUse bool - batch *Batch + batch *BatchCatFile checkInUse bool - check *Batch + batchCheck *BatchCatFile Ctx context.Context LastCommitCache *LastCommitCache @@ -59,53 +58,57 @@ func OpenRepository(ctx context.Context, repoPath string) (*Repository, error) { } // CatFileBatch obtains a CatFileBatch for this repository -func (repo *Repository) CatFileBatch(ctx context.Context) (WriteCloserError, *bufio.Reader, func(), error) { +func (repo *Repository) CatFileBatch(ctx context.Context) (*BatchCatFile, func(), error) { if repo.batch == nil { var err error - repo.batch, err = repo.NewBatch(ctx) + repo.batch, err = NewBatchCatFile(ctx, repo.Path, false) if err != nil { - return nil, nil, nil, err + return nil, nil, err } } if !repo.batchInUse { repo.batchInUse = true - return repo.batch.Writer, repo.batch.Reader, func() { + return repo.batch, func() { repo.batchInUse = false }, nil } - log.Debug("Opening temporary cat file batch for: %s", repo.Path) - tempBatch, err := repo.NewBatch(ctx) + setting.PanicInDevOrTesting("Opening temporary cat file batch for: %s", repo.Path) + tempBatch, err := NewBatchCatFile(ctx, repo.Path, false) if err != nil { - return nil, nil, nil, err + return nil, nil, err } - return tempBatch.Writer, tempBatch.Reader, tempBatch.Close, nil + return tempBatch, func() { + _ = tempBatch.Close() + }, nil } // CatFileBatchCheck obtains a CatFileBatchCheck for this repository -func (repo *Repository) CatFileBatchCheck(ctx context.Context) (WriteCloserError, *bufio.Reader, func(), error) { - if repo.check == nil { +func (repo *Repository) CatFileBatchCheck(ctx context.Context) (*BatchCatFile, func(), error) { + if repo.batchCheck == nil { var err error - repo.check, err = repo.NewBatchCheck(ctx) + repo.batchCheck, err = NewBatchCatFile(ctx, repo.Path, true) if err != nil { - return nil, nil, nil, err + return nil, nil, err } } if !repo.checkInUse { repo.checkInUse = true - return repo.check.Writer, repo.check.Reader, func() { + return repo.batchCheck, func() { repo.checkInUse = false }, nil } - log.Debug("Opening temporary cat file batch-check for: %s", repo.Path) - tempBatchCheck, err := repo.NewBatchCheck(ctx) + setting.PanicInDevOrTesting("Opening temporary cat file batch with check for: %s", repo.Path) + tempBatch, err := NewBatchCatFile(ctx, repo.Path, true) if err != nil { - return nil, nil, nil, err + return nil, nil, err } - return tempBatchCheck.Writer, tempBatchCheck.Reader, tempBatchCheck.Close, nil + return tempBatch, func() { + _ = tempBatch.Close() + }, nil } func (repo *Repository) Close() error { @@ -117,9 +120,9 @@ func (repo *Repository) Close() error { repo.batch = nil repo.batchInUse = false } - if repo.check != nil { - repo.check.Close() - repo.check = nil + if repo.batchCheck != nil { + repo.batchCheck.Close() + repo.batchCheck = nil repo.checkInUse = false } repo.LastCommitCache = nil diff --git a/modules/git/repo_branch_nogogit.go b/modules/git/repo_branch_nogogit.go index 0d11198523515..9e879939455af 100644 --- a/modules/git/repo_branch_nogogit.go +++ b/modules/git/repo_branch_nogogit.go @@ -22,18 +22,17 @@ func (repo *Repository) IsObjectExist(name string) bool { return false } - wr, rd, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + batch, cancel, err := repo.CatFileBatchCheck(repo.Ctx) if err != nil { log.Debug("Error writing to CatFileBatchCheck %v", err) return false } defer cancel() - _, err = wr.Write([]byte(name + "\n")) - if err != nil { + if err = batch.Input(name); err != nil { log.Debug("Error writing to CatFileBatchCheck %v", err) return false } - sha, _, _, err := ReadBatchLine(rd) + sha, _, _, err := ReadBatchLine(batch.Reader()) return err == nil && bytes.HasPrefix(sha, []byte(strings.TrimSpace(name))) } @@ -43,18 +42,17 @@ func (repo *Repository) IsReferenceExist(name string) bool { return false } - wr, rd, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + batch, cancel, err := repo.CatFileBatchCheck(repo.Ctx) if err != nil { log.Debug("Error writing to CatFileBatchCheck %v", err) return false } defer cancel() - _, err = wr.Write([]byte(name + "\n")) - if err != nil { + if err = batch.Input(name); err != nil { log.Debug("Error writing to CatFileBatchCheck %v", err) return false } - _, _, _, err = ReadBatchLine(rd) + _, _, _, err = ReadBatchLine(batch.Reader()) return err == nil } diff --git a/modules/git/repo_commit_nogogit.go b/modules/git/repo_commit_nogogit.go index 5aa0e9ec0457d..bcd67d8a62091 100644 --- a/modules/git/repo_commit_nogogit.go +++ b/modules/git/repo_commit_nogogit.go @@ -33,16 +33,15 @@ func (repo *Repository) ResolveReference(name string) (string, error) { // GetRefCommitID returns the last commit ID string of given reference (branch or tag). func (repo *Repository) GetRefCommitID(name string) (string, error) { - wr, rd, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + batch, cancel, err := repo.CatFileBatchCheck(repo.Ctx) if err != nil { return "", err } defer cancel() - _, err = wr.Write([]byte(name + "\n")) - if err != nil { + if err = batch.Input(name); err != nil { return "", err } - shaBs, _, _, err := ReadBatchLine(rd) + shaBs, _, _, err := ReadBatchLine(batch.Reader()) if IsErrNotExist(err) { return "", ErrNotExist{name, ""} } @@ -73,15 +72,17 @@ func (repo *Repository) IsCommitExist(name string) bool { } func (repo *Repository) getCommit(id ObjectID) (*Commit, error) { - wr, rd, cancel, err := repo.CatFileBatch(repo.Ctx) + batch, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { return nil, err } defer cancel() - _, _ = wr.Write([]byte(id.String() + "\n")) + if err = batch.Input(id.String()); err != nil { + return nil, err + } - return repo.getCommitFromBatchReader(rd, id) + return repo.getCommitFromBatchReader(batch.Reader(), id) } func (repo *Repository) getCommitFromBatchReader(rd *bufio.Reader, id ObjectID) (*Commit, error) { @@ -153,16 +154,15 @@ func (repo *Repository) ConvertToGitID(commitID string) (ObjectID, error) { } } - wr, rd, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + batch, cancel, err := repo.CatFileBatchCheck(repo.Ctx) if err != nil { return nil, err } defer cancel() - _, err = wr.Write([]byte(commitID + "\n")) - if err != nil { + if err = batch.Input(commitID); err != nil { return nil, err } - sha, _, _, err := ReadBatchLine(rd) + sha, _, _, err := ReadBatchLine(batch.Reader()) if err != nil { if IsErrNotExist(err) { return nil, ErrNotExist{commitID, ""} diff --git a/modules/git/repo_language_stats_nogogit.go b/modules/git/repo_language_stats_nogogit.go index 2b1380d4266df..df8c4452297d6 100644 --- a/modules/git/repo_language_stats_nogogit.go +++ b/modules/git/repo_language_stats_nogogit.go @@ -20,11 +20,11 @@ import ( func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) { // We will feed the commit IDs in order into cat-file --batch, followed by blobs as necessary. // so let's create a batch stdin and stdout - batch, err := NewBatchCatFile(repo.Ctx, repo.Path) + batch, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { return nil, err } - defer batch.Close() + defer cancel() if err := batch.Input(commitID); err != nil { return nil, err diff --git a/modules/git/repo_tag_nogogit.go b/modules/git/repo_tag_nogogit.go index e0a3104249ebc..9fb44c47b9ee6 100644 --- a/modules/git/repo_tag_nogogit.go +++ b/modules/git/repo_tag_nogogit.go @@ -31,18 +31,20 @@ func (repo *Repository) GetTags(skip, limit int) (tags []string, err error) { // GetTagType gets the type of the tag, either commit (simple) or tag (annotated) func (repo *Repository) GetTagType(id ObjectID) (string, error) { - wr, rd, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + batch, cancel, err := repo.CatFileBatchCheck(repo.Ctx) if err != nil { return "", err } defer cancel() - _, err = wr.Write([]byte(id.String() + "\n")) - if err != nil { + if err = batch.Input(id.String()); err != nil { return "", err } - _, typ, _, err := ReadBatchLine(rd) - if IsErrNotExist(err) { - return "", ErrNotExist{ID: id.String()} + _, typ, _, err := ReadBatchLine(batch.Reader()) + if err != nil { + if IsErrNotExist(err) { + return "", ErrNotExist{ID: id.String()} + } + return "", err } return typ, nil } @@ -92,15 +94,16 @@ func (repo *Repository) getTag(tagID ObjectID, name string) (*Tag, error) { } // The tag is an annotated tag with a message. - wr, rd, cancel, err := repo.CatFileBatch(repo.Ctx) + batch, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { return nil, err } defer cancel() - if _, err := wr.Write([]byte(tagID.String() + "\n")); err != nil { + if err := batch.Input(tagID.String()); err != nil { return nil, err } + rd := batch.Reader() _, typ, size, err := ReadBatchLine(rd) if err != nil { if errors.Is(err, io.EOF) || IsErrNotExist(err) { diff --git a/modules/git/repo_tree_nogogit.go b/modules/git/repo_tree_nogogit.go index d74769ccb2122..e40f777a460a7 100644 --- a/modules/git/repo_tree_nogogit.go +++ b/modules/git/repo_tree_nogogit.go @@ -10,13 +10,17 @@ import ( ) func (repo *Repository) getTree(id ObjectID) (*Tree, error) { - wr, rd, cancel, err := repo.CatFileBatch(repo.Ctx) + batch, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { return nil, err } defer cancel() - _, _ = wr.Write([]byte(id.String() + "\n")) + if err = batch.Input(id.String()); err != nil { + return nil, err + } + + rd := batch.Reader() // ignore the SHA _, typ, size, err := ReadBatchLine(rd) diff --git a/modules/git/tree_entry_nogogit.go b/modules/git/tree_entry_nogogit.go index 81fb638d56fbe..6c0edd36ce855 100644 --- a/modules/git/tree_entry_nogogit.go +++ b/modules/git/tree_entry_nogogit.go @@ -36,18 +36,17 @@ func (te *TreeEntry) Size() int64 { return te.size } - wr, rd, cancel, err := te.ptree.repo.CatFileBatchCheck(te.ptree.repo.Ctx) + batch, cancel, err := te.ptree.repo.CatFileBatchCheck(te.ptree.repo.Ctx) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", te.ID.String(), te.ptree.repo.Path, err) return 0 } defer cancel() - _, err = wr.Write([]byte(te.ID.String() + "\n")) - if err != nil { + if err = batch.Input(te.ID.String()); err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", te.ID.String(), te.ptree.repo.Path, err) return 0 } - _, _, te.size, err = ReadBatchLine(rd) + _, _, te.size, err = ReadBatchLine(batch.Reader()) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", te.ID.String(), te.ptree.repo.Path, err) return 0 diff --git a/modules/git/tree_nogogit.go b/modules/git/tree_nogogit.go index f88788418e27d..ebdd01b7ca837 100644 --- a/modules/git/tree_nogogit.go +++ b/modules/git/tree_nogogit.go @@ -33,13 +33,16 @@ func (t *Tree) ListEntries() (Entries, error) { } if t.repo != nil { - wr, rd, cancel, err := t.repo.CatFileBatch(t.repo.Ctx) + batch, cancel, err := t.repo.CatFileBatch(t.repo.Ctx) if err != nil { return nil, err } defer cancel() - _, _ = wr.Write([]byte(t.ID.String() + "\n")) + if err = batch.Input(t.ID.String()); err != nil { + return nil, err + } + rd := batch.Reader() _, typ, sz, err := ReadBatchLine(rd) if err != nil { return nil, err @@ -49,7 +52,10 @@ func (t *Tree) ListEntries() (Entries, error) { if err != nil && err != io.EOF { return nil, err } - _, _ = wr.Write([]byte(treeID + "\n")) + if err = batch.Input(treeID); err != nil { + return nil, err + } + _, typ, sz, err = ReadBatchLine(rd) if err != nil { return nil, err diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index e1a381f992c82..432ae10f48632 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -4,7 +4,6 @@ package bleve import ( - "bufio" "context" "fmt" "io" @@ -16,7 +15,6 @@ import ( "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" - "code.gitea.io/gitea/modules/gitrepo" "code.gitea.io/gitea/modules/indexer" path_filter "code.gitea.io/gitea/modules/indexer/code/bleve/token/path" "code.gitea.io/gitea/modules/indexer/code/internal" @@ -151,7 +149,7 @@ func NewIndexer(indexDir string) *Indexer { } } -func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserError, batchReader *bufio.Reader, commitSha string, +func (b *Indexer) addUpdate(ctx context.Context, batchCatFile *git.BatchCatFile, commitSha string, update internal.FileUpdate, repo *repo_model.Repository, batch *inner_bleve.FlushingBatch, ) error { // Ignore vendored files in code search @@ -177,10 +175,12 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro return b.addDelete(update.Filename, repo, batch) } - if _, err := batchWriter.Write([]byte(update.BlobSha + "\n")); err != nil { + if err := batchCatFile.Input(update.BlobSha); err != nil { return err } + batchReader := batchCatFile.Reader() + _, _, size, err = git.ReadBatchLine(batchReader) if err != nil { return err @@ -217,19 +217,14 @@ func (b *Indexer) addDelete(filename string, repo *repo_model.Repository, batch func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error { batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) if len(changes.Updates) > 0 { - r, err := gitrepo.OpenRepository(ctx, repo) - if err != nil { - return err - } - defer r.Close() - gitBatch, err := r.NewBatch(ctx) + gitBatch, err := git.NewBatchCatFile(ctx, repo.RepoPath(), false) if err != nil { return err } - defer gitBatch.Close() for _, update := range changes.Updates { - if err := b.addUpdate(ctx, gitBatch.Writer, gitBatch.Reader, sha, update, repo, batch); err != nil { + if err := b.addUpdate(ctx, gitBatch, sha, update, repo, batch); err != nil { + gitBatch.Close() return err } } diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go index be8efad5fd89d..da369b5413af4 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -4,7 +4,6 @@ package elasticsearch import ( - "bufio" "context" "fmt" "io" @@ -15,7 +14,6 @@ import ( "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" - "code.gitea.io/gitea/modules/gitrepo" "code.gitea.io/gitea/modules/indexer" "code.gitea.io/gitea/modules/indexer/code/internal" indexer_internal "code.gitea.io/gitea/modules/indexer/internal" @@ -138,7 +136,7 @@ const ( }` ) -func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserError, batchReader *bufio.Reader, sha string, update internal.FileUpdate, repo *repo_model.Repository) ([]elastic.BulkableRequest, error) { +func (b *Indexer) addUpdate(ctx context.Context, batch *git.BatchCatFile, sha string, update internal.FileUpdate, repo *repo_model.Repository) ([]elastic.BulkableRequest, error) { // Ignore vendored files in code search if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) { return nil, nil @@ -161,9 +159,10 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro return []elastic.BulkableRequest{b.addDelete(update.Filename, repo)}, nil } - if _, err := batchWriter.Write([]byte(update.BlobSha + "\n")); err != nil { + if err := batch.Input(update.BlobSha); err != nil { return nil, err } + batchReader := batch.Reader() _, _, size, err = git.ReadBatchLine(batchReader) if err != nil { @@ -209,19 +208,14 @@ func (b *Indexer) addDelete(filename string, repo *repo_model.Repository) elasti func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error { reqs := make([]elastic.BulkableRequest, 0) if len(changes.Updates) > 0 { - r, err := gitrepo.OpenRepository(ctx, repo) - if err != nil { - return err - } - defer r.Close() - batch, err := r.NewBatch(ctx) + batch, err := git.NewBatchCatFile(ctx, repo.RepoPath(), false) if err != nil { return err } defer batch.Close() for _, update := range changes.Updates { - updateReqs, err := b.addUpdate(ctx, batch.Writer, batch.Reader, sha, update, repo) + updateReqs, err := b.addUpdate(ctx, batch, sha, update, repo) if err != nil { return err } From 861c6c66b7e1063a9f102898463b72b7da71762d Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Tue, 18 Mar 2025 19:35:48 -0700 Subject: [PATCH 03/10] Fix copyright year --- modules/git/batch_cat_file.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/git/batch_cat_file.go b/modules/git/batch_cat_file.go index d4974a13c4901..c701cbb75244c 100644 --- a/modules/git/batch_cat_file.go +++ b/modules/git/batch_cat_file.go @@ -1,4 +1,4 @@ -// Copyright 2024 The Gitea Authors. All rights reserved. +// Copyright 2025 The Gitea Authors. All rights reserved. // SPDX-License-Identifier: MIT package git From aaec98d34e95604201382d2c608421c47602bfe7 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Tue, 18 Mar 2025 19:40:42 -0700 Subject: [PATCH 04/10] Improvements --- modules/git/batch_reader.go | 1 - modules/git/commit_info_nogogit.go | 10 +++++----- modules/git/pipeline/lfs_nogogit.go | 1 - 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/modules/git/batch_reader.go b/modules/git/batch_reader.go index 8b41deeb61627..3fb10d9cf09a7 100644 --- a/modules/git/batch_reader.go +++ b/modules/git/batch_reader.go @@ -68,7 +68,6 @@ func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err er typ = typ[:idx] size, err = strconv.ParseInt(sizeStr, 10, 64) - return sha, typ, size, err } diff --git a/modules/git/commit_info_nogogit.go b/modules/git/commit_info_nogogit.go index b96350004e221..53dad5a08acf3 100644 --- a/modules/git/commit_info_nogogit.go +++ b/modules/git/commit_info_nogogit.go @@ -130,7 +130,7 @@ func GetLastCommitForPaths(ctx context.Context, commit *Commit, treePath string, } defer cancel() - rd := batch.Reader() + batchReader := batch.Reader() commitsMap := map[string]*Commit{} commitsMap[commit.ID.String()] = commit @@ -150,21 +150,21 @@ func GetLastCommitForPaths(ctx context.Context, commit *Commit, treePath string, if err := batch.Input(commitID); err != nil { return nil, err } - _, typ, size, err := ReadBatchLine(rd) + _, typ, size, err := ReadBatchLine(batchReader) if err != nil { return nil, err } if typ != "commit" { - if err := DiscardFull(rd, size+1); err != nil { + if err := DiscardFull(batchReader, size+1); err != nil { return nil, err } return nil, fmt.Errorf("unexpected type: %s for commit id: %s", typ, commitID) } - c, err = CommitFromReader(commit.repo, MustIDFromString(commitID), io.LimitReader(rd, size)) + c, err = CommitFromReader(commit.repo, MustIDFromString(commitID), io.LimitReader(batchReader, size)) if err != nil { return nil, err } - if _, err := rd.Discard(1); err != nil { + if _, err := batchReader.Discard(1); err != nil { return nil, err } commitCommits[path] = c diff --git a/modules/git/pipeline/lfs_nogogit.go b/modules/git/pipeline/lfs_nogogit.go index 7824ccc230c88..e5bc1074d269a 100644 --- a/modules/git/pipeline/lfs_nogogit.go +++ b/modules/git/pipeline/lfs_nogogit.go @@ -137,7 +137,6 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err if err := batch.Input(string(trees[len(trees)-1])); err != nil { return nil, err } - curPath = paths[len(paths)-1] trees = trees[:len(trees)-1] paths = paths[:len(paths)-1] From fb91a8589d76b3e305f24119df7f5b63e2b119b2 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Tue, 18 Mar 2025 21:30:14 -0700 Subject: [PATCH 05/10] Fix duplicated opened batch cat file --- modules/git/repo_commit_nogogit.go | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/modules/git/repo_commit_nogogit.go b/modules/git/repo_commit_nogogit.go index bcd67d8a62091..eacad401653f3 100644 --- a/modules/git/repo_commit_nogogit.go +++ b/modules/git/repo_commit_nogogit.go @@ -6,7 +6,6 @@ package git import ( - "bufio" "errors" "io" "strings" @@ -82,10 +81,11 @@ func (repo *Repository) getCommit(id ObjectID) (*Commit, error) { return nil, err } - return repo.getCommitFromBatchReader(batch.Reader(), id) + return repo.getCommitFromBatchReader(batch, id) } -func (repo *Repository) getCommitFromBatchReader(rd *bufio.Reader, id ObjectID) (*Commit, error) { +func (repo *Repository) getCommitFromBatchReader(batch *BatchCatFile, id ObjectID) (*Commit, error) { + rd := batch.Reader() _, typ, size, err := ReadBatchLine(rd) if err != nil { if errors.Is(err, io.EOF) || IsErrNotExist(err) { @@ -113,7 +113,11 @@ func (repo *Repository) getCommitFromBatchReader(rd *bufio.Reader, id ObjectID) return nil, err } - commit, err := tag.Commit(repo) + if err = batch.Input(tag.Object.String()); err != nil { + return nil, err + } + + commit, err := repo.getCommitFromBatchReader(batch, tag.Object) if err != nil { return nil, err } From ba73dd0dfe19aa21fdfcbca41153307d7e343a8c Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Tue, 18 Mar 2025 22:16:58 -0700 Subject: [PATCH 06/10] Fix test --- tests/integration/git_misc_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/git_misc_test.go b/tests/integration/git_misc_test.go index bf9e2c02ab38b..11cee48858e4b 100644 --- a/tests/integration/git_misc_test.go +++ b/tests/integration/git_misc_test.go @@ -56,10 +56,10 @@ func TestDataAsyncDoubleRead_Issue29101(t *testing.T) { b := entry.Blob() r1, err := b.DataAsync() assert.NoError(t, err) - defer r1.Close() + r1.Close() r2, err := b.DataAsync() assert.NoError(t, err) - defer r2.Close() + r2.Close() var data1, data2 []byte wg := sync.WaitGroup{} From acfa35a3c993f8af93cc31e957c2821d69f49ed5 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Tue, 18 Mar 2025 23:25:39 -0700 Subject: [PATCH 07/10] Revert change --- tests/integration/git_misc_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/git_misc_test.go b/tests/integration/git_misc_test.go index 11cee48858e4b..bf9e2c02ab38b 100644 --- a/tests/integration/git_misc_test.go +++ b/tests/integration/git_misc_test.go @@ -56,10 +56,10 @@ func TestDataAsyncDoubleRead_Issue29101(t *testing.T) { b := entry.Blob() r1, err := b.DataAsync() assert.NoError(t, err) - r1.Close() + defer r1.Close() r2, err := b.DataAsync() assert.NoError(t, err) - r2.Close() + defer r2.Close() var data1, data2 []byte wg := sync.WaitGroup{} From 97fc21b0c3fba84297bd167d526fc3cf45c49672 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Wed, 19 Mar 2025 09:22:34 -0700 Subject: [PATCH 08/10] Fix test --- modules/git/repo_base_nogogit.go | 4 +++- modules/setting/git.go | 3 +++ tests/integration/git_misc_test.go | 6 ++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/modules/git/repo_base_nogogit.go b/modules/git/repo_base_nogogit.go index f0a909cf215f1..a7d31b568d7e4 100644 --- a/modules/git/repo_base_nogogit.go +++ b/modules/git/repo_base_nogogit.go @@ -74,7 +74,9 @@ func (repo *Repository) CatFileBatch(ctx context.Context) (*BatchCatFile, func() }, nil } - setting.PanicInDevOrTesting("Opening temporary cat file batch for: %s", repo.Path) + if !setting.DisableTempCatFileBatchCheck { + setting.PanicInDevOrTesting("Opening temporary cat file batch for: %s", repo.Path) + } tempBatch, err := NewBatchCatFile(ctx, repo.Path, false) if err != nil { return nil, nil, err diff --git a/modules/setting/git.go b/modules/setting/git.go index 48a4e7f30deb5..12ce9903def06 100644 --- a/modules/setting/git.go +++ b/modules/setting/git.go @@ -11,6 +11,9 @@ import ( "code.gitea.io/gitea/modules/log" ) +// DisableTempCatFileBatchCheck disables the temporary cat-file batch check +var DisableTempCatFileBatchCheck = false + // Git settings var Git = struct { Path string diff --git a/tests/integration/git_misc_test.go b/tests/integration/git_misc_test.go index bf9e2c02ab38b..84d0a8cfe9190 100644 --- a/tests/integration/git_misc_test.go +++ b/tests/integration/git_misc_test.go @@ -18,12 +18,18 @@ import ( user_model "code.gitea.io/gitea/models/user" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/gitrepo" + "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/test" files_service "code.gitea.io/gitea/services/repository/files" "github.com/stretchr/testify/assert" ) func TestDataAsyncDoubleRead_Issue29101(t *testing.T) { + // in this test, we will have two parallel readers reading the same blob + // So we need to ignore the temporary CatFileBranch checking to make the test pass + defer test.MockVariableValue(&setting.DisableTempCatFileBatchCheck, true) + onGiteaRun(t, func(t *testing.T, u *url.URL) { user := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 2}) repo := unittest.AssertExistsAndLoadBean(t, &repo_model.Repository{ID: 1}) From 798332a714083fa319bb905bf048457a5b81c9ea Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Wed, 19 Mar 2025 09:45:53 -0700 Subject: [PATCH 09/10] Fix test --- tests/integration/git_misc_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/git_misc_test.go b/tests/integration/git_misc_test.go index 84d0a8cfe9190..f84ee43991c39 100644 --- a/tests/integration/git_misc_test.go +++ b/tests/integration/git_misc_test.go @@ -28,7 +28,7 @@ import ( func TestDataAsyncDoubleRead_Issue29101(t *testing.T) { // in this test, we will have two parallel readers reading the same blob // So we need to ignore the temporary CatFileBranch checking to make the test pass - defer test.MockVariableValue(&setting.DisableTempCatFileBatchCheck, true) + defer test.MockVariableValue(&setting.DisableTempCatFileBatchCheck, true)() onGiteaRun(t, func(t *testing.T, u *url.URL) { user := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 2}) From 26eca64ad3286c2dc9b462b4c9a54339d37ac417 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Wed, 19 Mar 2025 10:43:50 -0700 Subject: [PATCH 10/10] avoid to create uncessary temporary cat file sub process --- routers/web/repo/view_file.go | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/routers/web/repo/view_file.go b/routers/web/repo/view_file.go index 4ce7a8e3a4480..3defd310e674e 100644 --- a/routers/web/repo/view_file.go +++ b/routers/web/repo/view_file.go @@ -33,6 +33,15 @@ import ( func prepareToRenderFile(ctx *context.Context, entry *git.TreeEntry) { ctx.Data["IsViewFile"] = true ctx.Data["HideRepoInfo"] = true + commit, err := ctx.Repo.Commit.GetCommitByPath(ctx.Repo.TreePath) + if err != nil { + ctx.ServerError("GetCommitByPath", err) + return + } + if !loadLatestCommitData(ctx, commit) { + return + } + blob := entry.Blob() buf, dataRc, fInfo, err := getFileReader(ctx, ctx.Repo.Repository.ID, blob) if err != nil { @@ -46,16 +55,6 @@ func prepareToRenderFile(ctx *context.Context, entry *git.TreeEntry) { ctx.Data["FileName"] = blob.Name() ctx.Data["RawFileLink"] = ctx.Repo.RepoLink + "/raw/" + ctx.Repo.RefTypeNameSubURL() + "/" + util.PathEscapeSegments(ctx.Repo.TreePath) - commit, err := ctx.Repo.Commit.GetCommitByPath(ctx.Repo.TreePath) - if err != nil { - ctx.ServerError("GetCommitByPath", err) - return - } - - if !loadLatestCommitData(ctx, commit) { - return - } - if ctx.Repo.TreePath == ".editorconfig" { _, editorconfigWarning, editorconfigErr := ctx.Repo.GetEditorconfig(ctx.Repo.Commit) if editorconfigWarning != nil {