Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use tar.Header references over copies in tar index #271

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/file/metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ func (m ManualInfo) Sys() any {
return m.SysValue
}

func NewMetadata(header tar.Header, content io.Reader) Metadata {
func NewMetadata(header *tar.Header, content io.Reader) Metadata {
return Metadata{
FileInfo: header.FileInfo(),
Path: path.Clean(DirSeparator + header.Name),
Expand Down
5 changes: 4 additions & 1 deletion pkg/file/mime_type.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@ func MIMEType(reader io.Reader) string {
return ""
}

s := sizer{reader: reader}
// though there is mimetype.SetLimit() at our disposal, that is a static resource which could be set by another
// library. To avoid any potential conflicts, we'll limit the reader ourselves. This is more of a safety measure
// to prevent performance regression than it is a performance optimization.
s := sizer{reader: io.LimitReader(reader, 3072)}

var mTypeStr string
mType, err := mimetype.DetectReader(&s)
Expand Down
1 change: 0 additions & 1 deletion pkg/file/tar_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ func NewTarIndex(tarFilePath string, onIndex TarIndexVisitor) (*TarIndex, error)
// body payload starts (after the header has been read).
indexEntry := TarIndexEntry{
path: tarFileHandle.Name(),
sequence: entry.Sequence,
header: entry.Header,
seekPosition: entrySeekPosition,
}
Expand Down
8 changes: 3 additions & 5 deletions pkg/file/tar_index_entry.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,14 @@ import (

type TarIndexEntry struct {
path string
sequence int64
header tar.Header
header *tar.Header
seekPosition int64
}

func (t *TarIndexEntry) ToTarFileEntry() TarFileEntry {
return TarFileEntry{
Sequence: t.sequence,
Header: t.header,
Reader: t.Open(),
Header: t.header,
Reader: t.Open(),
}
}

Expand Down
15 changes: 5 additions & 10 deletions pkg/file/tarutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,8 @@ type tarFile struct {

// TarFileEntry represents the header, contents, and list position of an entry within a tar file.
type TarFileEntry struct {
Sequence int64
Header tar.Header
Reader io.Reader
Header *tar.Header
Reader io.Reader
}

// TarFileVisitor is a visitor function meant to be used in conjunction with the IterateTar.
Expand All @@ -48,10 +47,7 @@ func (e *ErrFileNotFound) Error() string {
// or if the visitor function returns a ErrTarStopIteration sentinel error.
func IterateTar(reader io.Reader, visitor TarFileVisitor) error {
tarReader := tar.NewReader(reader)
var sequence int64 = -1
for {
sequence++

hdr, err := tarReader.Next()
if errors.Is(err, io.EOF) {
break
Expand All @@ -64,9 +60,8 @@ func IterateTar(reader io.Reader, visitor TarFileVisitor) error {
}

if err := visitor(TarFileEntry{
Sequence: sequence,
Header: *hdr,
Reader: tarReader,
Header: hdr,
Reader: tarReader,
}); err != nil {
if errors.Is(err, ErrTarStopIteration) {
return nil
Expand Down Expand Up @@ -144,7 +139,7 @@ type tarVisitor struct {
}

func (v tarVisitor) visit(entry TarFileEntry) error {
target := filepath.Join(v.destination, entry.Header.Name)
target := filepath.Join(v.destination, entry.Header.Name) //nolint:gosec // we are checking path traversal issues just below this line

// we should not allow for any destination path to be outside of where we are unarchiving to
// "." is a special case that we allow (it is the root of the unarchived content)
Expand Down
30 changes: 10 additions & 20 deletions pkg/file/tarutil_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,8 +249,7 @@ func Test_tarVisitor_visit(t *testing.T) {
{
name: "regular file is written",
entry: TarFileEntry{
Sequence: 0,
Header: tar.Header{
Header: &tar.Header{
Typeflag: tar.TypeReg,
Name: "file.txt",
Linkname: "",
Expand All @@ -268,8 +267,7 @@ func Test_tarVisitor_visit(t *testing.T) {
{
name: "regular file with possible path traversal errors out",
entry: TarFileEntry{
Sequence: 0,
Header: tar.Header{
Header: &tar.Header{
Typeflag: tar.TypeReg,
Name: "../file.txt",
Linkname: "",
Expand All @@ -282,8 +280,7 @@ func Test_tarVisitor_visit(t *testing.T) {
{
name: "local . index is not a traversal error and should skip",
entry: TarFileEntry{
Sequence: 0,
Header: tar.Header{
Header: &tar.Header{
Typeflag: tar.TypeDir,
Name: ".",
Linkname: "",
Expand All @@ -296,8 +293,7 @@ func Test_tarVisitor_visit(t *testing.T) {
{
name: "regular file with possible path traversal errors out (same prefix)",
entry: TarFileEntry{
Sequence: 0,
Header: tar.Header{
Header: &tar.Header{
Typeflag: tar.TypeReg,
Name: "../tmp-file.txt",
Linkname: "",
Expand All @@ -310,8 +306,7 @@ func Test_tarVisitor_visit(t *testing.T) {
{
name: "directory is created",
entry: TarFileEntry{
Sequence: 0,
Header: tar.Header{
Header: &tar.Header{
Typeflag: tar.TypeDir,
Name: "dir",
Linkname: "",
Expand All @@ -327,8 +322,7 @@ func Test_tarVisitor_visit(t *testing.T) {
{
name: "symlink is ignored",
entry: TarFileEntry{
Sequence: 0,
Header: tar.Header{
Header: &tar.Header{
Typeflag: tar.TypeSymlink,
Name: "symlink",
Linkname: "./../to-location",
Expand All @@ -344,8 +338,7 @@ func Test_tarVisitor_visit(t *testing.T) {
{
name: "hardlink is ignored",
entry: TarFileEntry{
Sequence: 0,
Header: tar.Header{
Header: &tar.Header{
Typeflag: tar.TypeLink,
Name: "link",
Linkname: "./../to-location",
Expand All @@ -361,8 +354,7 @@ func Test_tarVisitor_visit(t *testing.T) {
{
name: "device is ignored",
entry: TarFileEntry{
Sequence: 0,
Header: tar.Header{
Header: &tar.Header{
Typeflag: tar.TypeChar,
Name: "device",
},
Expand All @@ -377,8 +369,7 @@ func Test_tarVisitor_visit(t *testing.T) {
{
name: "block device is ignored",
entry: TarFileEntry{
Sequence: 0,
Header: tar.Header{
Header: &tar.Header{
Typeflag: tar.TypeBlock,
Name: "device",
},
Expand All @@ -393,8 +384,7 @@ func Test_tarVisitor_visit(t *testing.T) {
{
name: "pipe is ignored",
entry: TarFileEntry{
Sequence: 0,
Header: tar.Header{
Header: &tar.Header{
Typeflag: tar.TypeFifo,
Name: "pipe",
},
Expand Down
Loading