Skip to content
This repository has been archived by the owner on May 24, 2024. It is now read-only.

Commit

Permalink
Allow specifying a tmp dir
Browse files Browse the repository at this point in the history
  • Loading branch information
liuziba committed Jun 21, 2022
1 parent 98f46ce commit 3c7b420
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 10 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ GLOBAL OPTIONS:
--input value, -i value File to read list of files, or '-' if from stdin (default: "-")
--piece-size value, -s value Target piece size, default to minimum possible value (default: 0)
--out-dir value, -o value Output directory to save the car file (default: ".")
--tmp-dir value, -t value Optionally copy the files to a temporary (and much faster) directory
--parent value, -p value Parent path of the dataset
--help, -h show help (default: false)
```
Expand All @@ -37,3 +38,5 @@ The input file can be a text file that contains a list of file information. i.e.
}
]
```
The tmp dir is useful when the dataset source is on slow storage, such as an NFS or S3FS/Goofys mount.
9 changes: 8 additions & 1 deletion generate-car.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,12 @@ func main() {
Usage: "Output directory to save the car file",
Value: ".",
},
&cli.StringFlag{
Name: "tmp-dir",
Aliases: []string{"t"},
Usage: "Optionally copy the files to a temporary (and much faster) directory",
Value: "",
},
&cli.StringFlag{
Name: "parent",
Aliases: []string{"p"},
Expand All @@ -80,6 +86,7 @@ func main() {
pieceSizeInput := c.Uint64("piece-size")
outDir := c.String("out-dir")
parent := c.String("parent")
tmpDir := c.String("tmp-dir")
var inputBytes []byte
if inputFile == "-" {
reader := bufio.NewReader(os.Stdin)
Expand Down Expand Up @@ -110,7 +117,7 @@ func main() {
}
cp := new(commp.Calc)
writer := bufio.NewWriterSize(io.MultiWriter(carF, cp), BufSize)
ipld, cid, err := util.GenerateCar(ctx, input, parent, writer)
ipld, cid, err := util.GenerateCar(ctx, input, parent, tmpDir, writer)
if err != nil {
return err
}
Expand Down
12 changes: 6 additions & 6 deletions spec/generate-car_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
]
}
}
stdout = `./generate-car -i test/test.json -o test -p test`
stdout = `./generate-car -i test/test.json -o test -p test -t tmpdir`
result = JSON.parse(stdout)
expectDataCid = JSON.parse(expectIpld)['Hash']
expect(result['DataCid']).to eq(expectDataCid)
Expand Down Expand Up @@ -57,7 +57,7 @@
]
}
}
stdout = `./generate-car -i test/test.json -o test -p .`
stdout = `./generate-car -i test/test.json -o test -p . -t tmpdir`
result = JSON.parse(stdout)
expectDataCid = JSON.parse(expectIpld)['Hash']
expect(result['DataCid']).to eq(expectDataCid)
Expand Down Expand Up @@ -86,7 +86,7 @@
]
}
}
stdout = `./generate-car -i test/test-partial.json -o test -p test`
stdout = `./generate-car -i test/test-partial.json -o test -p test -t tmpdir`
result = JSON.parse(stdout)
expectDataCid = JSON.parse(expectIpld)['Hash']
expect(result['DataCid']).to eq(expectDataCid)
Expand Down Expand Up @@ -121,7 +121,7 @@
]
}
}
stdout = `./generate-car -i test/test-multiple.json -o test -p test`
stdout = `./generate-car -i test/test-multiple.json -o test -p test -t tmpdir`
result = JSON.parse(stdout)
expectDataCid = JSON.parse(expectIpld)['Hash']
expect(result['DataCid']).to eq(expectDataCid)
Expand Down Expand Up @@ -208,7 +208,7 @@
]
}
}
stdout = `./generate-car -i test/test-link.json -o test -p test`
stdout = `./generate-car -i test/test-link.json -o test -p test -t tmpdir`
result = JSON.parse(stdout)
expectDataCid = JSON.parse(expectIpld)['Hash']
expect(result['DataCid']).to eq(expectDataCid)
Expand Down Expand Up @@ -356,7 +356,7 @@
]
}
}
stdout = `./generate-car -i generated_test/test.json -o test -p generated_test`
stdout = `./generate-car -i generated_test/test.json -o test -p generated_test -t tmpdir`
result = JSON.parse(stdout)
expectDataCid = JSON.parse(expectIpld)['Hash']
expect(result['DataCid']).to eq(expectDataCid)
Expand Down
51 changes: 48 additions & 3 deletions util/chunk.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,14 +120,21 @@ func (fs *fileSlice) Read(p []byte) (n int, err error) {
return copy(p, b), io.EOF
}

func GenerateCar(ctx context.Context, fileList []Finfo, parentPath string, output io.Writer) (ipldDag *FsNode, cid string, err error) {
func GenerateCar(ctx context.Context, fileList []Finfo, parentPath string, tmpDir string, output io.Writer) (ipldDag *FsNode, cid string, err error) {
batching := dss.MutexWrap(datastore.NewMapDatastore())
bs1 := bstore.NewBlockstore(batching)
absParentPath, err := filepath.Abs(parentPath)
if err != nil {
logger.Warn(err)
return
}
if tmpDir != "" {
absParentPath, err = filepath.Abs(tmpDir)
if err != nil {
logger.Warn(err)
return
}
}
fm := filestore.NewFileManager(batching, absParentPath)
fm.AllowFiles = true
bs2 := filestore.NewFilestore(bs1, fm)
Expand All @@ -143,14 +150,52 @@ func GenerateCar(ctx context.Context, fileList []Finfo, parentPath string, outpu
layers = append(layers, rootNode)
previous := []string{""}
for _, item := range fileList {
if item.End == 0 {
item.End = item.Size
}
var node ipld.Node
var path string
path, err = filepath.Rel(filepath.Clean(parentPath), filepath.Clean(item.Path))
if tmpDir != "" {
tmpPath := filepath.Join(filepath.Clean(tmpDir), path)
err = os.MkdirAll(filepath.Dir(tmpPath), 0777)
if err != nil {
logger.Warn(err)
return
}
// copy file
source, err := os.Open(item.Path)
if err != nil {
logger.Warn(err)
return nil, "", err
}
defer source.Close()
destination, err := os.Create(tmpPath)
if err != nil {
logger.Warn(err)
return nil, "", err
}
defer destination.Close()
_, err = source.Seek(item.Start, 0)
if err != nil {
logger.Warn(err)
return nil, "", err
}
_, err = io.CopyN(destination, source, item.End-item.Start)
if err != nil {
logger.Warn(err)
return nil, "", err
}
item.Path = tmpPath
item.Size = item.End - item.Start
item.End = item.Size
item.Start = 0
}
node, err = BuildFileNode(item, dagServ, cidBuilder)
if err != nil {
logger.Warn(err)
return
}
var path string
path, err = filepath.Rel(filepath.Clean(parentPath), filepath.Clean(item.Path))
if err != nil {
logger.Warn(err)
return
Expand Down

0 comments on commit 3c7b420

Please sign in to comment.