From 9b83d0683fe4a46a614e3b8c4028517174114def Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Fri, 17 Apr 2020 13:34:18 -0700 Subject: [PATCH] feat: move retries lower Instead or retrying the whole operation, retry the specific parts that can fail. I believe this also fixes cases where we might end up with a retry loop within a retry loop? I'm not sure. --- flatfs.go | 61 ++++++++++++++++++---------------------------------- util.go | 13 ++++++++++- util_unix.go | 2 +- 3 files changed, 34 insertions(+), 42 deletions(-) diff --git a/flatfs.go b/flatfs.go index 7227376..6d4bf75 100644 --- a/flatfs.go +++ b/flatfs.go @@ -52,6 +52,10 @@ var ( // RetryDelay is a timeout for a backoff on retrying operations // that fail due to transient errors like too many file descriptors open. RetryDelay = time.Millisecond * 200 + + // RetryAttempts is the maximum number of retries that will be attempted + // before giving up. + RetryAttempts = 6 ) const ( @@ -369,8 +373,6 @@ func (fs *Datastore) renameAndUpdateDiskUsage(tmpPath, path string) error { return err } -var putMaxRetries = 6 - // Put stores a key/value in the datastore. // // Note, that we do not guarantee order of write operations (Put or Delete) @@ -392,24 +394,11 @@ func (fs *Datastore) Put(key datastore.Key, value []byte) error { return ErrClosed } - var err error - for i := 1; i <= putMaxRetries; i++ { - _, err = fs.doWriteOp(&op{ - typ: opPut, - key: key, - v: value, - }) - if err == nil { - break - } - - if !strings.Contains(err.Error(), "too many open files") { - break - } - - log.Errorf("too many open files, retrying in %dms", 100*i) - time.Sleep(time.Millisecond * 100 * time.Duration(i)) - } + _, err := fs.doWriteOp(&op{ + typ: opPut, + key: key, + v: value, + }) return err } @@ -462,15 +451,7 @@ func (fs *Datastore) doWriteOp(oper *op) (done bool, err error) { return false, nil } - // Do the operation - for i := 0; i < 6; i++ { - err = fs.doOp(oper) - - if err == nil || !isTooManyFDError(err) { - break - } - time.Sleep(time.Duration(i+1) * RetryDelay) - } + err = fs.doOp(oper) // Finish it. If no error, it will signal other operations // waiting on this result to succeed. Otherwise, they will @@ -585,22 +566,17 @@ func (fs *Datastore) putMany(data map[datastore.Key][]byte) error { } dirsToSync[dir] = struct{}{} - tmp, err := fs.tempFile() + tmp, err := fs.tempFileOnce() - // Fallback retry for temporary error. - if err != nil && isTooManyFDError(err) { + // If we have too many files open, try closing some, then try + // again repeatedly. + if isTooManyFDError(err) { if err = closer(); err != nil { return err } - for i := 0; i < 6; i++ { - time.Sleep(time.Duration(i+1) * RetryDelay) - - tmp, err = fs.tempFile() - if err == nil || !isTooManyFDError(err) { - break - } - } + tmp, err = fs.tempFile() } + if err != nil { return err } @@ -1122,6 +1098,11 @@ func (fs *Datastore) tempFile() (*os.File, error) { return file, err } +func (fs *Datastore) tempFileOnce() (*os.File, error) { + file, err := tempFileOnce(fs.tempPath, "temp-") + return file, err +} + // only call this on directories. func (fs *Datastore) walk(path string, qrb *query.ResultBuilder) error { dir, err := os.Open(path) diff --git a/util.go b/util.go index 8f2a8db..46c216f 100644 --- a/util.go +++ b/util.go @@ -23,7 +23,7 @@ func DirIsEmpty(name string) (bool, error) { func readFile(filename string) (data []byte, err error) { // Fallback retry for temporary error. - for i := 0; i < 6; i++ { + for i := 0; i < RetryAttempts; i++ { data, err = readFileOnce(filename) if err == nil || !isTooManyFDError(err) { break @@ -32,3 +32,14 @@ func readFile(filename string) (data []byte, err error) { } return data, err } + +func tempFile(dir, pattern string) (fi *os.File, err error) { + for i := 0; i < RetryAttempts; i++ { + fi, err = tempFileOnce(dir, pattern) + if err == nil || !isTooManyFDError(err) { + break + } + time.Sleep(time.Duration(i+1) * RetryDelay) + } + return fi, err +} diff --git a/util_unix.go b/util_unix.go index 2404fce..2e4b82e 100644 --- a/util_unix.go +++ b/util_unix.go @@ -7,7 +7,7 @@ import ( "os" ) -func tempFile(dir, pattern string) (*os.File, error) { +func tempFileOnce(dir, pattern string) (*os.File, error) { return ioutil.TempFile(dir, pattern) }