Skip to content

Commit

Permalink
[wip][performance] Use pread when possible to avoid read amplification caused by buffering
Browse files Browse the repository at this point in the history
  • Loading branch information
mxmlnkn committed Sep 9, 2024
1 parent 4f58040 commit a787212
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 5 deletions.
12 changes: 11 additions & 1 deletion core/ratarmountcore/SQLiteIndexedTar.py
Original file line number Diff line number Diff line change
Expand Up @@ -1163,7 +1163,17 @@ def read(self, fileInfo: FileInfo, size: int, offset: int) -> bytes:
return file.read(size)

# For non-sparse files, we can simply seek to the offset and read from it.
self.tarFileObject.seek(tarFileInfo.offset + offset, os.SEEK_SET)
fileno = None
if hasattr(os, 'pread'):
if hasattr(self.tarFileObject, 'fileno'):
try:
fileno = self.tarFileObject.fileno()
except Exception:
pass

if isinstance(fileno, int) and fileno >= 0:
return os.pread(fileno, size, tarFileInfo.offset + offset)
self.tarFileObject.seek(tarFileInfo.offset + offset, io.SEEK_SET)
return self.tarFileObject.read(size)

@overrides(MountSource)
Expand Down
40 changes: 36 additions & 4 deletions core/ratarmountcore/StenciledFile.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,11 +151,28 @@ def read(self, size: int = -1) -> bytes:
offsetInsideStencil = self.offset - self.cumsizes[i]
assert offsetInsideStencil >= 0
assert offsetInsideStencil < self.sizes[i]
self.fileObjects[i].seek(self.offsets[i] + offsetInsideStencil, io.SEEK_SET)
offset = self.offsets[i] + offsetInsideStencil

# Read as much as requested or as much as the current contiguous region / stencil still contains
readableSize = min(size, self.sizes[i] - (self.offset - self.cumsizes[i]))
tmp = self.fileObjects[i].read(readableSize)

fileObject = self.fileObjects[i]
fileno = None
if hasattr(os, 'pread'):
if hasattr(fileObject, 'fileno'):
try:
fileno = fileObject.fileno()
except Exception:
pass

# print("[StenciledFile.read] fileno:", fileno, "readableSize:", readableSize)

if isinstance(fileno, int) and fileno >= 0:
tmp = os.pread(fileno, readableSize, offset)
else:
fileObject.seek(offset, io.SEEK_SET)
tmp = fileObject.read(readableSize)

self.offset += len(tmp)
result += tmp

Expand Down Expand Up @@ -294,11 +311,26 @@ def read(self, size: int = -1) -> bytes:
offsetInsideStencil = self.offset - self.cumsizes[i]
assert offsetInsideStencil >= 0
assert offsetInsideStencil < self.sizes[i]
fileObject.seek(offsetInsideStencil, io.SEEK_SET)

# Read as much as requested or as much as the current contiguous region / stencil still contains
readableSize = min(size, self.sizes[i] - (self.offset - self.cumsizes[i]))
tmp = fileObject.read(readableSize)

fileno = None
print("TRY PREAD:")
if hasattr(os, 'pread'):
if hasattr(fileObject, 'fileno'):
try:
fileno = fileObject.fileno()
except Exception:
pass

print("FILENO:", fileno)
if isinstance(fileno, int) and fileno >= 0:
tmp = os.pread(fileObject.fileno(), readableSize, offsetInsideStencil)
else:
fileObject.seek(offsetInsideStencil, io.SEEK_SET)
tmp = fileObject.read(readableSize)

self.offset += len(tmp)
result += tmp

Expand Down

0 comments on commit a787212

Please sign in to comment.