-
Notifications
You must be signed in to change notification settings - Fork 39
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[feature] Add fsspec implementation and register it as ratar://
- Loading branch information
Showing
6 changed files
with
218 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
#!/usr/bin/env python3 | ||
# -*- coding: utf-8 -*- | ||
|
||
# pylint: disable=wrong-import-order | ||
# pylint: disable=wrong-import-position | ||
# pylint: disable=protected-access | ||
|
||
import io | ||
import os | ||
import shutil | ||
import sys | ||
import tarfile | ||
import tempfile | ||
|
||
import fsspec | ||
|
||
try: | ||
import pandas as pd | ||
except ImportError: | ||
pd = None # type: ignore | ||
|
||
|
||
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) | ||
|
||
from ratarmountcore.SQLiteIndexedTarFsspec import SQLiteIndexedTarFileSystem as ratarfs # noqa: E402 | ||
|
||
|
||
def findTestFile(relativePathOrName):
    """
    Locate a test fixture relative to the current working directory.

    Looks for "tests/<relativePathOrName>" in the working directory, then in its
    parent, then in its grandparent, and returns the first candidate that exists.
    If none of them exists, the argument is returned unchanged.
    """
    candidates = (
        os.path.sep.join([".."] * depth + ["tests", relativePathOrName])
        for depth in range(3)
    )
    # The generator is lazy, so os.path.exists is only evaluated until the first hit.
    return next(
        (candidate for candidate in candidates if os.path.exists(candidate)),
        relativePathOrName,
    )
|
||
|
||
def test_fileSystem():
    """Exercise the basic fsspec filesystem interface on an archive holding the single file 'bar'."""
    archiveFs = ratarfs(findTestFile('single-file.tar.gz'))

    # The root listing must contain the member, both as plain names and as detail dictionaries.
    plainListing = archiveFs.ls("/", detail=False)
    assert 'bar' in plainListing
    detailedNames = [entry['name'] for entry in archiveFs.ls("/", detail=True)]
    assert 'bar' in detailedNames

    # The archive root is an existing directory, not a file.
    assert not archiveFs.isfile("/")
    assert archiveFs.isdir("/")
    assert archiveFs.exists("/")

    # The member is a file; a name that is not in the archive does not exist.
    assert archiveFs.isfile("/bar")
    assert not archiveFs.isdir("/bar")
    assert not archiveFs.exists("/bar2")

    # The contents must be readable with and without a leading slash.
    expectedContents = b"foo\n"
    assert archiveFs.cat("/bar") == expectedContents
    assert archiveFs.cat("bar") == expectedContents

    with archiveFs.open("bar") as memberFile:
        assert memberFile.read() == expectedContents
|
||
|
||
def test_URLContextManager():
    """Open an archive member through a chained 'ratar://' URL directly inside a with-statement."""
    archivePath = findTestFile('single-file.tar.gz')
    chainedUrl = "ratar://bar::file://" + archivePath
    with fsspec.open(chainedUrl) as memberFile:
        contents = memberFile.read()
        assert contents == b"foo\n"
|
||
|
||
def test_URL():
    """Open an archive member through a chained 'ratar://' URL, keeping the fsspec handle in a local variable."""
    archivePath = findTestFile('single-file.tar.gz')
    # Unlike test_URLContextManager, the object returned by fsspec.open is bound to a name first.
    openFile = fsspec.open("ratar://bar::file://" + archivePath)
    with openFile as memberFile:
        contents = memberFile.read()
        assert contents == b"foo\n"
|
||
|
||
def test_pandas(): | ||
with tempfile.TemporaryDirectory(suffix=".test.ratarmount") as folderPath: | ||
oldPath = os.getcwd() | ||
try: | ||
with open("test.csv", "wt") as file: | ||
file.write("1,2\n3,4") | ||
with tarfile.open("test-csv.tar", "w") as archive: | ||
archive.add("test.csv") | ||
|
||
# Pandas seems | ||
data = pd.read_csv("tar://test.csv::file://test-csv.tar", header=None) | ||
assert data.iloc[0, 1] == 2 | ||
finally: | ||
os.chdir(oldPath) | ||
|
||
|
||
def test_URLRapidgzip():
    """
    Read a large gzip-compressed TAR member through a chained 'ratar://' URL held in a global variable.

    This is a regression test for resource deallocation. The fsspec handle is stored in a
    module-level global on purpose and must be released explicitly before the test returns.
    """
    # Only global variables trigger the "Detected Python finalization from running rapidgzip thread." bug.
    # I am not sure why. Probably, because it gets garbage-collected later.
    global openFile

    # I had problems with resource deallocation!
    # For Rapidgzip it becomes important because of the background threads.
    with tempfile.TemporaryDirectory(suffix=".test.ratarmount") as folderPath:
        contents = os.urandom(96 * 1024 * 1024)

        tarPath = os.path.join(folderPath, "random-data.tar.gz")
        with tarfile.open(name=tarPath, mode="w:gz") as tarArchive:
            # Must create a sufficiently large .tar.gz so that rapidgzip is actually used.
            # In the future this "has multiple chunks" rapidgzip test is to be removed and
            # this whole test becomes redundant.
            tinfo = tarfile.TarInfo("random-data")
            tinfo.size = len(contents)
            tarArchive.addfile(tinfo, io.BytesIO(contents))

        openFile = fsspec.open("ratar://random-data::file://" + tarPath)
        try:
            with openFile as file:
                assert file.read() == contents
        finally:
            # This is still some step the user has to do, but it cannot be avoided.
            # It might be helpful if fsspec had some kind of better resource management for filesystems though.
            # Done in a finally-clause so that the global reference is also dropped when the check above
            # fails, instead of lingering until interpreter shutdown.
            del openFile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters