diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 12c4333a..bbf4da82 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -21,7 +21,7 @@ jobs:
     - name: Install pip Dependencies
       run: |
         python3 -m pip install --upgrade pip
-        python3 -m pip install --user fusepy pytest
+        python3 -m pip install --user fusepy pytest lz4 PySquashfsImage
 
     - name: Style Check With Black
       run: |
@@ -118,7 +118,7 @@ jobs:
         # zstd, may also call external binaries depending on how libarchive was compiled!
         # https://github.com/libarchive/libarchive/blob/ad5a0b542c027883d7069f6844045e6788c7d70c/libarchive/
         #   archive_read_support_filter_lrzip.c#L68
-        sudo apt-get -y install libfuse2 fuse3 bzip2 pbzip2 pixz zstd unar lrzip lzop
+        sudo apt-get -y install libfuse2 fuse3 bzip2 pbzip2 pixz zstd unar lrzip lzop gcc liblzo2-dev
         set -x
 
     - name: Install Dependencies (MacOS)
@@ -130,7 +130,7 @@
         # And the error message is atrocious:
         #     cmdline.extend(args)
         #     TypeError: 'NoneType' object is not iterable
-        brew install macfuse coreutils pixz pbzip2 zstd unar libarchive lrzip lzop
+        brew install macfuse coreutils pixz pbzip2 zstd unar libarchive lrzip lzop lzo
 
         # Add brew installation binary folder to PATH so that command line tools like zstd can be found
         export PATH="$PATH:/usr/local/bin"
diff --git a/AppImage/build-ratarmount-appimage.sh b/AppImage/build-ratarmount-appimage.sh
index 6188b921..362fe2a4 100644
--- a/AppImage/build-ratarmount-appimage.sh
+++ b/AppImage/build-ratarmount-appimage.sh
@@ -23,7 +23,7 @@ function installSystemRequirements()
     # We need to install development dependencies to build Python packages from source and we also need
    # to install libraries such as libarchive in order to copy them into the AppImage.
     yum install -y fuse fakeroot patchelf fuse-libs libsqlite3x strace desktop-file-utils libzstd-devel \
-        libarchive libarchive-devel lzop
+        libarchive libarchive-devel lzop lzo lzo-devel
 }
 
 function installAppImageTools()
@@ -61,7 +61,7 @@ function installAppImagePythonPackages()
         "$APP_PYTHON_BIN" -I -m pip install --no-cache-dir rapidgzip
     fi
     "$APP_PYTHON_BIN" -I -m pip install --no-cache-dir ../core
-    "$APP_PYTHON_BIN" -I -m pip install --no-cache-dir ..
+    "$APP_PYTHON_BIN" -I -m pip install --no-cache-dir ..[full]
 }
 
 function installAppImageSystemLibraries()
@@ -96,16 +96,19 @@ function installAppImageSystemLibraries()
         libraries+=( $( repoquery -l fuse-libs | 'grep' 'lib64.*[.]so' ) )
         libraries+=( $( repoquery -l libarchive | 'grep' 'lib64.*[.]so' ) )
         libraries+=( $( repoquery -l libarchive-devel | 'grep' 'lib64.*[.]so' ) )
+        libraries+=( $( repoquery -l lzo | 'grep' 'lib64.*[.]so' ) )
         libraries+=( $( repoquery -l xz-devel | 'grep' 'lib64.*[.]so' ) )
     elif commandExists dnf; then
         libraries+=( $( dnf repoquery -l fuse-libs | 'grep' 'lib64.*[.]so' ) )
         libraries+=( $( dnf repoquery -l libarchive | 'grep' 'lib64.*[.]so' ) )
         libraries+=( $( dnf repoquery -l libarchive-devel | 'grep' 'lib64.*[.]so' ) )
+        libraries+=( $( dnf repoquery -l lzo | 'grep' 'lib64.*[.]so' ) )
         libraries+=( $( dnf repoquery -l xz-devel | 'grep' 'lib64.*[.]so' ) )
     elif commandExists dpkg; then
         libraries+=( $( dpkg -L libfuse2 | 'grep' '/lib.*[.]so' ) )
         libraries+=( $( dpkg -L libarchive13 | 'grep' '/lib.*[.]so' ) )
         libraries+=( $( dpkg -L libarchive-dev | 'grep' '/lib.*[.]so' ) )
+        libraries+=( $( dpkg -L liblzo2-2 | 'grep' '/lib.*[.]so' ) )
         libraries+=( $( dpkg -L liblzma5 | 'grep' '/lib.*[.]so' ) )
     else
         echo -e "\e[31mCannot gather FUSE libs into AppImage without (dnf) repoquery.\e[0m"
diff --git a/core/ratarmountcore/SquashFSMountSource.py b/core/ratarmountcore/SquashFSMountSource.py
new file mode 100644
index 00000000..76879ad1
--- /dev/null
+++ b/core/ratarmountcore/SquashFSMountSource.py
@@ -0,0 +1,514 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# pylint: disable=protected-access, import-outside-toplevel, unused-argument
+
+import io
+import json
+import os
+import re
+import stat
+import tarfile
+import traceback
+from timeit import default_timer as timer
+
+from typing import Any, Dict, IO, List, Optional, Tuple, Union
+
+try:
+    import PySquashfsImage
+except ImportError:
+    PySquashfsImage = None  # type: ignore
+
+try:
+    from PySquashfsImage import SquashFsImage
+except ImportError:
+    # We need to define this stub for @overrides and pytype, but it also serves as nice documentation
+    # of the expected members in PySquashfsImage.SquashFsImage.
+    class SquashFsImage:  # type: ignore
+        def __init__(self, fd, offset=0, closefd=True):
+            self._sblk: Any = None
+
+        def __iter__(self):
+            pass
+
+        def _get_compressor(self, compression_id):
+            pass
+
+        def _initialize(self):
+            pass
+
+        # These are not overridden, only called:
+
+        def _read_block_list(self, start, offset, blocks):
+            raise NotImplementedError
+
+        def _read_fragment(self, fragment):
+            raise NotImplementedError
+
+        def _read_inode(self, start_block, offset):
+            raise NotImplementedError
+
+        def _opendir(self, block_start, offset):
+            raise NotImplementedError
+
+        def _dir_scan(self, start_block, offset):
+            raise NotImplementedError
+
+
+from .compressions import findSquashFSOffset
+from .MountSource import FileInfo, MountSource
+from .SQLiteIndex import SQLiteIndex, SQLiteIndexedTarUserData
+from .SQLiteIndexMountSource import SQLiteIndexMountSource
+from .utils import InvalidIndexError, overrides
+
+
+class SquashFSFile(io.RawIOBase):
+    def __init__(self, image, inode) -> None:
+        self._image = image
+        self._inode = inode
+
+        self._offset = 0
+        self._size = inode.data
+        self._blockSize = image._sblk.block_size
+        self._lastBlockIndex = inode.data // self._blockSize
+
+        self._blockList = []
+        self._dataToBlockOffset: Dict[int, int] = {}  # block offset may be negative (-size) for sparse blocks
+        self._compressedBlockOffsets = []
+        if inode.blocks:
+            self._blockList = [
+                block
+                for block in image._read_block_list(inode.block_start, inode.block_offset, inode.blocks)
+                if block != PySquashfsImage.SQUASHFS_INVALID_FRAG
+            ]
+
+            compressedBlockOffset = inode.start
+            for i, block in enumerate(self._blockList):
+                blockSize = self._size % self._blockSize if i == self._lastBlockIndex else self._blockSize
+                assert blockSize > 0
+                if block:
+                    self._compressedBlockOffsets.append(compressedBlockOffset)
+                    compressedBlockOffset += PySquashfsImage.SQUASHFS_COMPRESSED_SIZE_BLOCK(block)
+                else:
+                    # Sparse file
+                    self._compressedBlockOffsets.append(-blockSize)
+            assert len(self._compressedBlockOffsets) == len(self._blockList)
+
+        self._fragment = None
+        if inode.frag_bytes:
+            self._fragment = image._read_fragment(inode.fragment)
+
+        self._bufferIO: Optional[IO[bytes]] = None
+        self._blockIndex = 0
+        self._buffer = b''
+        self._refillBuffer(self._blockIndex)  # Requires self._blockList to be initialized
+
+    def _refillBuffer(self, blockIndex: int) -> None:
+        self._blockIndex = blockIndex
+        self._buffer = b''
+
+        assert self._blockIndex >= 0
+        if self._blockIndex < len(self._blockList):
+            block = self._blockList[self._blockIndex]
+            if block:
+                start = self._compressedBlockOffsets[self._blockIndex]
+                self._buffer = self._image._read_data_block(start, block)
+            else:
+                if (self._blockIndex + 1) * self._blockSize >= self._size:
+                    blockSize = max(0, self._size - self._blockIndex * self._blockSize)
+                else:
+                    blockSize = self._blockSize
+                self._buffer = b'\0' * blockSize
+        elif self._fragment and self._blockIndex == len(self._blockList):
+            fragment = self._image._read_data_block(*self._fragment)
+            self._buffer = fragment[self._inode.offset : self._inode.offset + self._inode.frag_bytes]
+
+        self._bufferIO = io.BytesIO(self._buffer)
+
+    @overrides(io.RawIOBase)
+    def readinto(self, buffer):
+        """Generic implementation which uses read."""
+        with memoryview(buffer) as view, view.cast("B") as byteView:  # type: ignore
+            readBytes = self.read(len(byteView))
+            byteView[: len(readBytes)] = readBytes
+        return len(readBytes)
+
+    def read1(self, size: int = -1) -> bytes:
+        if not self._bufferIO:
RuntimeError("Closed file cannot be read from!") + result = self._bufferIO.read(size) + # An empty buffer signals the end of the file! + if result or not self._buffer: + return result + + self._blockIndex += 1 + self._refillBuffer(self._blockIndex) + return self._bufferIO.read(size) + + @overrides(io.RawIOBase) + def read(self, size: int = -1) -> bytes: + result = bytearray() + while size < 0 or len(result) < size: + readData = self.read1(size if size < 0 else size - len(result)) + if not readData: + break + result.extend(readData) + return bytes(result) + + @overrides(io.RawIOBase) + def fileno(self) -> int: + # This is a virtual Python level file object and therefore does not have a valid OS file descriptor! + raise io.UnsupportedOperation() + + @overrides(io.RawIOBase) + def seekable(self) -> bool: + return True + + @overrides(io.RawIOBase) + def readable(self) -> bool: + return True + + @overrides(io.RawIOBase) + def writable(self) -> bool: + return False + + @overrides(io.RawIOBase) + def seek(self, offset: int, whence: int = io.SEEK_SET) -> int: + if not self._bufferIO: + raise RuntimeError("Closed file cannot be seeked!") + + here = self.tell() + if whence == io.SEEK_CUR: + offset += here + elif whence == io.SEEK_END: + offset += self._size + + self._offset = max(0, min(offset, self._size)) + bufferOffset = self._blockIndex * self._blockSize + if offset < bufferOffset or offset >= bufferOffset + len(self._buffer): + self._refillBuffer(offset // self._blockSize) # Updates self._blockIndex! + self._bufferIO.seek(offset - self._blockIndex * self._blockSize) + + return self.tell() + + @overrides(io.RawIOBase) + def tell(self) -> int: + # Returning self._blockIndex * self._blockSize + self._bufferIO.tell() will not work when we have + # an empty buffer after trying to read past the end of the file. + return self._offset + + +# https://github.com/matteomattei/PySquashfsImage/blob/e637b26b3bc6268dd589fa1439fecf99e49a565b/PySquashfsImage/__init__.py#L82 +class SquashFSImage(SquashFsImage): + """ + Contains several improvements over the base class: + - Does not create the whole folder hierarchy in memory when only iterating over it to avoid high memory + usage for SquashFS images with millions of files. + - Adds seekable, streamable file object accessor that can be opened given a single number. + - Adds thread locks around the underlying file object so that multiple file objects can be opened and used + from multiple threads concurrently. + - Uses libdeflate or ISA-L if installed, which a generally faster than the standard zlib. + + Beware that we are overwriting and using "private" methods starting with underscores! + That's why we need to pin to an exact PySquashfsImage release. 
+    """
+
+    @overrides(SquashFsImage)
+    def __init__(self, *args, **kwargs):
+        self._real_root = None
+        super().__init__(*args, **kwargs)  # Calls overridden _initialize
+
+    @overrides(SquashFsImage)
+    def _initialize(self):
+        self._fd.seek(self._offset)
+        self._read_super()
+        self._read_uids_guids()
+        self._read_fragment_table()
+        self._read_xattrs_from_disk()
+        # Moved self._root initialization into a property and _generate_root
+
+    def _generate_root(self):
+        root_block = PySquashfsImage.SQUASHFS_INODE_BLK(self._sblk.root_inode)
+        root_offset = PySquashfsImage.SQUASHFS_INODE_OFFSET(self._sblk.root_inode)
+        self._real_root = self._dir_scan(root_block, root_offset)
+
+    @staticmethod
+    def _join_inode_offset(start_block, offset):
+        assert start_block < 2**32
+        assert offset < 2**16
+        return (start_block << 16) + offset
+
+    @staticmethod
+    def _split_inode_offset(inode_offset):
+        return inode_offset >> 16, inode_offset & 0xFFFF
+
+    def read_inode(self, inode_offset):
+        """Newly added method over SquashFsImage that adds an accessor via a single integer."""
+        return self._read_inode(*self._split_inode_offset(inode_offset))
+
+    @overrides(SquashFsImage)
+    def __iter__(self):  # -> PySquashfsImage.file.File
+        """
+        Performance-improved version of PySquashfsImage.__iter__ that generates entries on demand instead
+        of keeping all metadata in memory and returning a generator over that.
+        """
+        root_block = PySquashfsImage.SQUASHFS_INODE_BLK(self._sblk.root_inode)
+        root_offset = PySquashfsImage.SQUASHFS_INODE_OFFSET(self._sblk.root_inode)
+        root_inode_offset, root_directory = self._open_directory(root_block, root_offset)
+        yield root_inode_offset, root_directory
+        yield from self._recursive_inodes_iterator(root_directory)
+
+    def _open_directory(self, start_block, offset, parent=None, name=None):
+        directory = self._opendir(start_block, offset)
+        if parent is not None:
+            directory._parent = parent
+        if name is not None:
+            directory._name = name
+        return self._join_inode_offset(start_block, offset), directory
+
+    def _recursive_inodes_iterator(self, directory):  # -> PySquashfsImage.file.File
+        for entry in directory.entries:
+            start_block = entry["start_block"]
+            offset = entry["offset"]
+            if entry["type"] == PySquashfsImage.Type.DIR:
+                inode_offset, subdirectory = self._open_directory(start_block, offset, directory, entry["name"])
+                yield inode_offset, subdirectory
+                yield from self._recursive_inodes_iterator(subdirectory)
+            else:
+                inode = self._read_inode(start_block, offset)
+                cls = PySquashfsImage.filetype[entry["type"]]
+                yield self._join_inode_offset(start_block, offset), cls(self, inode, entry["name"], directory)
+
+    @property
+    def _root(self):
+        if self._real_root is None:
+            self._generate_root()
+        return self._real_root
+
+    @_root.setter
+    def _root(self, value):
+        # super().__init__ will initialize it to None, but super()._initialize should not be called!
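+        # Intentionally discard the assigned value: the directory tree is instead built lazily by the
+        # _root property via _generate_root on first access.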
+        assert value is None
+
+    def open(self, inode):
+        return SquashFSFile(self, inode)
+
+
+class SquashFSMountSource(SQLiteIndexMountSource):
+    def __init__(
+        self,
+        # fmt: off
+        fileOrPath             : Union[str, IO[bytes]],
+        writeIndex             : bool                      = False,
+        clearIndexCache        : bool                      = False,
+        indexFilePath          : Optional[str]             = None,
+        indexFolders           : Optional[List[str]]       = None,
+        encoding               : str                       = tarfile.ENCODING,
+        verifyModificationTime : bool                      = False,
+        printDebug             : int                       = 0,
+        indexMinimumFileCount  : int                       = 1000,
+        transform              : Optional[Tuple[str, str]] = None,
+        **options
+        # fmt: on
+    ) -> None:
+        self.rawFileObject = open(fileOrPath, 'rb') if isinstance(fileOrPath, str) else fileOrPath
+        self.rawFileObject.seek(0)
+        offset = findSquashFSOffset(self.rawFileObject)
+        if offset < 0:
+            raise ValueError("Not a valid SquashFS image!")
+
+        # fmt: off
+        self.fileObject             = SquashFSImage(self.rawFileObject, offset=offset)
+        self.archiveFilePath        = fileOrPath if isinstance(fileOrPath, str) else None
+        self.encoding               = encoding
+        self.verifyModificationTime = verifyModificationTime
+        self.printDebug             = printDebug
+        self.options                = options
+        self.transformPattern       = transform
+        # fmt: on
+
+        self.transform = (
+            (lambda x: re.sub(self.transformPattern[0], self.transformPattern[1], x))
+            if isinstance(self.transformPattern, (tuple, list)) and len(self.transformPattern) == 2
+            else (lambda x: x)
+        )
+
+        super().__init__(
+            SQLiteIndex(
+                indexFilePath,
+                indexFolders=indexFolders,
+                archiveFilePath=self.archiveFilePath,
+                encoding=self.encoding,
+                checkMetadata=self._checkMetadata,
+                printDebug=self.printDebug,
+                indexMinimumFileCount=indexMinimumFileCount,
+                backendName='SquashFSMountSource',
+            ),
+            clearIndexCache=clearIndexCache,
+        )
+
+        isFileObject = not isinstance(fileOrPath, str)
+
+        if self.index.indexIsLoaded():
+            # self._loadOrStoreCompressionOffsets()  # load
+            self.index.reloadIndexReadOnly()
+        else:
+            # Open a new database when we did not find an existing one.
+            # Simply open it in memory, without an error, even if writeIndex is True, when no index file
+            # location has been given.
+            if writeIndex and (indexFilePath or not isFileObject):
+                self.index.openWritable()
+            else:
+                self.index.openInMemory()
+
+            self._createIndex()
+            # self._loadOrStoreCompressionOffsets()  # store
+            if self.index.indexIsLoaded():
+                self._storeMetadata()
+                self.index.reloadIndexReadOnly()
+
+    def _storeMetadata(self) -> None:
+        argumentsToSave = ['encoding', 'transformPattern']
+        argumentsMetadata = json.dumps({argument: getattr(self, argument) for argument in argumentsToSave})
+        self.index.storeMetadata(argumentsMetadata, self.archiveFilePath)
+
+    def _convertToRow(self, inodeOffset: int, info: "PySquashfsImage.file.File") -> Tuple:  # type: ignore
+        # Note that PySquashfsImage.file.Directory inherits from file.File, i.e., info can also be a directory.
+        mode = 0o555 | (stat.S_IFDIR if info.is_dir else stat.S_IFREG)
+        mtime = info.time
+
+        linkname = ""
+        if info.is_symlink:
+            linkname = info.readlink()
+            mode = 0o555 | stat.S_IFLNK
+
+        path, name = SQLiteIndex.normpath(self.transform(info.path)).rsplit("/", 1)
+
+        # Currently unused. SquashFS files are stored in multiple blocks, so a single offset is insufficient.
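+        # Files are instead reopened via the inode offset stored in the "header offset" column (see open below).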
+        dataOffset = 0
+
+        # SquashFS also returns non-zero sizes for directories, FIFOs, symbolic links, and device files.
+        fileSize = info.size if info.is_file else 0
+
+        # fmt: off
+        fileInfo : Tuple = (
+            path       ,  # 0  : path
+            name       ,  # 1  : file name
+            inodeOffset,  # 2  : header offset
+            dataOffset ,  # 3  : data offset
+            fileSize   ,  # 4  : file size
+            mtime      ,  # 5  : modification time
+            mode       ,  # 6  : file mode / permissions
+            0          ,  # 7  : TAR file type. Currently unused. Overlaps with mode
+            linkname   ,  # 8  : linkname
+            0          ,  # 9  : user ID
+            0          ,  # 10 : group ID
+            False      ,  # 11 : is TAR (unused?)
+            False      ,  # 12 : is sparse
+        )
+        # fmt: on
+
+        return fileInfo
+
+    def _createIndex(self) -> None:
+        if self.printDebug >= 1:
+            print(f"Creating offset dictionary for {self.archiveFilePath} ...")
+        t0 = timer()
+
+        self.index.ensureIntermediaryTables()
+
+        # TODO Doing this in a chunked manner with generators would make it work better for large archives.
+        fileInfos = []
+        for inodeOffset, info in self.fileObject:
+            fileInfos.append(self._convertToRow(inodeOffset, info))
+        self.index.setFileInfos(fileInfos)
+
+        # Resort by (path, name). This one-time resort is faster than resorting on each INSERT (cache spill).
+        if self.printDebug >= 2:
+            print("Resorting files by path ...")
+
+        self.index.finalize()
+
+        t1 = timer()
+        if self.printDebug >= 1:
+            print(f"Creating offset dictionary for {self.archiveFilePath} took {t1 - t0:.2f}s")
+
+    @overrides(SQLiteIndexMountSource)
+    def __exit__(self, exception_type, exception_value, exception_traceback):
+        super().__exit__(exception_type, exception_value, exception_traceback)
+        self.rawFileObject.close()
+        self.fileObject.close()
+
+    @overrides(MountSource)
+    def open(self, fileInfo: FileInfo) -> IO[bytes]:
+        assert fileInfo.userdata
+        extendedFileInfo = fileInfo.userdata[-1]
+        assert isinstance(extendedFileInfo, SQLiteIndexedTarUserData)
+        return self.fileObject.open(self.fileObject.read_inode(extendedFileInfo.offsetheader))
+
+    def _tryToOpenFirstFile(self):
+        # Get the first row that has the regular file bit set in mode (stat.S_IFREG == 32768 == 1<<15).
+        result = self.index.getConnection().execute(
+            f"""SELECT path,name {SQLiteIndex.FROM_REGULAR_FILES} ORDER BY "offsetheader" ASC LIMIT 1;"""
+        )
+        if not result:
+            return
+        firstFile = result.fetchone()
+        if not firstFile:
+            return
+
+        if self.printDebug >= 2:
+            print(
+                "[Info] The index contains no backend name. Therefore, we will try to open the first file as "
+                "an integrity check."
+            )
+        try:
+            fileInfo = self.getFileInfo(firstFile[0] + '/' + firstFile[1])
+            if not fileInfo:
+                return
+
+            with self.open(fileInfo) as file:
+                file.read(1)
+        except Exception as exception:
+            if self.printDebug >= 2:
+                print("[Info] Trying to open the first file raised an exception:", exception)
+            if self.printDebug >= 3:
+                traceback.print_exc()
+            raise InvalidIndexError("Integrity check of opening the first file failed.") from exception
+
+    def _checkMetadata(self, metadata: Dict[str, Any]) -> None:
+        """Raises an exception if the metadata mismatches so much that the index has to be treated as incompatible."""
+
+        if 'tarstats' in metadata:
+            if not self.archiveFilePath:
+                raise InvalidIndexError("Archive contains file stats but cannot stat real archive!")
+
+            storedStats = json.loads(metadata['tarstats'])
+            archiveStats = os.stat(self.archiveFilePath)
+
+            if hasattr(archiveStats, "st_size") and 'st_size' in storedStats:
+                if archiveStats.st_size < storedStats['st_size']:
+                    raise InvalidIndexError(
+                        f"Archive for this SQLite index has shrunk in size from "
+                        f"{storedStats['st_size']} to {archiveStats.st_size}"
+                    )
+
+            # Only happens very rarely, e.g., for more recent files with the same size.
+            if (
+                self.verifyModificationTime
+                and hasattr(archiveStats, "st_mtime")
+                and 'st_mtime' in storedStats
+                and archiveStats.st_mtime != storedStats['st_mtime']
+            ):
+                raise InvalidIndexError(
+                    f"The modification date for the archive file {storedStats['st_mtime']} "
+                    f"to this SQLite index has changed ({str(archiveStats.st_mtime)})",
+                )
+
+        if 'arguments' in metadata:
+            SQLiteIndex.checkMetadataArguments(
+                json.loads(metadata['arguments']), self, argumentsToCheck=['encoding', 'transformPattern']
+            )
+
+        if 'backendName' not in metadata:
+            self._tryToOpenFirstFile()
diff --git a/core/ratarmountcore/compressions.py b/core/ratarmountcore/compressions.py
index 7759de26..89e15860 100644
--- a/core/ratarmountcore/compressions.py
+++ b/core/ratarmountcore/compressions.py
@@ -66,6 +66,11 @@
 except (ImportError, AttributeError):
     libarchive = None
 
+try:
+    import PySquashfsImage
+except ImportError:
+    PySquashfsImage = None
+
 
 CompressionModuleInfo = collections.namedtuple('CompressionModuleInfo', ['name', 'open'])
 # Defining lambdas does not yet check the names of entities used inside the lambda!
@@ -167,6 +172,72 @@ def isRarFile(fileObject) -> bool:
 }
 
 
+def isSquashFS(fileObject) -> bool:
+    offset = fileObject.tell()
+    try:
+        # https://dr-emann.github.io/squashfs/squashfs.html#_the_superblock
+        magicBytes = fileObject.read(4)
+        if magicBytes != b"hsqs":
+            return False
+
+        _inodeCount, _modificationTime, blockSize, _fragmentCount = struct.unpack('<IIII', fileObject.read(4 * 4))
+        compressor, blockSizeLog2, _flags, _idCount, major, minor = struct.unpack('<HHHHHH', fileObject.read(6 * 2))
+
+        # The size of a data block in bytes. Must be a power of two between 4096 (2**12) and 1 MiB (2**20).
+        if blockSizeLog2 < 12 or blockSizeLog2 > 20 or 2**blockSizeLog2 != blockSize:
+            return False
+
+        if major != 4 or minor != 0:
+            return False
+
+        # Compressions: 0:None, 1:GZIP, 2:LZMA, 3:LZO, 4:XZ, 5:LZ4, 6:ZSTD
+        if compressor > 6:
+            return False
+
+    finally:
+        fileObject.seek(offset)
+
+    return True
+
+
+def findSquashFSOffset(fileObject, maxSkip=1024 * 1024) -> int:
+    """
+    Looks for the SquashFS superblock, which can be at something other than offset 0 for AppImage files.
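+    Returns the byte offset of the superblock, or -1 if no valid superblock could be found.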
+    """
+    # https://dr-emann.github.io/squashfs/squashfs.html#_the_superblock
+    if isSquashFS(fileObject):
+        return 0
+
+    oldOffset = fileObject.tell()
+    try:
+        magic = b"hsqs"
+        data = fileObject.read(maxSkip + len(magic))
+        magicOffset = 0
+        while True:
+            magicOffset = data.find(magic, magicOffset + 1)
+            if magicOffset < 0 or magicOffset >= len(data):
+                break
+            fileObject.seek(magicOffset)
+            if isSquashFS(fileObject):
+                return magicOffset
+    finally:
+        fileObject.seek(oldOffset)
+
+    return -1
+
+
+if 'PySquashfsImage' in sys.modules and isinstance(PySquashfsImage, types.ModuleType):
+    ARCHIVE_FORMATS['squashfs'] = CompressionInfo(
+        ['squashfs', 'AppImage', 'snap'],
+        [],
+        [CompressionModuleInfo('PySquashfsImage', lambda x: PySquashfsImage.SquashFsImage(x))],
+        lambda x: findSquashFSOffset(x) >= 0,
+    )
+
+
 # libarchive support is split into filters (compressors or encoders working on a single file) and (archive) formats.
 # For now, only list formats here that are not supported by other backends, because libarchive is slower anyway.
 LIBARCHIVE_FILTER_FORMATS: Dict[str, CompressionInfo] = {}
diff --git a/core/ratarmountcore/factory.py b/core/ratarmountcore/factory.py
index be422ed5..81d65d97 100644
--- a/core/ratarmountcore/factory.py
+++ b/core/ratarmountcore/factory.py
@@ -6,13 +6,14 @@
 
 from typing import IO, Optional, Union
 
-from .compressions import checkForSplitFile, libarchive, rarfile, TAR_COMPRESSION_FORMATS, zipfile
+from .compressions import checkForSplitFile, libarchive, PySquashfsImage, rarfile, TAR_COMPRESSION_FORMATS, zipfile
 from .utils import CompressionError, RatarmountError
 from .MountSource import MountSource
 from .FolderMountSource import FolderMountSource
 from .RarMountSource import RarMountSource
 from .SingleFileMountSource import SingleFileMountSource
 from .SQLiteIndexedTar import SQLiteIndexedTar
+from .SquashFSMountSource import SquashFSMountSource
 from .StenciledFile import JoinedFileFromFactory
 from .ZipMountSource import ZipMountSource
 from .LibarchiveMountSource import LibarchiveMountSource
@@ -40,7 +41,7 @@ def _openTarMountSource(fileOrPath: Union[str, IO[bytes]], **options) -> Optiona
 
 def _openZipMountSource(fileOrPath: Union[str, IO[bytes]], **options) -> Optional[MountSource]:
     try:
-        if zipfile is not None and zipfile is not None:
+        if zipfile is not None:
             # is_zipfile might yield some false positives, but those should then raise exceptions, which
             # are caught, so it should be fine. See: https://bugs.python.org/issue42096
             if zipfile.is_zipfile(fileOrPath):
@@ -83,10 +84,21 @@ def _openLibarchiveMountSource(fileOrPath: Union[str, IO[bytes]], **options) ->
     return None
 
 
+def _openPySquashfsImage(fileOrPath: Union[str, IO[bytes]], **options) -> Optional[MountSource]:
+    try:
+        if PySquashfsImage is not None:
+            return SquashFSMountSource(fileOrPath, **options)
+    finally:
+        if hasattr(fileOrPath, 'seek'):
+            fileOrPath.seek(0)  # type: ignore
+    return None
+
+
 _BACKENDS = {
     "rarfile": _openRarMountSource,
     "tarfile": _openTarMountSource,
     "zipfile": _openZipMountSource,
+    "pysquashfsimage": _openPySquashfsImage,
     "libarchive": _openLibarchiveMountSource,
 }
diff --git a/core/tests/test_SquashfsMountSource.py b/core/tests/test_SquashfsMountSource.py
new file mode 100644
index 00000000..68cc969f
--- /dev/null
+++ b/core/tests/test_SquashfsMountSource.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# pylint: disable=wrong-import-order
+# pylint: disable=wrong-import-position
+# pylint: disable=protected-access
+
+import io
+import os
+import struct
+import sys
+
+import pytest
+
+from helpers import copyTestFile
+
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+from ratarmountcore.compressions import findSquashFSOffset  # noqa: E402
+from ratarmountcore.SquashFSMountSource import SquashFSMountSource  # noqa: E402
+
+
+class TestSquashfsMountSource:
+    @staticmethod
+    def test_find_magic_bytes():
+        assert findSquashFSOffset(io.BytesIO()) < 0
+        assert findSquashFSOffset(io.BytesIO(b"")) < 0
+        assert findSquashFSOffset(io.BytesIO(b"a")) < 0
+        assert findSquashFSOffset(io.BytesIO(b"ab")) < 0
+        assert findSquashFSOffset(io.BytesIO(b"ab")) < 0
+        assert findSquashFSOffset(io.BytesIO(b"foob")) < 0
+
+        validHeader = b"hsqs" + struct.pack('<IIII', 0, 0, 4096, 0) + struct.pack('<HHHHHH', 1, 12, 0, 0, 4, 0)
+        assert findSquashFSOffset(io.BytesIO(validHeader)) == 0
+        assert findSquashFSOffset(io.BytesIO(b"\0" + validHeader)) == 1
diff --git a/ratarmount.py b/ratarmount.py
--- a/ratarmount.py
+++ b/ratarmount.py
@@ ... @@ def checkInputFileType(
         if printDebug >= 2:
-            print(f"Archive '{tarFile}' (compression: {compression}) can't be opened!")
+            print(f"Archive '{tarFile}' (compression: {compression}) cannot be opened!")
 
         if printDebug >= 1:
             print("[Info] Supported compressions:", list(supportedCompressions.keys()))
@@ -921,7 +919,7 @@ def checkInputFileType(
             print("[Warning]  - apt install libarchive13")
             print("[Warning]  - yum install libarchive")
 
-        raise argparse.ArgumentTypeError(f"Archive '{tarFile}' can't be opened!")
+        raise argparse.ArgumentTypeError(f"Archive '{tarFile}' cannot be opened!")
 
     if not findAvailableOpen(compression):
         moduleNames = [module.name for module in supportedCompressions[compression].modules]
@@ -1812,7 +1810,7 @@ def addToDeletionFile(deletionListFile, pathRelativeToRoot: str):
             operations=fuseOperationsObject,
             mountpoint=args.mount_point,
             foreground=args.foreground,
-            nothreads=True,  # Can't access SQLite database connection object from multiple threads
+            nothreads=True,  # Cannot access SQLite database connection object from multiple threads
             **fusekwargs,
         )
     except RuntimeError as exception:
diff --git a/tests/.pylintrc b/tests/.pylintrc
index e85a2066..ffad0026 100644
--- a/tests/.pylintrc
+++ b/tests/.pylintrc
@@ -3,7 +3,8 @@ init-hook='import sys; sys.path.append("./core")'
 # A comma-separated list of package or module names from where C extensions may
 # be loaded. Extensions are loading into the active Python interpreter and may
 # run arbitrary code.
-extension-pkg-whitelist=indexed_gzip,indexed_bzip2,indexed_zstd,libarchive,libarchive.ffi,lzmaffi,rapidgzip
+extension-pkg-whitelist=indexed_gzip,indexed_bzip2,indexed_zstd,libarchive,libarchive.ffi,lzmaffi,rapidgzip,isal,
+    PySquashfsImage,PySquashfsImage.compressor,zstandard,lz4,deflate
 
 # Specify a score threshold to be exceeded before program exits with error.
 fail-under=10.0
diff --git a/tests/ratarmount-help.txt b/tests/ratarmount-help.txt
index 8a74d5bd..719de028 100644
--- a/tests/ratarmount-help.txt
+++ b/tests/ratarmount-help.txt
@@ -169,8 +169,9 @@ Advanced Options:
        Specify a backend to be used with higher priority for files which might
        be opened with multiple backends. Arguments specified last will have
        the highest priority. A comma-separated list may be specified. Possible
-       backends: ['indexed_gzip', 'indexed_zstd', 'libarchive', 'lzmaffi',
-       'rapidgzip', 'rarfile', 'xz', 'zipfile'] (default: None)
+       backends: ['PySquashfsImage', 'indexed_gzip', 'indexed_zstd',
+       'libarchive', 'lzmaffi', 'rapidgzip', 'rarfile', 'xz', 'zipfile']
+       (default: None)
 
   -d DEBUG, --debug DEBUG
        Sets the debugging level. Higher means more output. Currently, 3 is the
        highest. (default: 1)
diff --git a/tests/runtests.sh b/tests/runtests.sh
index 0df2bd8a..e30570b6 100755
--- a/tests/runtests.sh
+++ b/tests/runtests.sh
@@ -1864,7 +1864,20 @@ tests+=(
     f95f8943f6dcf7b3c1c8c2cab5455f8b tests/2k-recursive-tars.tar.bz2 mimi/02000.tar/foo
     c157a79031e1c40f85931829bc5fc552 tests/2k-recursive-tars.tar.bz2 mimi/foo
 )
 
 pytestedTests+=(
+    2709a3348eb2c52302a7606ecf5860bc tests/folder-symlink.gzip.squashfs foo/fighter/ufo
+    2709a3348eb2c52302a7606ecf5860bc tests/folder-symlink.lzo.squashfs foo/fighter/ufo
+    2709a3348eb2c52302a7606ecf5860bc tests/folder-symlink.no-compression.squashfs foo/fighter/ufo
+    2709a3348eb2c52302a7606ecf5860bc tests/folder-symlink.xz.squashfs foo/fighter/ufo
+    2709a3348eb2c52302a7606ecf5860bc tests/folder-symlink.zstd.squashfs foo/fighter/ufo
+
+    2709a3348eb2c52302a7606ecf5860bc tests/folder-symlink.gzip.squashfs foo/jet/ufo
+    2709a3348eb2c52302a7606ecf5860bc tests/folder-symlink.lzo.squashfs foo/jet/ufo
+    2709a3348eb2c52302a7606ecf5860bc tests/folder-symlink.no-compression.squashfs foo/jet/ufo
+    2709a3348eb2c52302a7606ecf5860bc tests/folder-symlink.xz.squashfs foo/jet/ufo
+    2709a3348eb2c52302a7606ecf5860bc tests/folder-symlink.zstd.squashfs foo/jet/ufo
+
     2709a3348eb2c52302a7606ecf5860bc tests/file-in-non-existing-folder.rar foo2/ufo
     2709a3348eb2c52302a7606ecf5860bc tests/folder-symlink.rar foo/fighter/ufo
     2709a3348eb2c52302a7606ecf5860bc tests/folder-symlink.rar foo/jet/ufo
@@ -2022,7 +2035,7 @@ if ! uname | 'grep' -q -i darwin; then
 fi
 
 # Intended for AppImage integration tests, for which the pytest unit tests are decidedly not sufficient
-# to detect, e.g., missing libarries in the AppImage.
+# to detect, e.g., missing libraries in the AppImage.
 if [[ $TEST_EXTERNAL_COMMAND -eq 1 ]]; then
     tests+=( "${pytestedTests[@]}" )
 fi
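
For reference, a minimal usage sketch of the new backend, not part of the change set itself. It assumes PySquashfsImage is installed and reuses the folder-symlink.no-compression.squashfs test image and member path from the runtests.sh entries above; the with-statement usage follows from the __exit__ override in SquashFSMountSource:

    #!/usr/bin/env python3
    # Sketch: open a SquashFS image directly through the backend added in this diff.
    from ratarmountcore.SquashFSMountSource import SquashFSMountSource

    # Assumes the test image from tests/runtests.sh exists in the working directory.
    with SquashFSMountSource("tests/folder-symlink.no-compression.squashfs") as mountSource:
        # The lookup goes through the SQLite index that _createIndex built on first open.
        fileInfo = mountSource.getFileInfo("/foo/fighter/ufo")
        assert fileInfo is not None
        # open() re-reads the inode via the stored "header offset" and returns a SquashFSFile.
        with mountSource.open(fileInfo) as file:
            print(file.read())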