Skip to content

Commit

Permalink
TLDR-521 Fix splitext for file names with several dots (#385)
Browse files Browse the repository at this point in the history
* Fix splitext for file names with several dots

* Add one more double extension

* Review fixes
  • Loading branch information
NastyBoget authored Dec 19, 2023
1 parent 72d27f7 commit 49143dd
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 11 deletions.
17 changes: 6 additions & 11 deletions dedoc/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import re
import shutil
import time
from os.path import splitext
from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, TypeVar

import requests
Expand All @@ -24,6 +23,7 @@
from dedoc.data_structures.tree_node import TreeNode

# Generic element type used in this module's annotations (e.g. by list_get).
T = TypeVar("T")
# Two-part extensions that splitext_ returns whole instead of only the last part.
# They are matched with str.endswith, so the comparison is case-sensitive.
double_dot_extensions = (".txt.gz", ".tar.gz", ".mht.gz", ".mhtml.gz", ".note.pickle")


def list_get(ls: List[T], index: int, default: Optional[T] = None) -> Optional[T]:
Expand Down Expand Up @@ -63,16 +63,11 @@ def splitext_(path: str) -> Tuple[str, str]:
"""
get extensions with several dots
"""
# NOTE(review): this hunk lost its +/- markers. Per the hunk header (16 old lines -> 11 new lines),
# the next 10 lines appear to be the implementation deleted by this commit - confirm against the repository.
if len(path.split()) > 1:
first, second = path.rsplit(maxsplit=1)
sep = path[len(first)]
name, ext = splitext(second)
if len(ext) == 0:
name, ext = ext, name
return first + sep + name, ext
if len(path.split(".")) > 2:
return path.split(".")[0], "." + ".".join(path.split(".")[-2:])
return splitext(path)
# NOTE(review): the remaining lines appear to be the implementation added by this commit.
# Paths that do not end with one of double_dot_extensions are delegated to os.path.splitext.
if not path.endswith(double_dot_extensions):
return os.path.splitext(path)

# Otherwise split off the last two dot-separated parts and rejoin them as one extension, e.g. ".tar.gz".
name, *ext_list = path.rsplit(".", maxsplit=2)
return name, f".{'.'.join(ext_list)}"


def _text_from_item(item: dict) -> str:
Expand Down
12 changes: 12 additions & 0 deletions tests/unit_tests/test_module_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,15 @@ def test_splitext_space_name(self) -> None:
name, extension = splitext_(name_extension)
self.assertEqual("some file ", name)
self.assertEqual(".doc", extension)

# Dots inside the base name must stay in the name: only the final ".csv" is expected as the extension.
def test_splitext_dots_name(self) -> None:
name_extension = "1700134420_941.23_to_csv.csv"
name, extension = splitext_(name_extension)
self.assertEqual("1700134420_941.23_to_csv", name)
self.assertEqual(".csv", extension)

# A ".tar.gz" suffix is expected to come back whole as the extension, not just ".gz".
def test_splitext_double_dot_extension(self) -> None:
name_extension = "some_name.tar.gz"
name, extension = splitext_(name_extension)
self.assertEqual("some_name", name)
self.assertEqual(".tar.gz", extension)

0 comments on commit 49143dd

Please sign in to comment.