diff --git a/README.md b/README.md index a359ee9..517446b 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,12 @@ -# Find relative paths from a project root directory +# Project-oriented workflow in Python -Finding project directories in Python (data science) projects, just like there R [`here`][here] and [`rprojroot`][rprojroot] packages. +Finding project directories in Python (data science) projects. + +This library aims to provide both +the programmatic functionality from the R [`rprojroot`][rprojroot] package +and the interactive functionality from the R [`here`][here] package. + +## Motivation **Problem**: I have a project that has a specific folder structure, for example, one mentioned in [Noble 2009][noble2009] or something similar to [this project template][project-template], @@ -11,60 +17,86 @@ and I want to be able to: 3. Reference datasets from a root directory when using a jupyter notebook because everytime I use a jupyter notebook, the working directory changes to the location of the notebook, not where I launched the notebook server. -**Solution**: `pyprojroot` finds the root working directory for your project as a `pathlib` object. +**Solution**: `pyprojroot` finds the root working directory for your project as a `pathlib.Path` object. You can now use the `here` function to pass in a relative path from the project root directory (no matter what working directory you are in the project), and you will get a full path to the specified file. That is, in a jupyter notebook, -you can write something like `pandas.read_csv(here('./data/my_data.csv'))` +you can write something like `pandas.read_csv(here('data/my_data.csv'))` instead of `pandas.read_csv('../data/my_data.csv')`. This allows you to restructure the files in your project without having to worry about changing file paths. Great for reading and writing datasets! 
+Further reading: + +* [Project-oriented workflows](https://www.tidyverse.org/articles/2017/12/workflow-vs-script/) +* [Stop the working directory insanity](https://gist.github.com/jennybc/362f52446fe1ebc4c49f) +* [Ode to the here package](https://github.com/jennybc/here_here) + ## Installation ### pip + ```bash -pip install pyprojroot +python -m pip install pyprojroot ``` ### conda + https://anaconda.org/conda-forge/pyprojroot ```bash -conda install -c conda-forge pyprojroot +conda install -c conda-forge pyprojroot ``` -## Usage +## Example Usage + +### Interactive + +This is based on the R [`here`][here] library. ```python -from pyprojroot import here +from pyprojroot.here import here here() ``` -### Example +### Programmatic + +This is based on the R [`rprojroot`][rprojroot] library. + +```python +import pyprojroot + +base_path = pyprojroot.find_root(pyprojroot.has_dir(".git")) +``` + +## Demonstration Load the packages + ``` -In [1]: from pyprojroot import here +In [1]: from pyprojroot.here import here In [2]: import pandas as pd ``` The current working directory is the "notebooks" folder + ``` In [3]: !pwd /home/dchen/git/hub/scipy-2019-pandas/notebooks ``` In the notebooks folder, I have all my notebooks + ``` In [4]: !ls 01-intro.ipynb 02-tidy.ipynb 03-apply.ipynb 04-plots.ipynb 05-model.ipynb Untitled.ipynb ``` If I wanted to access data in my notebooks I'd have to use `../data` + ``` In [5]: !ls ../data billboard.csv country_timeseries.csv gapminder.tsv pew.csv table1.csv table2.csv table3.csv table4a.csv table4b.csv weather.csv @@ -73,8 +105,9 @@ billboard.csv country_timeseries.csv gapminder.tsv pew.csv table1.csv table However, with there `here` function, I can access my data all from the project root. This means if I move the notebook to another folder or subfolder I don't have to change the path to my data. 
Only if I move the data to another folder would I need to change the path in my notebook (or script) + ``` -In [6]: pd.read_csv(here('./data/gapminder.tsv'), sep='\t').head() +In [6]: pd.read_csv(here('data/gapminder.tsv'), sep='\t').head() Out[6]: country continent year lifeExp pop gdpPercap 0 Afghanistan Asia 1952 28.801 8425333 779.445314 @@ -84,9 +117,10 @@ Out[6]: 4 Afghanistan Asia 1972 36.088 13079460 739.981106 ``` -By the way, you get a `pathlib` object path back! +By the way, you get a `pathlib.Path` object path back! + ``` -In [7]: here('./data/gapminder.tsv') +In [7]: here('data/gapminder.tsv') Out[7]: PosixPath('/home/dchen/git/hub/scipy-2019-pandas/data/gapminder.tsv') ``` diff --git a/pyprojroot/__init__.py b/pyprojroot/__init__.py index c6f4b00..b17b019 100644 --- a/pyprojroot/__init__.py +++ b/pyprojroot/__init__.py @@ -1,4 +1,3 @@ -from .pyprojroot import here, py_project_root # noqa:F401 - -__all__ = ["here", "py_project_root"] -__version__ = "0.2.0" +from .criterion import * +from .root import find_root, find_root_with_reason +from .here import here diff --git a/pyprojroot/criterion.py b/pyprojroot/criterion.py new file mode 100644 index 0000000..ffa98e5 --- /dev/null +++ b/pyprojroot/criterion.py @@ -0,0 +1,81 @@ +""" +This module is inspired by the `rprojroot` library for R. +See https://github.com/r-lib/rprojroot. + +It is intended for interactive or programmatic use only. 
+""" + +import pathlib as _pathlib +import typing +from os import PathLike as _PathLike + +# TODO: It would be nice to have a class that encapsulates these checks, +# so that we can implement methods like |, !, &, ^ operators + +# TODO: Refactor in a way that allows creation of reasons + + +def as_root_criterion(criterion) -> typing.Callable: + if callable(criterion): + return criterion + + # criterion must be a Collection, rather than just Iterable + if isinstance(criterion, _PathLike): + criterion = [criterion] + criterion = list(criterion) + + def f(path: _pathlib.Path) -> bool: + for c in criterion: + if isinstance(c, _PathLike): + if (path / c).exists(): + return True + else: + if c(path): + return True + return False + + return f + + +def has_file(file: _PathLike) -> typing.Callable: + """ + Check that specified file exists in path. + + Note that a directory with that name will not match. + """ + + def f(path: _pathlib.Path) -> bool: + return (path / file).is_file() + + return f + + +def has_dir(file: _PathLike) -> typing.Callable: + """ + Check that specified directory exists. + + Note that a regular file with that name will not match. + """ + + def f(path: _pathlib.Path) -> bool: + return (path / file).is_dir() + + return f + + +def matches_glob(pat: str) -> typing.Callable: + """ + Check that glob has at least one match. + """ + + def f(path: _pathlib.Path) -> bool: + matches = path.glob(pat) + try: + # Only need to get one item from generator + next(matches) + except StopIteration: + return False + else: + return True + + return f diff --git a/pyprojroot/here.py b/pyprojroot/here.py new file mode 100644 index 0000000..8349143 --- /dev/null +++ b/pyprojroot/here.py @@ -0,0 +1,55 @@ +""" +This module is inspired by the `here` library for R. +See https://github.com/r-lib/here. + +It is intended for interactive use only. +""" + +import pathlib as _pathlib +import warnings as _warnings +from os import PathLike as _PathLike + +from . 
import criterion +from .root import find_root, find_root_with_reason + +CRITERIA = [ + criterion.has_file(".here"), + criterion.has_dir(".git"), + criterion.matches_glob("*.Rproj"), + criterion.has_file("requirements.txt"), + criterion.has_file("setup.py"), + criterion.has_dir(".dvc"), + criterion.has_dir(".spyproject"), + criterion.has_file("pyproject.toml"), + criterion.has_dir(".idea"), + criterion.has_dir(".vscode"), +] + + +def get_here(): + # TODO: This should only find_root once per session + start = _pathlib.Path.cwd() + path, reason = find_root_with_reason(CRITERIA, start=start) + return path, reason + + +# TODO: Implement set_here + + +def here(relative_project_path: _PathLike = "", warn_missing=False) -> _pathlib.Path: + """ + Returns the path relative to the projects root directory. + :param relative_project_path: relative path from project root + :param project_files: list of files to track inside the project + :param warn_missing: warn user if path does not exist (default=False) + :return: pathlib path + """ + path, reason = get_here() + # TODO: Show reason when requested + + if relative_project_path: + path = path / relative_project_path + + if warn_missing and not path.exists(): + _warnings.warn(f"Path doesn't exist: {path!s}") + return path diff --git a/pyprojroot/pyprojroot.py b/pyprojroot/pyprojroot.py deleted file mode 100644 index 20a4758..0000000 --- a/pyprojroot/pyprojroot.py +++ /dev/null @@ -1,52 +0,0 @@ -from typing import Tuple -from pathlib import Path -import warnings - - -def py_project_root(path: Path, project_files: Tuple[str, ...]) -> Path: - """ - Recursively searches for project files in the current working directory - to find the project root of the python project. 
- :param path: pathlib path object - :param project_files: list of to track project files - :return: pathlib path - """ - for file in project_files: - found = list(path.glob(file)) - if len(found) > 0: - return path - return py_project_root(path.parent, project_files) - - -def here( - relative_project_path: str = ".", - project_files: Tuple[str, ...] = ( - ".git", - ".here", - "*.Rproj", - "requirements.txt", - "setup.py", - ".dvc", - ".spyproject", - "pyproject.toml", - ".idea", - ".vscode", - ), - warn: bool = True, -) -> Path: - """ - Returns the directory relative to the projects root directory. - :param relative_project_path: relative path from project root - :param project_files: list of files to track inside the project - :param warn: warn user if path does not exist - :return: pathlib path - """ - project_path = py_project_root(Path(".").cwd(), project_files) - path = project_path.joinpath(relative_project_path) - - if path.exists(): - return path - else: - if warn: - warnings.warn("Path doesn't exist: {}".format(path)) - return path diff --git a/pyprojroot/root.py b/pyprojroot/root.py new file mode 100644 index 0000000..b8ef64a --- /dev/null +++ b/pyprojroot/root.py @@ -0,0 +1,66 @@ +""" +This module is inspired by the `rprojroot` library for R. +See https://github.com/r-lib/rprojroot. + +It is intended for interactive or programmatic use only. +""" + +import pathlib as _pathlib +import typing as _typing +from os import PathLike as _PathLike + +from .criterion import as_root_criterion as _as_root_criterion + + +def as_start_path(start: _PathLike) -> _pathlib.Path: + if start is None: + return _pathlib.Path.cwd() + if not isinstance(start, _pathlib.Path): + start = _pathlib.Path(start) + # TODO: consider `start = start.resolve()` + return start + + +def find_root_with_reason( + criterion, start: _PathLike = None +) -> _typing.Tuple[_pathlib.Path, str]: + """ + Find directory matching root criterion with reason. 
+ + Recursively search parents of start path for directory + matching root criterion with reason. + """ + # TODO: Implement reasons + + # Prepare inputs + criterion = _as_root_criterion(criterion) + start = as_start_path(start) + + # Check start + if start.is_dir() and criterion(start): + return start, "Pass" + + # Iterate over all parents + # TODO: Consider adding maximum depth + # TODO: Consider limiting depth to path (e.g. "if p == stop: raise") + for p in start.parents: + if criterion(p): + return p, "Pass" + + # Not found + raise RuntimeError("Project root not found.") + + +def find_root(criterion, start: _PathLike = None, **kwargs) -> _pathlib.Path: + """ + Find directory matching root criterion. + + Recursively search parents of start path for directory + matching root criterion. + """ + try: + root, _ = find_root_with_reason(criterion, start=start, **kwargs) + except RuntimeError as ex: + raise ex + else: + return root diff --git a/setup.py b/setup.py index bfd1a1a..622045b 100644 --- a/setup.py +++ b/setup.py @@ -5,8 +5,8 @@ setuptools.setup( name="pyprojroot", - version="0.2.0", - description="Find project root paths and return relative project files", + version="0.3.0", + description="Project-oriented workflow in Python", long_description=long_description, long_description_content_type="text/markdown", author="Daniel Chen", @@ -18,5 +18,6 @@ "Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", - ] + ], + python_requires=">=3.6", ) diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/test_here.py b/tests/test_here.py new file mode 100644 index 0000000..d9a8afb --- /dev/null +++ b/tests/test_here.py @@ -0,0 +1,40 @@ +import os + +import pytest + +from pyprojroot.here import here + + +@pytest.mark.parametrize( + "project_files,file_type", + [ + (".git", "dir"), + (".here", "file"), + ("my_project.Rproj", "file"), + 
("requirements.txt", "file"), + ("setup.py", "file"), + (".dvc", "dir"), + ], +) +@pytest.mark.parametrize("child_dir", ["stuff", "src", "data", "data/hello"]) +def test_here(tmp_path, project_files, file_type, child_dir): + """ + This test uses pytest's tmp_path facilities to create a simulated project + directory, and checks that the path is correct. + """ + # Create project file + if file_type == "file": + (tmp_path / project_files).write_text("blah") + elif file_type == "dir": + (tmp_path / project_files).mkdir(parents=True) + else: + raise ValueError("Invalid input: {file_type}") + + # Create child dirs + start_dir = tmp_path / child_dir + start_dir.mkdir(parents=True) + os.chdir(start_dir) + + # Verify the project against current work directory + current_path = here() + assert current_path == tmp_path diff --git a/tests/test_pyprojroot.py b/tests/test_pyprojroot.py deleted file mode 100644 index ff67a83..0000000 --- a/tests/test_pyprojroot.py +++ /dev/null @@ -1,33 +0,0 @@ -from os import chdir -from pathlib import Path - -import pytest - -from pyprojroot import __version__, here - - -def test_version() -> None: - assert __version__ == "0.2.0" - - -@pytest.mark.parametrize( - "project_file", - (".git", ".here", "my_project.Rproj", "requirements.txt", "setup.py", ".dvc"), -) -@pytest.mark.parametrize("child_dir", ("stuff", "src", "data", "data/hello")) -def test_here(tmp_path: Path, project_file: str, child_dir: str) -> None: - """ - This test uses pytest's tmp_path facilities to create a simulated project - directory, and checks that the path is correct. - """ - # Create project file - (tmp_path / project_file).write_text("blah") - - # Create child dirs - child_path = tmp_path / child_dir - child_path.mkdir(parents=True) - chdir(child_path) - assert Path.cwd() == child_path - - # Verify the project against current work directory - assert here() == tmp_path