From aa988caea8fcb9b02e640a74f5e46cf1bb36ff7c Mon Sep 17 00:00:00 2001 From: bkmartinjr Date: Mon, 23 Sep 2024 11:57:15 -0700 Subject: [PATCH] initial scaffolding for project --- .gitignore | 162 ++++++++++++++++++++++++++++++++++ .pre-commit-config.yaml | 26 ++++++ CHANGELOG.md | 21 +++++ pyproject.toml | 76 ++++++++++++++++ src/tiledbsoma_ml/__init__.py | 8 ++ src/tiledbsoma_ml/py.typed | 2 + 6 files changed, 295 insertions(+) create mode 100644 .gitignore create mode 100644 .pre-commit-config.yaml create mode 100644 CHANGELOG.md create mode 100644 pyproject.toml create mode 100644 src/tiledbsoma_ml/__init__.py create mode 100644 src/tiledbsoma_ml/py.typed diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..efa407c --- /dev/null +++ b/.gitignore @@ -0,0 +1,162 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..e0f39b5 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,26 @@ +repos: + - repo: https://github.com/psf/black + rev: "24.8.0" + hooks: + - id: black + exclude: 'apis/' + + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.6.5 + hooks: + - id: ruff + name: "ruff for tiledbsoma_ml" + args: ["--config=pyproject.toml"] + exclude: 'apis/' + + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.11.2 + hooks: + - id: mypy + pass_filenames: false + args: ["--config-file=pyproject.toml", "src"] + additional_dependencies: + - attrs + - numpy + - pandas-stubs>=2 + exclude: 'apis/' diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..91b7fc3 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,21 @@ + +# Change Log + +All notable changes to this project will be documented in this file. 
+ +The format is based on [Keep a Changelog](http://keepachangelog.com/) +and this project adheres to [Semantic Versioning](http://semver.org/). + +## [Unreleased] - yyyy-mm-dd + +Port and enhance contribution from the Chan Zuckerberg Initiative Foundation +[CELLxGENE](https://cellxgene.cziscience.com/) project. + +This is not a one-for-one migration of the contributed code. Substantial changes have +been made to improve package utility (e.g., multi-GPU support), API usability, etc. + +### Added + +### Changed + +### Fixed diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..7901253 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,76 @@ +[build-system] +requires = ["setuptools >= 61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "tiledbsoma-ml" +dynamic = ["version"] +dependencies = [ + "attrs>=22.2", + "tiledbsoma>=1.9.0", + "torch>=2.0", + "torchdata<=0.9", + "numpy", + "numba", + "pandas", + "pyarrow", + "scipy" +] +requires-python = ">= 3.9" +description = "Machine learning tools for use with tiledbsoma" +readme = "README.md" +authors = [ + {name = "TileDB, Inc.", email = "help@tiledb.io"}, + {name = "The Chan Zuckerberg Initiative Foundation", email = "soma@chanzuckerberg.com" }, +] +maintainers = [ + {name = "TileDB, Inc.", email="help@tiledb.io"}, +] + +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "Intended Audience :: Information Technology", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", + "Topic :: Scientific/Engineering :: Bio-Informatics", + "Operating System :: Unix", + "Operating System :: POSIX :: Linux", + "Operating System :: MacOS :: MacOS X", + "Operating System :: Microsoft :: Windows", + "Programming Language :: Python", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", +] + 
+[project.urls] +Repository = "https://github.com/TileDB-Inc/TileDB-SOMA-ML.git" +Issues = "https://github.com/TileDB-Inc/TileDB-SOMA-ML/issues" +Changelog = "https://github.com/TileDB-Inc/TileDB-SOMA-ML/blob/main/CHANGELOG.md" + +[tool.setuptools.dynamic] +version = {attr = "tiledbsoma_ml.__version__"} + +[tool.setuptools.package-data] +"tiledbsoma_ml" = ["py.typed"] + +[tool.setuptools_scm] +root = "../../.." + +[tool.mypy] +show_error_codes = true +ignore_missing_imports = true +warn_unreachable = true +strict = true +python_version = 3.9 +plugins = "numpy.typing.mypy_plugin" + +[tool.ruff] +lint.select = ["E", "F", "B", "I"] +lint.ignore = ["E501"] # line too long +lint.extend-select = ["I001"] # unsorted-imports +fix = true +target-version = "py39" +line-length = 120 diff --git a/src/tiledbsoma_ml/__init__.py b/src/tiledbsoma_ml/__init__.py new file mode 100644 index 0000000..7adb437 --- /dev/null +++ b/src/tiledbsoma_ml/__init__.py @@ -0,0 +1,8 @@ +# Copyright (c) 2021-2024 The Chan Zuckerberg Initiative Foundation +# Copyright (c) 2021-2024 TileDB, Inc. +# +# Licensed under the MIT License. + +"""An API to support machine learning applications built on SOMA.""" + +__version__ = "0.1.0-dev" diff --git a/src/tiledbsoma_ml/py.typed b/src/tiledbsoma_ml/py.typed new file mode 100644 index 0000000..288a150 --- /dev/null +++ b/src/tiledbsoma_ml/py.typed @@ -0,0 +1,2 @@ +# Marker file to indicate that this package contains Python typing information, +# and that mypy can use it to typecheck client code.