Skip to content

Commit

Permalink
Add README, CI, and other boilerplate (#1)
Browse files Browse the repository at this point in the history
Add python infra, CI, testing, etc.. Also a basic README.
  • Loading branch information
augray authored Aug 30, 2024
1 parent e6e83c0 commit b4140df
Show file tree
Hide file tree
Showing 10 changed files with 1,053 additions and 0 deletions.
22 changes: 22 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Continuous integration: lint and test the package on every push,
# once for each Python version in the matrix.
name: CI

on: push

jobs:
  test-python:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version:
          - "3.8"
          - "3.9"
          - "3.10"
          - "3.11"
          - "3.12"
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # The Makefile's py-prep target is invoked with the matrix version
      # overriding its PY_VERSION variable.
      - name: Set up python ${{ matrix.python-version }}
        run: make py-prep PY_VERSION=${{ matrix.python-version }}

      - name: Run static analysis
        run: make lint

      - name: Run tests
        run: make test
137 changes: 137 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# python node packages
python/package-lock.json
python/package.json

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# Editors
.vscode/
.helix
.*swp

# Misc
.DS_Store

# Python wheels
*.whl
44 changes: 44 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Run recipe lines with bash instead of make's default shell.
SHELL := /bin/bash
# Name of the project/package.
PROJECT_NAME := airtrain
# Python version handed to `uv venv --python` by the py-prep target. The double
# quotes are part of the value. Can be overridden on the command line, e.g.
# `make py-prep PY_VERSION=3.12`.
PY_VERSION := "3.11"

# Build wheels for the project in the current directory using pip (run
# through uvx), writing the output into dist/.
.PHONY: wheel
wheel:
	uvx pip wheel --wheel-dir dist .

# Check the project's wheel(s) with twine, then upload them to the "testpypi"
# repository. Depends on `wheel`, so the wheels are (re)built first.
# The glob is derived from PROJECT_NAME rather than a hard-coded name, so only
# wheels whose filename contains the project name are checked and uploaded.
# NOTE(review): presumably dist/ can also contain dependency wheels written by
# `pip wheel`, which is why the glob filters by name -- confirm.
.PHONY: test-release
test-release: wheel
	uvx twine check dist/*$(PROJECT_NAME)*.whl
	uvx twine upload --repository testpypi dist/*$(PROJECT_NAME)*.whl

# Check the project's wheel(s) with twine, then upload them to twine's default
# repository. Depends on `wheel`, so the wheels are (re)built first.
# The glob is derived from PROJECT_NAME rather than a hard-coded name, so only
# wheels whose filename contains the project name are checked and uploaded.
.PHONY: release
release: wheel
	uvx twine check dist/*$(PROJECT_NAME)*.whl
	uvx twine upload dist/*$(PROJECT_NAME)*.whl

# Prepare a fresh development environment:
#   1. run uv's install script if `uv --version` fails,
#   2. delete any existing .venv,
#   3. create a new virtualenv for the Python version in PY_VERSION,
#   4. (re)install the pinned ruff release as a uv tool,
#   5. run `uv add --editable .` for the project in the current directory.
#
# `rm -rf` already succeeds when .venv does not exist, so no fallback is
# attached to it: a genuine removal failure now stops the recipe instead of
# being reported as "No virtualenv yet" and ignored.
#
# NOTE(review): if uv has just been installed by the script, it may not be on
# PATH for the following recipe lines -- confirm on a clean machine.
.PHONY: py-prep
py-prep:
	uv --version || curl -LsSf https://astral.sh/uv/install.sh | sh
	rm -rf ".venv"
	uv venv --python $(PY_VERSION)
	uv tool install --force ruff==0.6.1
	uv add --editable .

# Run `uv sync` for the project in the current directory.
.PHONY: sync
sync:
	uv sync

# Rewrite code in place: run the ruff formatter from the current directory,
# then apply ruff's automatic lint fixes to the package and list each fix.
.PHONY: fix
fix:
	uvx ruff format .
	uvx ruff check --show-fixes --fix src/airtrain

# Static analysis for the package; every step is read-only and fails the
# target on violations:
#   - formatting check (no files are rewritten),
#   - ruff lint rules,
#   - mypy type checking.
#
# `ruff check` is deliberately run WITHOUT --fix: with --fix, ruff rewrites
# the files and exits successfully once the violations have been auto-fixed,
# so a CI run would pass even though the committed code violates the rules.
# Use `make fix` to apply fixes locally.
.PHONY: lint
lint:
	uvx ruff format --check src/airtrain
	uvx ruff check src/airtrain
	uv run mypy --explicit-package-bases src/airtrain

# Run pytest through uv, collecting tests from the current directory.
.PHONY: test
test:
	uv run pytest .
65 changes: 65 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
<div align="center">
<img src="images/airtrain-logo.png" alt="Airtrain AI logo" style="vertical-align: middle; display: inline-block;" width="100px">
</div>

<p align="center">
<a href="https://github.com/sematic-ai/py-airtrain/actions/workflows/ci.yaml?query=branch%3Amain+" target="_blank">
<img height="30px" src="https://github.com/sematic-ai/py-airtrain/actions/workflows/ci.yaml/badge.svg?branch=main" alt="CI status">
</a>
<a href="./LICENSE" target="_blank">
<img height="30px" src="https://img.shields.io/pypi/l/sematic?style=for-the-badge" alt="License">
</a>
<a href="https://airtrain.ai" target="_blank">
<img height="30px" src="https://img.shields.io/badge/Made_by-Airtrain_🚀-blue?style=for-the-badge&logo=none" alt="Made by Airtrain">
</a>
<a href="https://docs.python.org/3.8/" target="_blank">
<img height="30px" src="https://img.shields.io/badge/Python-3.8-blue?style=for-the-badge&logo=python" alt="Python Version">
</a>
<a href="https://docs.python.org/3.9/" target="_blank">
<img height="30px" src="https://img.shields.io/badge/Python-3.9-blue?style=for-the-badge&logo=python" alt="Python Version">
</a>
<a href="https://docs.python.org/3.10/" target="_blank">
<img height="30px" src="https://img.shields.io/badge/Python-3.10-blue?style=for-the-badge&logo=python" alt="Python Version">
</a>
<a href="https://docs.python.org/3.11/" target="_blank">
<img height="30px" src="https://img.shields.io/badge/Python-3.11-blue?style=for-the-badge&logo=python" alt="Python Version">
</a>
<a href="https://docs.python.org/3.12/" target="_blank">
<img height="30px" src="https://img.shields.io/badge/Python-3.12-blue?style=for-the-badge&logo=python" alt="Python Version">
</a>
<a href="https://github.com/sematic-ai/py-airtrain/actions/workflows/ci.yaml?query=branch%3Amain+" target="_blank">
<img height="30px" src="https://github.com/sematic-ai/py-airtrain/actions/workflows/ci.yaml/badge.svg?branch=main" alt="CI status">
</a>
</p>


# Airtrain SDK

This repository holds the SDK for interacting with
[Airtrain](https://www.airtrain.ai/),
the tool for improving your AI apps, RAG pipelines, and models by curating
high-quality training and eval datasets.

## Usage

Obtain your API key by going to your user settings on
https://app.airtrain.ai .

Then you may upload a new dataset as follows:

```python
import airtrain as at

# Can also be set with the environment variable AIRTRAIN_API_KEY
at.api_key = "sUpErSeCr3t"

url = at.upload_from_dicts(
[
{"foo": "some text", "bar": "more text"},
{"foo": "even more text", "bar": "so much text"},
]
).url

# You may view your dataset at this URL
print(f"Dataset URL: {url}")
```
Binary file added images/airtrain-logo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
75 changes: 75 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
[project]
name = "airtrain"
description = "SDK for interacting with https://airtrain.ai"
version = "0.0.1"
requires-python = ">=3.8"
authors = [
{email = "[email protected]"},
]
readme = "README.md"
license = {file = "LICENSE"}
dependencies = [
"httpx>=0.25.0",
"pyarrow>=13.0.0",

# pyarrow requires numpy. Numpy version support is a bit
# complicated and isn't handled well transitively. This makes
# sure we support the broadest numpy dependency set possible.
"numpy>=1.26.0; python_version >= '3.12'",
"numpy<=1.24.4; python_version == '3.8'",
"numpy>=1.19.3; python_version >= '3.9'",

"airtrain",
]

classifiers = [
"License :: OSI Approved :: Apache Software License",
"Intended Audience :: Developers",
"Intended Audience :: Science/Research",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Typing :: Typed",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
]

[tool.uv]
dev-dependencies = [
"mypy==1.11.1",
"ruff-lsp==0.0.35",
"python-lsp-ruff>=2.2.2",
"python-lsp-server>=1.11.0",
"pytest==7.4.0",
]

[tool.uv.sources]
airtrain = { workspace = true }

[project.urls]
Homepage = "https://airtrain.ai"
Documentation = "https://docs.airtrain.ai/"

[build-system]
requires = ["setuptools", "setuptools-scm"]
build-backend = "setuptools.build_meta"

[tool.ruff]
line-length = 90

[tool.ruff.lint]
select = ["E", "F", "I"]

[tool.ruff.lint.isort]
known-first-party = ["airtrain"]

# Use a single line after each import block.
lines-after-imports = 2

[tool.pylsp.plugins.ruff]
enabled = true

[[tool.mypy.overrides]]
module = "airtrain.*"
ignore_missing_imports = true
1 change: 1 addition & 0 deletions src/airtrain/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from airtrain.core import DatasetMetadata, upload_from_dicts # noqa: F401
31 changes: 31 additions & 0 deletions src/airtrain/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from dataclasses import dataclass, fields
from typing import Any, Dict, Iterable, Union


@dataclass
class DatasetMetadata:
    """Metadata describing a dataset.

    Every field is validated against its annotated type on construction.

    Raises:
        ValueError: if any field's value is not an instance of the field's
            annotated type, or if a bool is supplied for an ``int`` field.
    """

    name: str
    id: str
    url: str
    size: int

    def __post_init__(self) -> None:
        """Check each field's value against its declared type."""
        for field in fields(self):
            value = getattr(self, field.name)
            # bool is a subclass of int, so isinstance(True, int) is True.
            # Reject bools explicitly so e.g. size=True is not accepted.
            bool_for_int = field.type is int and isinstance(value, bool)
            if bool_for_int or not isinstance(value, field.type):
                raise ValueError(
                    f"Field '{field.name}' must be {field.type}. Got: '{value}'"
                )


def upload_from_dicts(
    data: Iterable[Dict[str, Any]],
    name: Union[str, None] = None,
    embedding_column: Union[str, None] = None,
) -> DatasetMetadata:
    """Return the metadata for a dataset described by an iterable of dicts.

    As written, this returns fixed placeholder values: ``data`` and
    ``embedding_column`` are accepted but never read.

    Args:
        data: The rows of the dataset, one dict per row.
        name: Name for the dataset. When falsy (``None`` or empty),
            ``"My Dataset"`` is used.
        embedding_column: Currently unused.

    Returns:
        A ``DatasetMetadata`` with the chosen name and placeholder
        ``id``, ``url`` and ``size`` values.
    """
    dataset_name = name if name else "My Dataset"
    metadata = DatasetMetadata(
        name=dataset_name,
        id="abc123",
        url="https://example.com",
        size=0,
    )
    return metadata
6 changes: 6 additions & 0 deletions src/tests/test_core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from airtrain.core import DatasetMetadata, upload_from_dicts


def test_upload_from_dicts():
    """upload_from_dicts should hand back a DatasetMetadata instance."""
    rows = [{"foo": 42}, {"foo": 43}]
    metadata = upload_from_dicts(rows, name="Foo dataset")
    assert isinstance(metadata, DatasetMetadata)
Loading

0 comments on commit b4140df

Please sign in to comment.