Skip to content

Commit

Permalink
Merge pull request #19 from onuralpszr/pre-commit
Browse files Browse the repository at this point in the history
feat: ✨ introduce pyproject toml file and pre commit with CI
  • Loading branch information
AyushExel authored Jul 17, 2023
2 parents 908640d + 2891ebd commit 84621e5
Show file tree
Hide file tree
Showing 12 changed files with 153 additions and 130 deletions.
10 changes: 2 additions & 8 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,10 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install ruff pytest
pip install pytest
pip install git+https://github.com/ultralytics/ultralytics.git@embeddings
pip install -e .
- name: Lint with ruff
run: |
# stop the build if there are Python syntax errors or undefined names
ruff --format=github --select=E9,F63,F7,F82 --target-version=py37 --line-length=120 .
# default set of ruff rules with GitHub Annotations
ruff --format=github --target-version=py37 --line-length=120 .
- name: Test with pytest
run: |
pytest tests
36 changes: 36 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks

default_language_version:
python: python3.8
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
hooks:
- id: check-added-large-files
- id: check-toml
- id: check-yaml
args:
- --unsafe
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/asottile/pyupgrade
rev: v3.7.0
hooks:
- id: pyupgrade
args:
- --py3-plus
- --keep-runtime-typing
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.0.275
hooks:
- id: ruff
args:
- --fix
- repo: https://github.com/psf/black
rev: 23.3.0
hooks:
- id: black
ci:
autofix_commit_msg: "fix(pre_commit): 🎨 auto format pre-commit hooks"
autoupdate_commit_msg: "fix(pre_commit): ⬆ pre_commit autoupdate"
2 changes: 1 addition & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1 +1 @@
include requirements.txt
include requirements.txt
7 changes: 3 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# YOLOExplorer

Explore, manipulate and iterate on Computer Vision datasets with precision using simple APIs.
Explore, manipulate and iterate on Computer Vision datasets with precision using simple APIs.
Supports SQL filters, vector similarity search, native interface with Pandas and more.


Expand Down Expand Up @@ -111,12 +111,12 @@ coco_exp.remove_imgs([100,120,300..n]) # Removes images at the given ids.
<b>Adding data</b><br/>
For adding data from another dataset, you need an explorer object of that dataset with embeddings built. You can then pass that object along with the ids of the imgs that you'd like to add from that dataset.
```
coco_exp.add_imgs(exp, idxs) #
coco_exp.add_imgs(exp, idxs) #
```
Note: You can use SQL querying and/or similarity searches to get the desired ids from the datasets.

<b>Persisting the Table: Create new dataset and start training</b><br/>
After making the desired changes, you can persist the table to create the new dataset.
After making the desired changes, you can persist the table to create the new dataset.
```
coco_exp.persist()
```
Expand Down Expand Up @@ -175,4 +175,3 @@ Pre-filtering will enable powerful queries like - "Show me images similar to <IM
Notes:
* The API will have some minor changes going from dev to minor release
* For all practical purposes, the ids are the same as the row numbers and are reset after every addition or removal

20 changes: 9 additions & 11 deletions frontend/dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,20 @@
default_data_yaml = "coco128.yaml"
st.text(f"Dataset: {default_data_yaml}")
if default_data_yaml:
if 'data_yaml' not in st.session_state:
st.session_state['data_yaml'] = default_data_yaml
if 'session' not in st.session_state:
st.session_state['session'] = Explorer(default_data_yaml, model=None)
dataset_info = st.session_state['session'].dataset_info
if "data_yaml" not in st.session_state:
st.session_state["data_yaml"] = default_data_yaml
if "session" not in st.session_state:
st.session_state["session"] = Explorer(default_data_yaml, model=None)
dataset_info = st.session_state["session"].dataset_info

st.write("**Dataset Information :**")

# Show the configured path(s) for every dataset split.
for dataset_type in ["train", "val", "test"]:
    st.text(f"{dataset_type.capitalize()} paths :")
    # Read the entry for the split being rendered (not always "train").
    # NOTE(review): assumes dataset_info is a dict-like mapping that may omit
    # a split (e.g. no "test" entry) — confirm against Explorer.dataset_info.
    split_paths = dataset_info.get(dataset_type)
    if isinstance(split_paths, str):
        # A single path is stored as a plain string.
        st.text(split_paths)
    elif isinstance(split_paths, list):
        # Several paths are stored as a list; print one per line.
        for p in split_paths:
            st.text(p)

st.write("**Dataset Visualization**")
Expand All @@ -45,5 +45,3 @@
train_image_paths = [img_url] * int(num_samples_show)
# st.image(train_image_paths, use_column_width="always")
clicked = image_select("Training Samples", images=train_image_paths)


60 changes: 60 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
[tool.ruff]
# Enable only a small subset of the pycodestyle (`E`) and Pyflakes (`F`) rules:
# those that flag Python syntax errors or undefined names.
# Ruff doesn't enable pycodestyle warnings (`W`) or McCabe complexity (`C901`) by default.
select = ["E9", "F63", "F7", "F82"]
ignore = []

# Allow autofix for all enabled rules (when `--fix` is provided).
fixable = ["ALL"]
unfixable = []

# Exclude a variety of commonly ignored directories.
exclude = [
".bzr",
".direnv",
".eggs",
".git",
".git-rewrite",
".hg",
".mypy_cache",
".nox",
".pants.d",
".pytype",
".ruff_cache",
".svn",
".tox",
".venv",
"__pypackages__",
"_build",
"buck-out",
"build",
"dist",
"node_modules",
"venv",
]
per-file-ignores = {}
line-length = 120

# Allow unused variables when underscore-prefixed.
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"

# Assume Python 3.8
target-version = "py38"

[tool.black]
line-length = 120
include = '\.pyi?$'
exclude = '''
/(
\.git
| \.hg
| \.mypy_cache
| \.tox
| \.venv
| _build
| buck-out
| build
| dist
)/
'''
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ duckdb
scikit-learn
ultralytics@git+https://github.com/ultralytics/ultralytics.git@embeddings
# streamlit # To run dashboard
# streamlit-image-select # For image gallery
# streamlit-image-select # For image gallery
11 changes: 6 additions & 5 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,15 @@
# Settings
FILE = Path(__file__).resolve()
PARENT = FILE.parent # root directory
README = (PARENT / 'README.md').read_text(encoding='utf-8')
README = (PARENT / "README.md").read_text(encoding="utf-8")
REQUIREMENTS = [
"lancedb",
"duckdb",
"scikit-learn",
"ultralytics@git+https://github.com/ultralytics/ultralytics.git@embeddings"
"lancedb",
"duckdb",
"scikit-learn",
"ultralytics@git+https://github.com/ultralytics/ultralytics.git@embeddings",
]


def get_version() -> str:
    """Return the package version string."""
    return "0.0.1.dev1"

Expand Down
20 changes: 5 additions & 15 deletions tests/test_explorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,8 @@ class TestExplorer:
def test_embeddings_creation(self):
    """Build embeddings for coco8.yaml and check the table name and row count."""
    coco_exp = Explorer("coco8.yaml")
    # force=True is passed so the embeddings are built by this call.
    coco_exp.build_embeddings(force=True)
    assert coco_exp.table_name == "coco8.yaml", "the table name should be coco8.yaml"
    # The failure message must agree with the asserted value (4 rows).
    assert len(coco_exp.table) == 4, "the length of the embeddings table should be 4"

def test_sim_idx(self):
coco_exp = Explorer("coco8.yaml")
Expand All @@ -32,13 +28,9 @@ def test_operations(self):
coco_exp.log_status()
coco_exp.remove_imgs([0, 1])
coco_exp.remove_imgs([0])
assert (
len(coco_exp.table.to_arrow()) == 1
), "the length of the embeddings table should be 1"
assert len(coco_exp.table.to_arrow()) == 1, "the length of the embeddings table should be 1"
coco_exp.persist()
assert (
len(coco_exp.table.to_arrow()) == 1
), "the length of the embeddings table should be 1"
assert len(coco_exp.table.to_arrow()) == 1, "the length of the embeddings table should be 1"

def test_add_imgs(self):
coco_exp = Explorer("coco8.yaml")
Expand All @@ -47,9 +39,7 @@ def test_add_imgs(self):
coco128_exp.build_embeddings()

coco_exp.add_imgs(coco128_exp, [i for i in range(4)])
assert (
len(coco_exp.table) == 8
), "the length of the embeddings table should be 8"
assert len(coco_exp.table) == 8, "the length of the embeddings table should be 8"

def test_sql(self):
coco_exp = Explorer("coco8.yaml")
Expand Down
2 changes: 1 addition & 1 deletion yoloexplorer/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .explorer import Explorer
from .explorer import Explorer

__all__ = ["Explorer"]
4 changes: 1 addition & 3 deletions yoloexplorer/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,7 @@ def get_relative_path(path1, path2):

class Dataset(YOLODataset):
def __init__(self, *args, data=None, **kwargs):
super().__init__(
*args, data=data, use_segments=False, use_keypoints=False, **kwargs
)
super().__init__(*args, data=data, use_segments=False, use_keypoints=False, **kwargs)

# NOTE: Load the image directly without any resize operations.
def load_image(self, i):
Expand Down
Loading

0 comments on commit 84621e5

Please sign in to comment.