Skip to content

Commit 4a742ff

Browse files
authored
Merge pull request #5 from DagsHub/refactor/static-annotations
Refactor: static annotations
2 parents f014659 + be03218 commit 4a742ff

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

92 files changed

+3500
-1664
lines changed
+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
name: Lint and Test
2+
3+
on:
4+
push:
5+
branches:
6+
- main
7+
pull_request:
8+
branches:
9+
- main
10+
11+
jobs:
12+
run-linters:
13+
runs-on: ubuntu-latest
14+
steps:
15+
- uses: actions/checkout@v3
16+
- uses: chartboost/ruff-action@v1
17+
18+
run-tests:
19+
runs-on: ubuntu-latest
20+
strategy:
21+
fail-fast: false
22+
matrix:
23+
python-version: [3.8, 3.9, "3.10", 3.11, 3.12]
24+
steps:
25+
- uses: actions/checkout@v3
26+
- uses: actions/setup-python@v4
27+
with:
28+
python-version: ${{ matrix.python-version }}
29+
- name: Install Hatch
30+
run: pipx install hatch
31+
- name: Run tests
32+
run: hatch test

README.md

+17-29
Original file line numberDiff line numberDiff line change
@@ -6,38 +6,26 @@ between different annotation formats.
66
This package is currently in development and does not have many features implemented yet.
77
The API is not stable and is subject to change.
88

9-
Support Matrix for image annotations
9+
The package consists of the Intermediary Representation (IR) annotation format in Python Objects,
10+
and importers/exporters for different annotation formats.
1011

11-
| Export > \/ Import V | YOLO v5+ BBox | YOLO v5+ Segmentation | Yolo Poses | COCO | DagsHub Datasource (Label Studio) | Label Studio | CVAT Image |
12-
|-----------------------------------|---------------|-----------------------|------------|------|-----------------------------------|--------------|------------|
13-
| YOLO v5+ BBox | - | | | || | |
14-
| YOLO v5+ Segmentation | | - | | || | |
15-
| YOLO Poses | | | - | || | |
16-
| COCO | | | | - | | | |
17-
| DagsHub Datasource (Label Studio) |||| | - | | |
18-
| Label Studio | | | | | | - | |
19-
| CVAT Image | | | | || | - |
12+
## Installation
2013

21-
Example usage, importing annotations from [COCO_1K](https://dagshub.com/Dean/COCO_1K) and uploading it into a DagsHub Datasource:
14+
```bash
15+
pip install dagshub-annotation-converter
16+
```
2217

18+
## Importers (Image):
19+
- [YOLO BBox, Segmentation, Poses](dagshub_annotation_converter/converters/yolo.py#L81)
20+
- [Label Studio](dagshub_annotation_converter/formats/label_studio/task.py#L72) (only the task schema is implemented; importing from a project is left up to the user):
2321
```python
24-
from dagshub_annotation_converter.image.importers import YoloImporter
25-
from dagshub_annotation_converter.image.exporters import DagshubDatasourceExporter
26-
27-
from dagshub.data_engine.datasources import get_datasource
28-
29-
# Assuming that the current worker directory is the root of the repo and images are stored in "data" folder
30-
importer = YoloImporter(
31-
data_dir="data", # Where the images are stored
32-
annotation_type="segmentation", # or bbox for bounding boxes
33-
meta_file="custom_coco.yaml" # file with the classes
34-
)
22+
from dagshub_annotation_converter.formats.label_studio.task import LabelStudioTask
23+
task_obj = LabelStudioTask.from_json("path/to/label_studio_task.json")
3524

36-
proj = importer.parse()
37-
38-
exporter = DagshubDatasourceExporter(
39-
datasource=get_datasource("<user>/<repo>", "<my datasource>"),
40-
annotation_field="exported_yolo_annotations"
41-
)
42-
exporter.export(proj)
25+
annotations = task_obj.to_ir_annotations()
4326
```
27+
- [CVAT Image](dagshub_annotation_converter/converters/cvat.py#L46)
28+
29+
## Exporters (Image):
30+
- [YOLO BBox, Segmentation, Poses](dagshub_annotation_converter/converters/yolo.py#L126)
31+
- [Label Studio](dagshub_annotation_converter/formats/label_studio/task.py#L225) (again, only the task schema is implemented; uploading the task to a project is left up to the user)

dagshub_annotation_converter/cli.py

-2
This file was deleted.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
from typing import Sequence, Mapping, List, Dict
2+
3+
from dagshub_annotation_converter.ir.image import IRImageAnnotationBase
4+
5+
6+
def group_annotations_by_filename(
7+
annotations: Sequence[IRImageAnnotationBase],
8+
) -> Mapping[str, Sequence[IRImageAnnotationBase]]:
9+
res: Dict[str, List[IRImageAnnotationBase]] = {}
10+
for ann in annotations:
11+
if ann.filename is None:
12+
raise ValueError(f"An annotation {ann} doesn't have a filename associated, aborting")
13+
if ann.filename not in res:
14+
res[ann.filename] = []
15+
res[ann.filename].append(ann)
16+
return res
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import logging
2+
from os import PathLike
3+
from typing import Sequence, List, Dict
4+
from zipfile import ZipFile
5+
6+
import lxml.etree
7+
8+
from dagshub_annotation_converter.formats.cvat import annotation_parsers
9+
from dagshub_annotation_converter.formats.cvat.context import parse_image_tag
10+
from dagshub_annotation_converter.ir.image import IRImageAnnotationBase
11+
12+
13+
logger = logging.getLogger(__name__)
14+
15+
16+
def parse_image_annotations(img: lxml.etree.ElementBase) -> Sequence[IRImageAnnotationBase]:
17+
annotations: List[IRImageAnnotationBase] = []
18+
for annotation_elem in img:
19+
annotation_type = annotation_elem.tag
20+
if annotation_type not in annotation_parsers:
21+
logger.warning(f"Unknown CVAT annotation type {annotation_type}")
22+
continue
23+
annotations.append(annotation_parsers[annotation_type](annotation_elem, img))
24+
25+
return annotations
26+
27+
28+
def load_cvat_from_xml_string(
29+
xml_text: bytes,
30+
) -> Dict[str, Sequence[IRImageAnnotationBase]]:
31+
annotations = {}
32+
root_elem = lxml.etree.XML(xml_text)
33+
34+
for image_node in root_elem.xpath("//image"):
35+
image_info = parse_image_tag(image_node)
36+
annotations[image_info.name] = parse_image_annotations(image_node)
37+
38+
return annotations
39+
40+
41+
def load_cvat_from_xml_file(xml_file: PathLike) -> Dict[str, Sequence[IRImageAnnotationBase]]:
42+
with open(xml_file, "rb") as f:
43+
return load_cvat_from_xml_string(f.read())
44+
45+
46+
def load_cvat_from_zip(zip_path: PathLike) -> Dict[str, Sequence[IRImageAnnotationBase]]:
47+
with ZipFile(zip_path) as proj_zip:
48+
with proj_zip.open("annotations.xml") as f:
49+
return load_cvat_from_xml_string(f.read())
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
import logging
2+
import os
3+
from pathlib import Path
4+
from typing import Union, Sequence, List, Optional, Dict, Tuple
5+
6+
import PIL.Image
7+
8+
from dagshub_annotation_converter.converters.common import group_annotations_by_filename
9+
from dagshub_annotation_converter.formats.yolo import (
10+
export_lookup,
11+
allowed_annotation_types,
12+
YoloContext,
13+
import_lookup,
14+
YoloAnnotationTypes,
15+
)
16+
from dagshub_annotation_converter.ir.image import IRImageAnnotationBase
17+
from dagshub_annotation_converter.util import is_image, replace_folder
18+
19+
logger = logging.getLogger(__name__)
20+
21+
22+
def load_yolo_from_fs_with_context(
23+
context: YoloContext,
24+
import_dir: Union[str, Path] = ".",
25+
) -> Dict[str, Sequence[IRImageAnnotationBase]]:
26+
assert context.path is not None
27+
28+
annotations: Dict[str, Sequence[IRImageAnnotationBase]] = {}
29+
30+
import_dir_path = Path(import_dir)
31+
32+
if context.path.is_absolute():
33+
data_dir_path = context.path
34+
else:
35+
data_dir_path = import_dir_path / context.path
36+
37+
for dirpath, subdirs, files in os.walk(data_dir_path):
38+
if context.image_dir_name not in dirpath.split("/"):
39+
logger.debug(f"{dirpath} is not an image dir, skipping")
40+
continue
41+
for filename in files:
42+
fullpath = os.path.join(dirpath, filename)
43+
img = Path(fullpath)
44+
relpath = img.relative_to(data_dir_path)
45+
if not is_image(img):
46+
logger.debug(f"Skipping {img} because it's not an image")
47+
continue
48+
annotation = replace_folder(img, context.image_dir_name, context.label_dir_name, context.label_extension)
49+
if annotation is None:
50+
logger.warning(f"Couldn't generate annotation file path for image file [{img}]")
51+
continue
52+
if not annotation.exists():
53+
logger.warning(f"Couldn't find annotation file [{annotation}] for image file [{img}]")
54+
continue
55+
annotations[str(relpath)] = parse_annotation(context, data_dir_path, img, annotation)
56+
57+
return annotations
58+
59+
60+
def parse_annotation(
61+
context: YoloContext, base_path: Path, img_path: Path, annotation_path: Path
62+
) -> Sequence[IRImageAnnotationBase]:
63+
img = PIL.Image.open(img_path)
64+
img_width, img_height = img.size
65+
66+
annotation_strings = annotation_path.read_text().strip().split("\n")
67+
68+
assert context.annotation_type is not None
69+
70+
convert_func = import_lookup[context.annotation_type]
71+
72+
res: List[IRImageAnnotationBase] = []
73+
rel_path = str(img_path.relative_to(base_path))
74+
75+
for ann in annotation_strings:
76+
res.append(convert_func(ann, context, img_width, img_height, img).with_filename(rel_path))
77+
78+
return res
79+
80+
81+
def load_yolo_from_fs(
82+
annotation_type: YoloAnnotationTypes,
83+
meta_file: Union[str, Path] = "annotations.yaml",
84+
image_dir_name: str = "images",
85+
label_dir_name: str = "labels",
86+
) -> Tuple[Dict[str, Sequence[IRImageAnnotationBase]], YoloContext]:
87+
meta_file_path = Path(meta_file).absolute()
88+
context = YoloContext.from_yaml_file(meta_file, annotation_type=annotation_type)
89+
context.image_dir_name = image_dir_name
90+
context.label_dir_name = label_dir_name
91+
context.annotation_type = annotation_type
92+
93+
return load_yolo_from_fs_with_context(context, import_dir=meta_file_path.parent), context
94+
95+
96+
# ======== Annotation Export ======== #
97+
98+
99+
def annotations_to_string(annotations: Sequence[IRImageAnnotationBase], context: YoloContext) -> Optional[str]:
100+
"""
101+
Serializes multiple YOLO annotations into the contents of the annotations file.
102+
Also makes sure that only annotations of the correct type for context.annotation_type are serialized.
103+
104+
:param annotations: Annotations to serialize (should be single file)
105+
:param context: Exporting context
106+
:return: String of the content of the file
107+
"""
108+
filtered_annotations = [
109+
ann for ann in annotations if isinstance(ann, allowed_annotation_types[context.annotation_type])
110+
]
111+
112+
if len(filtered_annotations) != len(annotations):
113+
logger.warning(
114+
f"{annotations[0].filename} has {len(annotations) - len(filtered_annotations)} "
115+
f"annotations of the wrong type that won't be exported"
116+
)
117+
118+
if len(filtered_annotations) == 0:
119+
return None
120+
121+
export_fn = export_lookup[context.annotation_type]
122+
123+
return "\n".join([export_fn(ann, context) for ann in filtered_annotations])
124+
125+
126+
def export_to_fs(
127+
context: YoloContext,
128+
annotations: List[IRImageAnnotationBase],
129+
export_dir: Union[str, Path] = ".",
130+
meta_file="yolo_dagshub.yaml",
131+
) -> Path:
132+
"""
133+
Exports annotations to YOLO format.
134+
135+
This function exports them in a way that allows you to train with YOLO right away,
136+
as long as the images have already been copied to the data folder.
137+
138+
:param context: Context for exporting. Set the ``path`` attribute to specify the directory with the data,
139+
otherwise exports a ``data`` folder in the current working directory.
140+
:param annotations: Annotations to export
141+
:param export_dir: Directory to export to. If not specified, exports to the current working directory.
142+
:param meta_file: Name of the YAML file of the YOLO dataset definition.
143+
This file will be written to the parent directory of the data path.
144+
145+
:return: Path to the YAML file with the exported data
146+
"""
147+
if context.path is None:
148+
print(f"`YoloContext.path` was not set. Exporting to {os.path.join(os.getcwd(), 'data')}")
149+
context.path = Path("data")
150+
151+
grouped_annotations = group_annotations_by_filename(annotations)
152+
153+
export_path = Path(export_dir)
154+
155+
for filename, anns in grouped_annotations.items():
156+
annotation_filepath = replace_folder(
157+
Path(filename), context.image_dir_name, context.label_dir_name, context.label_extension
158+
)
159+
if annotation_filepath is None:
160+
logger.warning(f"Couldn't generate annotation file path for image file [{filename}]")
161+
continue
162+
annotation_filename = export_path / context.path / annotation_filepath
163+
annotation_filename.parent.mkdir(parents=True, exist_ok=True)
164+
annotation_content = annotations_to_string(anns, context)
165+
if annotation_content is not None:
166+
with open(annotation_filename, "w") as f:
167+
f.write(annotation_content)
168+
169+
# TODO: test/val splitting
170+
yaml_file_path = export_path / meta_file
171+
with open(yaml_file_path, "w") as yaml_f:
172+
yaml_f.write(context.get_yaml_content())
173+
174+
logger.warning(f"Saved annotations to {context.path}\nand .YAML file at {yaml_file_path}")
175+
176+
return yaml_file_path.absolute()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
from pathlib import Path
2+
from typing import Union, Optional, Tuple
3+
4+
import PIL.Image
5+
6+
ImageType = Union[str, Path, PIL.Image.Image]
7+
8+
9+
def determine_image_dimensions(
10+
image_width: Optional[int] = None,
11+
image_height: Optional[int] = None,
12+
image: Optional[ImageType] = None,
13+
) -> Tuple[int, int]:
14+
if image_width is not None and image_height is not None:
15+
return image_width, image_height
16+
if image is None:
17+
raise ValueError("Either image or image_width and image_height should be provided")
18+
19+
if not isinstance(image, PIL.Image.Image):
20+
image = PIL.Image.open(image)
21+
return image.size
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from typing import Callable, Dict
2+
3+
from lxml.etree import ElementBase
4+
5+
from .box import parse_box
6+
from .polygon import parse_polygon
7+
from .points import parse_points
8+
from .skeleton import parse_skeleton
9+
from dagshub_annotation_converter.ir.image import IRImageAnnotationBase
10+
11+
CVATParserFunction = Callable[[ElementBase, ElementBase], IRImageAnnotationBase]
12+
13+
annotation_parsers: Dict[str, CVATParserFunction] = {
14+
"box": parse_box,
15+
"polygon": parse_polygon,
16+
"points": parse_points,
17+
"skeleton": parse_skeleton,
18+
}

0 commit comments

Comments
 (0)