Merge pull request #654 from openvinotoolkit/zm/release-0.3

Release 0.3
openvinotoolkit · Feb 21, 2022 · 7e0131d · 7e0131d
2 parents b0fa100 + fd2b332
commit 7e0131d
Show file tree

Hide file tree

Showing 196 changed files with 7,279 additions and 2,186 deletions.
diff --git a/.github/workflows/github_pages.yml b/.github/workflows/github_pages.yml
@@ -32,7 +32,7 @@ jobs:
 
       - name: Build docs
         run: |
-          pip install gitpython packaging toml Sphinx==4.2.0 sphinx-rtd-theme==1.0.0
+          pip install gitpython packaging toml Sphinx==4.2.0 sphinx-rtd-theme==1.0.0 sphinx-copybutton==0.4.0
           pip install -r requirements.txt
           pip install git+https://github.com/pytorch-ignite/sphinxcontrib-versioning.git@a1a1a94ca80a0233f0df3eaf9876812484901e76
           sphinx-versioning -l site/source/conf.py build -r develop -w develop site/source site/static/api

diff --git a/.github/workflows/health_check.yml b/.github/workflows/health_check.yml
@@ -8,7 +8,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ['3.6', '3.7', '3.8', '3.9']
+        python-version: ['3.7', '3.8', '3.9', '3.10']
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v2
@@ -24,6 +24,6 @@ jobs:
           pytest -v --cov --cov-report xml:coverage.xml
           datum -h
       - name: Sending coverage results
-        if: matrix.python-version == '3.6'
+        if: matrix.python-version == '3.7'
         run: |
           bash <(curl -Ls https://coverage.codacy.com/get.sh) report -r coverage.xml -t ${{ secrets.CODACY_PROJECT_TOKEN }}
diff --git a/.github/workflows/pr_checks.yml b/.github/workflows/pr_checks.yml
@@ -18,7 +18,7 @@ jobs:
       fail-fast: false
       matrix:
         os: ['macos-10.15', 'ubuntu-20.04', 'windows-2016']
-        python-version: ['3.6', '3.7', '3.8', '3.9']
+        python-version: ['3.7', '3.8', '3.9', '3.10']
     name: build and test (${{ matrix.os }}, Python ${{ matrix.python-version }})
     runs-on: ${{ matrix.os }}
     steps:

diff --git a/3rd-party.txt b/3rd-party.txt
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,91 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 
+## 21/02/2022 - Release v0.3
+### Added
+- Ability to import a video as frames with the `video_frames` format and
+  to split a video into frames with the `datum util split_video` command
+  (<https://github.com/openvinotoolkit/datumaro/pull/555>)
+- `--subset` parameter in the `image_dir` format
+  (<https://github.com/openvinotoolkit/datumaro/pull/555>)
+- `MediaManager` API to control loaded media resources at runtime
+  (<https://github.com/openvinotoolkit/datumaro/pull/555>)
+- Command to detect the format of a dataset
+  (<https://github.com/openvinotoolkit/datumaro/pull/576>)
+- More comfortable access to library API via `import datumaro`
+  (<https://github.com/openvinotoolkit/datumaro/pull/630>)
+- CLI command-like free functions (`export`, `transform`, ...)
+  (<https://github.com/openvinotoolkit/datumaro/pull/630>)
+- Reading specific annotation files for train dataset in Cityscapes
+  (<https://github.com/openvinotoolkit/datumaro/pull/632>)
+- Random sampling transforms (`random_sampler`, `label_random_sampler`)
+  to create smaller datasets from bigger ones
+  (<https://github.com/openvinotoolkit/datumaro/pull/636>,
+   <https://github.com/openvinotoolkit/datumaro/pull/640>)
+- API to report dataset import and export progress;
+  API to report dataset import and export errors and take action (skip, fail)
+  (supported in COCO, VOC and YOLO formats)
+  (<https://github.com/openvinotoolkit/datumaro/pull/650>)
+- Support for downloading the ImageNetV2 and COCO datasets
+  (<https://github.com/openvinotoolkit/datumaro/pull/653>,
+   <https://github.com/openvinotoolkit/datumaro/pull/659>)
+- A way for formats to signal that they don't support detection
+  (<https://github.com/openvinotoolkit/datumaro/pull/665>)
+- Removal transforms to remove items/annotations/attributes from dataset
+  (`remove_items`, `remove_annotations`, `remove_attributes`)
+  (<https://github.com/openvinotoolkit/datumaro/pull/670>)
+
+### Changed
+- Allowed direct file paths in `datum import`. Such sources are imported as if
+  the `rpath` parameter were specified; however, only the selected path
+  is copied into the project
+  (<https://github.com/openvinotoolkit/datumaro/pull/555>)
+- Improved `stats` performance, added new filtering parameters,
+  image stats (`unique`, `repeated`) moved to the `dataset` section,
+  removed `mean` and `std` from the `dataset` section
+  (<https://github.com/openvinotoolkit/datumaro/pull/621>)
+- Allowed `Image` creation from just `size` info
+  (<https://github.com/openvinotoolkit/datumaro/pull/634>)
+- Added image search in VOC XML-based subformats
+  (<https://github.com/openvinotoolkit/datumaro/pull/634>)
+- Added image path equality checks in simple merge, when applicable
+  (<https://github.com/openvinotoolkit/datumaro/pull/634>)
+- Supported saving box attributes when downloading the TFDS version of VOC
+  (<https://github.com/openvinotoolkit/datumaro/pull/668>)
+
+### Deprecated
+- TBD
+
+### Removed
+- Official support of Python 3.6 (due to its EOL)
+  (<https://github.com/openvinotoolkit/datumaro/pull/617>)
+- Backward compatibility annotation symbols in `components.extractor`
+  (<https://github.com/openvinotoolkit/datumaro/pull/630>)
+
+### Fixed
+- Prohibited calling `add`, `import` and `export` commands without a project
+  (<https://github.com/openvinotoolkit/datumaro/pull/555>)
+- Calling `make_dataset` on empty project tree now produces the error properly
+  (<https://github.com/openvinotoolkit/datumaro/pull/555>)
+- Saving (overwriting) a dataset in a project when rpath is used
+  (<https://github.com/openvinotoolkit/datumaro/pull/613>)
+- Output image extension preserving in the `Resize` transform
+  (<https://github.com/openvinotoolkit/datumaro/issues/606>)
+- Memory overuse in the `Resize` transform
+  (<https://github.com/openvinotoolkit/datumaro/issues/607>)
+- Invalid image pixels produced by the `Resize` transform
+  (<https://github.com/openvinotoolkit/datumaro/issues/618>)
+- Numeric warnings that sometimes occurred in `stats` command
+  (e.g. <https://github.com/openvinotoolkit/datumaro/issues/607>)
+  (<https://github.com/openvinotoolkit/datumaro/pull/621>)
+- Added missing item attribute merging in simple merge
+  (<https://github.com/openvinotoolkit/datumaro/pull/634>)
+- Inability to disambiguate VOC from LabelMe in some cases
+  (<https://github.com/openvinotoolkit/datumaro/issues/658>)
+
+### Security
+- TBD
+
 ## 28/01/2022 - Release v0.2.3
 ### Added
 - Command to download public datasets
@@ -18,11 +103,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   (<https://github.com/openvinotoolkit/datumaro/pull/585>)
 
 ### Changed
+- The `pycocotools` dependency lower bound is raised to `2.0.4`.
+  (<https://github.com/openvinotoolkit/datumaro/pull/449>)
 - `smooth_line` from `datumaro.util.annotation_util` - the function
   is renamed to `approximate_line` and has updated interface
   (<https://github.com/openvinotoolkit/datumaro/pull/592>)
-- The `pycocotools` dependency lower bound is raised to `2.0.4`.
-  (<https://github.com/openvinotoolkit/datumaro/pull/449>)
 
 ### Deprecated
 - Python 3.6 support

diff --git a/README.md b/README.md
@@ -26,28 +26,22 @@ CVAT annotations                             ---> Publication, statistics etc.
 
 [(Back to top)](#dataset-management-framework-datumaro)
 
-- Dataset reading, writing, conversion in any direction. [Supported formats](https://openvinotoolkit.github.io/datumaro/docs/user-manual/supported_formats):
-  - [COCO](http://cocodataset.org/#format-data) (`image_info`, `instances`, `person_keypoints`, `captions`, `labels`, `panoptic`, `stuff`)
-  - [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/index.html) (`classification`, `detection`, `segmentation`, `action_classification`, `person_layout`)
-  - [YOLO](https://github.com/AlexeyAB/darknet#how-to-train-pascal-voc-data) (`bboxes`)
-  - [TF Detection API](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md) (`bboxes`, `masks`)
-  - [WIDER Face](http://shuoyang1213.me/WIDERFACE/) (`bboxes`)
-  - [VGGFace2](https://github.com/ox-vgg/vgg_face2) (`landmarks`, `bboxes`)
-  - [MOT sequences](https://arxiv.org/pdf/1906.04567.pdf)
-  - [MOTS PNG](https://www.vision.rwth-aachen.de/page/mots)
-  - [ImageNet](http://image-net.org/)
+- Dataset reading, writing, conversion in any direction.
   - [CIFAR-10/100](https://www.cs.toronto.edu/~kriz/cifar.html) (`classification`)
-  - [MNIST](http://yann.lecun.com/exdb/mnist/) (`classification`)
-  - [MNIST in CSV](https://pjreddie.com/projects/mnist-in-csv/) (`classification`)
-  - [CamVid](http://mi.eng.cam.ac.uk/research/projects/VideoRec/CamVid/)
   - [Cityscapes](https://www.cityscapes-dataset.com/)
-  - [Kitti](http://www.cvlibs.net/datasets/kitti/index.php) (`segmentation`, `detection`, `3D raw` / `velodyne points`)
-  - [Supervisely](https://docs.supervise.ly/data-organization/00_ann_format_navi) (`point cloud`)
+  - [COCO](http://cocodataset.org/#format-data) (`image_info`, `instances`, `person_keypoints`, `captions`, `labels`, `panoptic`, `stuff`)
   - [CVAT](https://openvinotoolkit.github.io/cvat/docs/manual/advanced/xml_format)
+  - [ImageNet](http://image-net.org/)
+  - [Kitti](http://www.cvlibs.net/datasets/kitti/index.php) (`segmentation`, `detection`, `3D raw` / `velodyne points`)
   - [LabelMe](http://labelme.csail.mit.edu/Release3.0)
-  - [ICDAR13/15](https://rrc.cvc.uab.es/?ch=2) (`word_recognition`, `text_localization`, `text_segmentation`)
-  - [Market-1501](https://www.aitribune.com/dataset/2018051063) (`person re-identification`)
   - [LFW](http://vis-www.cs.umass.edu/lfw/) (`classification`, `person re-identification`, `landmarks`)
+  - [MNIST](http://yann.lecun.com/exdb/mnist/) (`classification`)
+  - [Open Images](https://storage.googleapis.com/openimages/web/download.html)
+  - [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/index.html) (`classification`, `detection`, `segmentation`, `action_classification`, `person_layout`)
+  - [TF Detection API](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md) (`bboxes`, `masks`)
+  - [YOLO](https://github.com/AlexeyAB/darknet#how-to-train-pascal-voc-data) (`bboxes`)
+
+  Other formats and documentation for them can be found [here](https://openvinotoolkit.github.io/datumaro/docs/user-manual/supported_formats).
 - Dataset building
   - Merging multiple datasets into one
   - Dataset filtering by a custom criteria:

diff --git a/datumaro/__init__.py b/datumaro/__init__.py
@@ -1,3 +1,37 @@
-# Copyright (C) 2019-2020 Intel Corporation
+# Copyright (C) 2019-2022 Intel Corporation
 #
 # SPDX-License-Identifier: MIT
+
+from . import errors as errors
+from . import ops as ops
+from . import project as project
+from .components.annotation import (
+    NO_GROUP, Annotation, AnnotationType, Bbox, BinaryMaskImage, Caption,
+    Categories, Colormap, CompiledMask, CompiledMaskImage, Cuboid3d,
+    IndexMaskImage, Label, LabelCategories, Mask, MaskCategories, Points,
+    PointsCategories, Polygon, PolyLine, RgbColor, RleMask,
+)
+from .components.cli_plugin import CliPlugin
+from .components.converter import (
+    Converter, ExportErrorPolicy, FailingExportErrorPolicy,
+)
+from .components.dataset import (
+    Dataset, DatasetPatch, DatasetSubset, IDataset, ItemStatus, eager_mode,
+)
+from .components.environment import Environment, PluginRegistry
+from .components.extractor import (
+    DEFAULT_SUBSET_NAME, CategoriesInfo, DatasetItem, Extractor,
+    FailingImportErrorPolicy, IExtractor, Importer, ImportErrorPolicy,
+    ItemTransform, SourceExtractor, Transform,
+)
+from .components.hl_ops import (  # pylint: disable=redefined-builtin
+    export, filter, merge, run_model, transform, validate,
+)
+from .components.launcher import Launcher, ModelTransform
+from .components.media import ByteImage, Image, MediaElement, Video, VideoFrame
+from .components.media_manager import MediaManager
+from .components.progress_reporting import (
+    NullProgressReporter, ProgressReporter,
+)
+from .components.validator import Validator
+from .version import VERSION
diff --git a/datumaro/cli/__main__.py b/datumaro/cli/__main__.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2021 Intel Corporation
+# Copyright (C) 2019-2022 Intel Corporation
 #
 # SPDX-License-Identifier: MIT
 
@@ -56,17 +56,18 @@ def _make_subcommands_help(commands, help_line_start=0):
 
 def _get_known_contexts():
     return [
+        ('model', contexts.model, "Actions with models"),
         ('project', contexts.project, "Actions with projects"),
         ('source', contexts.source, "Actions with data sources"),
-        ('model', contexts.model, "Actions with models"),
+        ('util', contexts.util, "Auxillary tools and utilities"),
     ]
 
 def _get_known_commands():
     return [
         ("Project modification:", None, ''),
+        ('add', commands.add, "Add dataset"),
         ('create', commands.create, "Create empty project"),
         ('import', commands.import_, "Import dataset"),
-        ('add', commands.add, "Add dataset"),
         ('remove', commands.remove, "Remove dataset"),
 
         ("", None, ''),
@@ -78,17 +79,19 @@ def _get_known_commands():
 
         ("", None, ''),
         ("Dataset operations:", None, ''),
+        ('convert', commands.convert, "Convert dataset between formats"),
+        ('detect-format', commands.detect_format,
+            "Detect the format of a dataset"),
+        ('diff', commands.diff, "Compare datasets"),
         ('download', commands.download, "Download a publicly available dataset"),
+        ('explain', commands.explain, "Run Explainable AI algorithm for model"),
         ('export', commands.export, "Export dataset in some format"),
         ('filter', commands.filter, "Filter dataset items"),
-        ('transform', commands.transform, "Modify dataset items"),
+        ('info', commands.info, "Print dataset info"),
         ('merge', commands.merge, "Merge datasets"),
         ('patch', commands.patch, "Update dataset from another one"),
-        ('convert', commands.convert, "Convert dataset between formats"),
-        ('diff', commands.diff, "Compare datasets"),
         ('stats', commands.stats, "Compute dataset statistics"),
-        ('info', commands.info, "Print dataset info"),
-        ('explain', commands.explain, "Run Explainable AI algorithm for model"),
+        ('transform', commands.transform, "Modify dataset items"),
         ('validate', commands.validate, "Validate dataset")
     ]
 

diff --git a/datumaro/cli/commands/__init__.py b/datumaro/cli/commands/__init__.py
@@ -1,11 +1,11 @@
-# Copyright (C) 2019-2021 Intel Corporation
+# Copyright (C) 2019-2022 Intel Corporation
 #
 # SPDX-License-Identifier: MIT
 
 # pylint: disable=redefined-builtin
 
 from . import (
-    add, checkout, commit, convert, create, diff, download, explain, export,
-    filter, import_, info, log, merge, patch, remove, stats, status, transform,
-    validate,
+    add, checkout, commit, convert, create, detect_format, diff, download,
+    explain, export, filter, import_, info, log, merge, patch, remove, stats,
+    status, transform, validate,
 )