feat: RND-118: YOLO for TimelineLabels (#626)

Co-authored-by: micaelakaplan <[email protected]> Co-authored-by: caitlinwheeless <[email protected]>
HumanSignal · Sep 19, 2024 · 5c12977 · 5c12977
1 parent c910e30
commit 5c12977
Show file tree

Hide file tree

Showing 31 changed files with 1,833 additions and 220 deletions.
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -91,7 +91,7 @@ jobs:
         run: docker compose -f label_studio_ml/examples/${{ matrix.backend_dir_name }}/docker-compose.yml up -d --build
 
       - name: Wait for stack
-        timeout-minutes: 10
+        timeout-minutes: 20
         run: |
           while [ "$(curl -s -o /dev/null -L -w ''%{http_code}'' "http://localhost:9090/health")" != "200" ]; do
             echo "=> Waiting for service to become available" && sleep 2s

diff --git a/label_studio_ml/api.py b/label_studio_ml/api.py
@@ -56,7 +56,7 @@ def _predict():
     data = request.json
     tasks = data.get('tasks')
     label_config = data.get('label_config')
-    project = data.get('project')
+    project = str(data.get('project'))
     project_id = project.split('.', 1)[0] if project else None
     params = data.get('params', {})
     context = params.pop('context', {})
@@ -123,8 +123,14 @@ def webhook():
     project_id = str(data['project']['id'])
     label_config = data['project']['label_config']
     model = MODEL_CLASS(project_id, label_config=label_config)
-    model.fit(event, data)
-    return jsonify({}), 201
+    result = model.fit(event, data)
+
+    try:
+        response = jsonify({'result': result, 'status': 'ok'})
+    except Exception as e:
+        response = jsonify({'error': str(e), 'status': 'error'})
+
+    return response, 201
 
 
 @_server.route('/health', methods=['GET'])

diff --git a/label_studio_ml/default_configs/_wsgi.py.tmpl b/label_studio_ml/default_configs/_wsgi.py.tmpl
@@ -4,30 +4,33 @@ import json
 import logging
 import logging.config
 
-logging.config.dictConfig({{
-  "version": 1,
-  "disable_existing_loggers": False,
-  "formatters": {{
-    "standard": {{
-      "format": "[%(asctime)s] [%(levelname)s] [%(name)s::%(funcName)s::%(lineno)d] %(message)s"
-    }}
-  }},
-  "handlers": {{
-    "console": {{
-      "class": "logging.StreamHandler",
-      "level": os.getenv('LOG_LEVEL'),
-      "stream": "ext://sys.stdout",
-      "formatter": "standard"
-    }}
-  }},
-  "root": {{
-    "level": os.getenv('LOG_LEVEL'),
-    "handlers": [
-      "console"
-    ],
-    "propagate": True
-  }}
-}})
+# Set a default log level if LOG_LEVEL is not defined
+log_level = os.getenv("LOG_LEVEL", "INFO")
+
+logging.config.dictConfig(
+    {
+        "version": 1,
+        "disable_existing_loggers": False,  # Prevent overriding existing loggers
+        "formatters": {
+            "standard": {
+                "format": "[%(asctime)s] [%(levelname)s] [%(name)s::%(funcName)s::%(lineno)d] %(message)s"
+            }
+        },
+        "handlers": {
+            "console": {
+                "class": "logging.StreamHandler",
+                "level": log_level,
+                "stream": "ext://sys.stdout",
+                "formatter": "standard",
+            }
+        },
+        "root": {
+            "level": log_level,
+            "handlers": ["console"],
+            "propagate": True,
+        },
+    }
+)
 
 from label_studio_ml.api import init_app
 from {script} import {model_class}
@@ -60,7 +63,7 @@ if __name__ == "__main__":
         '-d', '--debug', dest='debug', action='store_true',
         help='Switch debug mode')
     parser.add_argument(
-        '--log-level', dest='log_level', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'], default=None,
+        '--log-level', dest='log_level', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'], default=log_level,
         help='Logging level')
     parser.add_argument(
         '--model-dir', dest='model_dir', default=os.path.dirname(__file__),

diff --git a/label_studio_ml/examples/yolo/.dockerignore b/label_studio_ml/examples/yolo/.dockerignore
@@ -11,6 +11,7 @@
 !tests/*
 !control_models/*
 !models/*
+!utils/*
 
 # Include requirements files
 !requirements*.txt

diff --git a/label_studio_ml/examples/yolo/Dockerfile b/label_studio_ml/examples/yolo/Dockerfile
@@ -49,14 +49,14 @@ WORKDIR /app
 
 COPY . ./
 
-WORKDIR /app/models
-
 # Download the YOLO models
-RUN yolo predict model=yolov8m.pt source=/app/tests/car.jpg \
-    && yolo predict model=yolov8n.pt source=/app/tests/car.jpg \
-    && yolo predict model=yolov8n-cls.pt source=/app/tests/car.jpg \
-    && yolo predict model=yolov8n-seg.pt source=/app/tests/car.jpg
-
-WORKDIR /app
+RUN /bin/sh -c 'if [ ! -f /app/models/yolov8m.pt ]; then \
+    yolo predict model=/app/models/yolov8m.pt source=/app/tests/car.jpg \
+    && yolo predict model=/app/models/yolov8n.pt source=/app/tests/car.jpg \
+    && yolo predict model=/app/models/yolov8n-cls.pt source=/app/tests/car.jpg \
+    && yolo predict model=/app/models/yolov8n-seg.pt source=/app/tests/car.jpg; \
+    fi'
+
+ENV PYTHONPATH=/app
 
 CMD ["/app/start.sh"]
diff --git a/label_studio_ml/examples/yolo/README.md b/label_studio_ml/examples/yolo/README.md
@@ -36,16 +36,17 @@ making it easier to annotate large datasets and ensure high-quality predictions.
 
 **Supported Features**
 
-| YOLO Task Name                        | LS Control Tag                       | Prediction Supported | LS Import Supported | LS Export Supported |
-|---------------------------------------|--------------------------------------|----------------------|---------------------|---------------------|
-| Object Detection                      | `<RectangleLabels>`                  | ✅                    | YOLO, COCO          | YOLO, COCO          |
-| Oriented Bounding Boxes (OBB)         | `<RectangleLabels model_obb="true">` | ✅                    | YOLO                | YOLO                |
-| Image Instance Segmentation: Polygons | `<PolygonLabels>`                    | ✅                    | COCO                | YOLO, COCO          |
-| Image Semantic Segmentation: Masks    | `<BrushLabels>`                      | ❌                    | Native              | Native              |
-| Image Classification                  | `<Choices>`                          | ✅                    | Native              | Native              |
-| Pose Detection                        | `<KeyPoints>`                        | ✅                    | Native              | Native              |
-| Video Object Tracking                 | `<VideoRectangle>`                   | ✅                    | Native              | Native              |
-| Video Temporal Classification         | `<TimelineLabels>`                   | Coming soon          | Native              | Native              |
+| YOLO Task Name                                               | LS Control Tag                       | Prediction Supported | LS Import Supported | LS Export Supported |
+|--------------------------------------------------------------|--------------------------------------|----------------------|---------------------|---------------------|
+| Object Detection                                             | `<RectangleLabels>`                  | ✅                    | YOLO, COCO          | YOLO, COCO          |
+| Oriented Bounding Boxes (OBB)                                | `<RectangleLabels model_obb="true">` | ✅                    | YOLO                | YOLO                |
+| Image Instance Segmentation: Polygons                        | `<PolygonLabels>`                    | ✅                    | COCO                | YOLO, COCO          |
+| Image Semantic Segmentation: Masks                           | `<BrushLabels>`                      | ❌                    | Native              | Native              |
+| Image Classification                                         | `<Choices>`                          | ✅                    | Native              | Native              |
+| Pose Detection                                               | `<KeyPoints>`                        | ✅                    | Native              | Native              |
+| Video Object Tracking                                        | `<VideoRectangle>`                   | ✅                    | Native              | Native              |
+| [Video Temporal Classification](./README_TIMELINE_LABELS.md) | `<TimelineLabels>`                   | ✅                    | Native              | Native              |
+
 
 * **LS Control Tag**: Label Studio [control tag](https://labelstud.io/tags/) from the labeling configuration. 
 * **LS Import Supported**: Indicates whether Label Studio supports Import from YOLO format to Label Studio (using the LS converter).
@@ -82,7 +83,7 @@ This tutorial uses the [YOLO example](https://github.com/HumanSignal/label-studi
 
 4. Then from the **Model** page in the project settings, [connect the model](https://labelstud.io/guide/ml#Connect-the-model-to-Label-Studio). The default URL is `http://localhost:9090`. 
 
-5. Add images to Label Studio.
+5. Add images or videos (depending on the task you want to solve) to Label Studio.
 
 6. Open any task in the Data Manager and see the predictions from the YOLO model.
 
@@ -97,11 +98,13 @@ This tutorial uses the [YOLO example](https://github.com/HumanSignal/label-studi
 
 **Control tags**
 
+- `<Choices>` - [Classification](https://labelstud.io/tags/choices); image classification task
 - `<RectangleLabels>` - [Bounding boxes](https://labelstud.io/tags/rectanglelabels); object detection task
 - `<PolygonLabels>` - [Polygons](https://labelstud.io/tags/polygonlabels); segmentation task
-- `<VideoRectangle>` - [Video bounding boxes](https://labelstud.io/tags/videorectangle); video object tracking task
+- `<VideoRectangle>` - [Video bounding boxes](https://labelstud.io/tags/videorectangle); object tracking task for videos
 - `<KeyPointLabels>` - [Key points](https://labelstud.io/tags/keypointlabels); pose detection task
-- `<Choices>` - [Classification](https://labelstud.io/tags/choices)
+- `<TimelineLabels>` - [Temporal labels for videos](https://labelstud.io/tags/timelinelabels); multi-label temporal classification task for videos
+
 
 **How to skip the control tag?**
 
@@ -681,6 +684,55 @@ Small models like `yolov8n.pt` are recommended for real-time tracking, however,
 
 <br>
 
+
+## Video temporal classification using `TimelineLabels`
+
+This ML backend supports temporal multi-label video classification for the [`<TimelineLabels>` control tag](https://labelstud.io/tags/timelinelabels) in Label Studio. 
+There are two modes available:
+- **Simple:** In the simple mode, the model uses pre-trained YOLO classes to generate predictions without additional training.  
+- **Trainable:** In the [trainable mode](README_TIMELINE_LABELS.md), the model can be trained on custom labels using annotations submitted in Label Studio. Because training is performed on a small number of annotations, this is a few-shot learning setup.  
+
+<div align="left">
+  <a href="https://www.youtube.com/watch?v=tfMn5q1tqKI" title="Video Frame Classification with YOLOv8 and Label Studio">
+    <img src="http://img.youtube.com/vi/tfMn5q1tqKI/0.jpg" alt="Video Temporal Classification video" style="width:50%;"/>
+      <br>
+    Check the video tutorial
+  </a>
+</div>
+<br/>
+
+### Labeling config
+
+```xml
+<View>
+  <Video name="video" value="$video"/>
+  <TimelineLabels 
+          name="label" toName="video" 
+          model_trainable="false" model_score_threshold="0.25">
+    <Label value="Ball" predicted_values="soccer_ball" />
+    <Label value="hamster" />
+  </TimelineLabels>
+</View>
+```
+
+### Model training
+
+For more details on using the `TimelineLabels` ML backend, including training the model 
+and adjusting neural network classifier parameters, please refer to 
+**[README_TIMELINE_LABELS.md](README_TIMELINE_LABELS.md)**.
+
+### Default model
+
+`yolov8n-cls.pt` is the default classification model for simple mode.
+
+
+<br>
+
+-------------------
+
+<br>
+
+
 ## Run the YOLO ML backend
 
 

diff --git a/label_studio_ml/examples/yolo/README_DEVELOP.md b/label_studio_ml/examples/yolo/README_DEVELOP.md
@@ -47,13 +47,19 @@ classDiagram
         +create_video_rectangles(results, path) List[Dict]
         +update_tracker_params(yaml_path: str, prefix: str) str | None
     }
+    
+    class TimelineLabelsModel {
+        +predict_regions(path: str) List[Dict]
+        +fit(event, data)
+    }
 
     ControlModel <|-- RectangleLabelsModel
     ControlModel <|-- RectangleLabelsObbModel
     ControlModel <|-- PolygonLabelsModel
     ControlModel <|-- ChoicesModel
     ControlModel <|-- KeyPointLabelsModel
     ControlModel <|-- VideoRectangleModel
+    ControlModel <|-- TimelineLabelsModel
     
 ```
 
@@ -122,6 +128,12 @@ The architecture of the project is modular and is primarily centered around inte
      - `create_video_rectangles()`: Processes the output of the tracking model to create a sequence of bounding boxes across video frames.
      - `update_tracker_params()`: Customizes the tracking parameters based on settings in Label Studio’s configuration.
 
+8. **`control_models/timelinelabels.py` (TimelineLabelsModel)**:
+    - **Purpose**: Supports incremental training of a YOLO model on video data: the model is updated with new annotations as they are submitted.
+    - **Key Functions**:
+      - `predict_regions()`: Runs YOLO on video frames and returns the predictions.
+      - `fit()`: Placeholder method for updating the model with new annotations.
+
 ### **Module Interaction**
 
 - **Workflow**: The main workflow begins with `model.py`, which reads tasks and the Label Studio configuration to detect and instantiate the appropriate control models. These control models are responsible for making predictions using the YOLO model and converting the results into a format that Label Studio can use for annotations.