From 0f0be1ca71418ad5eccfd60ad271168029200bf3 Mon Sep 17 00:00:00 2001 From: xiaoyao9184 <6614349+xiaoyao9184@users.noreply.github.com> Date: Fri, 3 Jan 2025 23:05:41 +0800 Subject: [PATCH] Support ml backend for label-studio --- .gitignore | 1 + label/.gitignore | 3 + label/README.md | 127 ++++++++++++++++++++++ label/_wsgi.py | 122 +++++++++++++++++++++ label/label_mappings.json | 1 + label/model.py | 203 +++++++++++++++++++++++++++++++++++ label/requirements.txt | 2 + label/requirements_base.txt | 2 + label/requirements_dev.txt | 1 + label/requirements_test.txt | 3 + label/test_api.py | 94 ++++++++++++++++ label/test_images/image.jpeg | Bin 0 -> 91597 bytes 12 files changed, 559 insertions(+) create mode 100644 label/.gitignore create mode 100644 label/README.md create mode 100644 label/_wsgi.py create mode 100644 label/label_mappings.json create mode 100644 label/model.py create mode 100644 label/requirements.txt create mode 100644 label/requirements_base.txt create mode 100644 label/requirements_dev.txt create mode 100644 label/requirements_test.txt create mode 100644 label/test_api.py create mode 100644 label/test_images/image.jpeg diff --git a/.gitignore b/.gitignore index fbca225..a60da8a 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ results/ +cache.db \ No newline at end of file diff --git a/label/.gitignore b/label/.gitignore new file mode 100644 index 0000000..9332d63 --- /dev/null +++ b/label/.gitignore @@ -0,0 +1,3 @@ +.file-cache/ +__pycache__/ +cache.db \ No newline at end of file diff --git a/label/README.md b/label/README.md new file mode 100644 index 0000000..c00bf77 --- /dev/null +++ b/label/README.md @@ -0,0 +1,127 @@ + + +# Surya model connection + +The [Surya](https://github.com/VikParuchuri/surya) model connection is a powerful tool that integrates the capabilities of Surya with Label Studio. It is designed to assist in machine learning labeling tasks, specifically those involving Optical Character Recognition (OCR). 
+ +The primary function of this connection is to recognize and extract text from images, which can be a crucial step in many machine learning workflows. By automating this process, the Surya model connection can significantly increase efficiency, reducing the time and effort required for manual text extraction. + +In the context of Label Studio, this connection enhances the platform's labeling capabilities, allowing users to automatically generate labels for text in images. This can be particularly useful in tasks such as data annotation, document digitization, and more. + +## Before you begin + +Before you begin, you must install the [Label Studio ML backend](https://github.com/HumanSignal/label-studio-ml-backend?tab=readme-ov-file#quickstart). + +This tutorial uses the [`surya` example](https://github.com/xiaoyao9184/docker-surya/tree/master/label). + +## Labeling configuration + +The Surya model connection can be used with the default labeling configuration for OCR in Label Studio. This configuration typically involves defining the types of labels to be used (e.g., text, handwriting) and the regions of the image where these labels should be applied. + +When setting the labeling configuration, select the **Computer Vision > Optical Character Recognition** template. This template is pre-configured for OCR tasks and includes the necessary elements for labeling text in images: + +```xml + + + + + + + + + +