
hhj #379 (Open)

Wants to merge 9 commits into base: master.

2 changes: 1 addition & 1 deletion label_studio_ml/examples/nemo/asr.py
@@ -12,7 +12,7 @@
 
 class NemoASR(LabelStudioMLBase):
 
-    def __init__(self, model_name='QuartzNet15x5Base-En', **kwargs):
+    def __init__(self, model_name='stt_hi_conformer_ctc_medium', **kwargs):
         super(NemoASR, self).__init__(**kwargs)
 
         # Find TextArea control tag and bind ASR model to it
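For context, a minimal sketch of what the new default resolves to when the backend starts, assuming nemo_toolkit[asr] is installed. The generic ASRModel loader is used here because stt_hi_conformer_ctc_medium is a BPE-based Conformer checkpoint; the audio path is a hypothetical placeholder:

import nemo.collections.asr as nemo_asr

# Downloads the Hindi Conformer-CTC checkpoint from NGC on first use
model = nemo_asr.models.ASRModel.from_pretrained(model_name="stt_hi_conformer_ctc_medium")

# Transcribe a local file to verify the model loads and runs
transcripts = model.transcribe(["/path/to/audio.wav"])  # hypothetical path
print(transcripts[0])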
23 changes: 17 additions & 6 deletions label_studio_ml/examples/openai/docker-compose.yml
@@ -1,14 +1,25 @@
 version: "3.8"
 
 services:
-  server:
-    container_name: server
+  label-studio-ml-backend:
     build: .
+    container_name: ls-ml-backend
     environment:
-      - LABEL_STUDIO_ML_BACKEND_V2=true
-      - LOG_LEVEL=DEBUG
-      - OPENAI_API_KEY=<your-openai-api-key>
+      - PYTHONUNBUFFERED=1
     ports:
       - "9090:9090"
     volumes:
-      - "./prompt.txt:/app/prompt.txt"
+      - .:/app
+    command: python openai_predictor.py
+
+  label-studio:
+    image: heartexlabs/label-studio:latest
+    container_name: label-studio
+    environment:
+      - LABEL_STUDIO_ML_BACKENDS=ml_backend:9090
+    ports:
+      - "8080:8080"
+    depends_on:
+      - label-studio-ml-backend
+    volumes:
+      - label-studio-data:/label-studio/data
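Once docker compose up brings both services online, a quick smoke test against the ML backend confirms the port mapping works. This is a sketch that assumes the standard /health route served by the label-studio-ml Flask app:

import requests

# The ML backend is published on host port 9090 per the compose file above
resp = requests.get("http://localhost:9090/health")
print(resp.status_code, resp.text)  # expect 200 and a small JSON status payload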
115 changes: 30 additions & 85 deletions label_studio_ml/examples/openai/openai_predictor.py
@@ -1,95 +1,40 @@
 import os
-import openai
-import difflib
-import logging
 
 from label_studio_ml.model import LabelStudioMLBase
+import logging
+from typing import List, Dict, Optional
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
 
 logger = logging.getLogger(__name__)
 
-openai.api_key = os.environ['OPENAI_API_KEY']
 
-
-class OpenAIPredictor(LabelStudioMLBase):
-    DEFAULT_PROMPT = os.path.join(os.path.dirname(__file__), 'prompt.txt')
+class GPTIndicBackend(LabelStudioMLBase):
 
     def __init__(self, **kwargs):
-        # don't forget to initialize base class...
-        super(OpenAIPredictor, self).__init__(**kwargs)
-
-        # Parsed label config contains only one output of <Choices> type
-        assert len(self.parsed_label_config) == 1
-        self.from_name, self.info = list(self.parsed_label_config.items())[0]
-        assert self.info['type'] == 'Choices'
-
-        # the model has only one textual input
-        assert len(self.info['to_name']) == 1
-        assert len(self.info['inputs']) == 1
-        assert self.info['inputs'][0]['type'] == 'Text'
-        self.to_name = self.info['to_name'][0]
-        self.value = self.info['inputs'][0]['value']
-        self.labels = self.info['labels']
-
-        self.openai_model = kwargs.get('model', 'gpt-3.5-turbo')
-        self.openai_max_tokens = int(kwargs.get('max_tokens', 40))
-        self.openai_temperature = float(kwargs.get('temperature', 0.5))
-        self.openai_prompt = kwargs.get('prompt', self.DEFAULT_PROMPT)
-        if os.path.isfile(self.openai_prompt):
-            with open(self.openai_prompt) as f:
-                self.openai_prompt = f.read()
-
-        logger.debug(
-            f'Initialize OpenAI API with the following parameters:'
-            f' model={self.openai_model}, max_tokens={self.openai_max_tokens}, temperature={self.openai_temperature},'
-            f' prompt={self.openai_prompt}')
-
-    def _get_prompt(self, task_data):
-        if os.path.isfile(self.openai_prompt):
-            # Read the prompt from the file
-            # that allows changing the prompt without restarting the server
-            # use it only for development
-            with open(self.openai_prompt) as f:
-                prompt = f.read()
-        else:
-            prompt = self.openai_prompt
-        return prompt.format(labels=self.labels, **task_data)
-
-    def _get_predicted_label(self, task_data):
-        # Create a prompt for the OpenAI API
-        prompt = self._get_prompt(task_data)
-        # Call OpenAI's API to create a chat completion using the GPT-3 model
-        response = openai.ChatCompletion.create(
-            model=self.openai_model,
-            messages=[
-                {"role": "user", "content": prompt}  # The 'user' role is assigned to the prompt
-            ],
-            max_tokens=self.openai_max_tokens,  # Maximum number of tokens in the response is set to 40
-            n=1,  # We only want one response
-            stop=None,  # There are no specific stop sequences
-            temperature=self.openai_temperature,  # The temperature parameter affects randomness in the output. Lower values (like 0.5) make the output more deterministic.
-        )
-        logger.debug(f'OpenAI response: {response}')
-        # Extract the response text from the ChatCompletion response
-        response_text = response.choices[0].message['content'].strip()
+        # Initialization for the ML backend
+        super(GPTIndicBackend, self).__init__(**kwargs)
 
-        # Extract the matched labels from the response text
-        matched_labels = []
-        for pred in response_text.split("\n"):
-            scores = list(map(lambda l: difflib.SequenceMatcher(None, pred, l).ratio(), self.labels))
-            matched_labels.append(self.labels[scores.index(max(scores))])
-
-        # Return the input_text along with the identified sentiment
-        return matched_labels
+        # Load the pre-trained tokenizer and model from HuggingFace
+        self.tokenizer = AutoTokenizer.from_pretrained("aashay96/indic-gpt")
+        self.model = AutoModelForCausalLM.from_pretrained("aashay96/indic-gpt")
 
     def predict(self, tasks, **kwargs):
         predictions = []
 
         for task in tasks:
-            predicted_labels = self._get_predicted_label(task['data'])
-            result = [{
-                'from_name': self.from_name,
-                'to_name': self.to_name,
-                'type': 'choices',
-                'value': {'choices': predicted_labels}
-            }]
-            predictions.append({'result': result, 'score': 1.0})
+            # Extract prompt from the task data
+            prompt_text = task['data']['prompt']
+            inputs = self.tokenizer.encode(prompt_text, return_tensors="pt")
+
+            # Generate the response using the model
+            outputs = self.model.generate(inputs, max_length=100)
+            response_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+            # Structure the prediction result
+            predictions.append({
+                'result': [{
+                    'from_name': 'instruction',
+                    'to_name': 'prompt',
+                    'type': 'textarea',
+                    'value': {'text': [response_text[len(prompt_text):]]},
+                }],
+                'score': 1.0  # Confidence score
+            })
 
         return predictions
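The same generation path, extracted as a standalone sketch for testing outside Label Studio. The Hindi prompt is an illustrative placeholder standing in for task['data']['prompt'], and max_length=100 mirrors the backend's setting:

from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("aashay96/indic-gpt")
model = AutoModelForCausalLM.from_pretrained("aashay96/indic-gpt")

prompt_text = "भारत एक"  # illustrative prompt
inputs = tokenizer.encode(prompt_text, return_tensors="pt")
outputs = model.generate(inputs, max_length=100)
response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

# The backend returns only the continuation, slicing off the echoed prompt;
# note this assumes decode() reproduces the prompt prefix verbatim, which
# tokenization round-trips do not always guarantee.
print(response_text[len(prompt_text):])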
2 changes: 1 addition & 1 deletion label_studio_ml/examples/openai/requirements.txt
@@ -1,4 +1,4 @@
 gunicorn==20.1.0
 label-studio-ml>=1.0.9
 rq==1.10.1
-openai==0.27.4
+transformers
2 changes: 1 addition & 1 deletion label_studio_ml/examples/segment_anything_model/Dockerfile
@@ -25,7 +25,7 @@ COPY * /app/
 
 
 
-ENV ACCESS_TOKEN=0c5e516d37ed2bc1d11ff5fc59ebaf5e0f756386
+ENV ACCESS_TOKEN=be24dfbee45f8916fc2fee2d6f71da1dc9d5f109
 
 RUN pip install opencv-python
 