
hhj #379 (Open)

Wants to merge 9 commits into base: master.

2 changes: 1 addition & 1 deletion label_studio_ml/examples/nemo/asr.py
@@ -12,7 +12,7 @@
 
 class NemoASR(LabelStudioMLBase):
 
-    def __init__(self, model_name='QuartzNet15x5Base-En', **kwargs):
+    def __init__(self, model_name='stt_hi_conformer_ctc_medium', **kwargs):
         super(NemoASR, self).__init__(**kwargs)
 
         # Find TextArea control tag and bind ASR model to it
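For context, a minimal sketch of what the new default resolves to when the backend starts, assuming nemo_toolkit[asr] is installed. The generic ASRModel loader is used here because stt_hi_conformer_ctc_medium is a BPE-based Conformer checkpoint; the audio path is a hypothetical placeholder:

import nemo.collections.asr as nemo_asr

# Downloads the Hindi Conformer-CTC checkpoint from NGC on first use
model = nemo_asr.models.ASRModel.from_pretrained(model_name="stt_hi_conformer_ctc_medium")

# Transcribe a local file to verify the model loads and runs
transcripts = model.transcribe(["/path/to/audio.wav"])  # hypothetical path
print(transcripts[0])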
23 changes: 17 additions & 6 deletions label_studio_ml/examples/openai/docker-compose.yml
@@ -1,14 +1,25 @@
 version: "3.8"
 
 services:
-  server:
-    container_name: server
+  label-studio-ml-backend:
     build: .
+    container_name: ls-ml-backend
     environment:
-      - LABEL_STUDIO_ML_BACKEND_V2=true
-      - LOG_LEVEL=DEBUG
-      - OPENAI_API_KEY=<your-openai-api-key>
+      - PYTHONUNBUFFERED=1
     ports:
       - "9090:9090"
     volumes:
-      - "./prompt.txt:/app/prompt.txt"
+      - .:/app
+    command: python openai_predictor.py
+
+  label-studio:
+    image: heartexlabs/label-studio:latest
+    container_name: label-studio
+    environment:
+      - LABEL_STUDIO_ML_BACKENDS=ml_backend:9090
+    ports:
+      - "8080:8080"
+    depends_on:
+      - label-studio-ml-backend
+    volumes:
+      - label-studio-data:/label-studio/data
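Once docker compose up brings both services online, a quick smoke test against the ML backend confirms the port mapping works. This is a sketch that assumes the standard /health route served by the label-studio-ml Flask app:

import requests

# The ML backend is published on host port 9090 per the compose file above
resp = requests.get("http://localhost:9090/health")
print(resp.status_code, resp.text)  # expect 200 and a small JSON status payload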
115 changes: 30 additions & 85 deletions label_studio_ml/examples/openai/openai_predictor.py
@@ -1,95 +1,40 @@
 import os
-import openai
-import difflib
-import logging
 
 from label_studio_ml.model import LabelStudioMLBase
+import logging
+from typing import List, Dict, Optional
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
 
 logger = logging.getLogger(__name__)
 
-openai.api_key = os.environ['OPENAI_API_KEY']
 
-
-class OpenAIPredictor(LabelStudioMLBase):
-    DEFAULT_PROMPT = os.path.join(os.path.dirname(__file__), 'prompt.txt')
+class GPTIndicBackend(LabelStudioMLBase):
 
     def __init__(self, **kwargs):
-        # don't forget to initialize base class...
-        super(OpenAIPredictor, self).__init__(**kwargs)
-
-        # Parsed label config contains only one output of <Choices> type
-        assert len(self.parsed_label_config) == 1
-        self.from_name, self.info = list(self.parsed_label_config.items())[0]
-        assert self.info['type'] == 'Choices'
-
-        # the model has only one textual input
-        assert len(self.info['to_name']) == 1
-        assert len(self.info['inputs']) == 1
-        assert self.info['inputs'][0]['type'] == 'Text'
-        self.to_name = self.info['to_name'][0]
-        self.value = self.info['inputs'][0]['value']
-        self.labels = self.info['labels']
-
-        self.openai_model = kwargs.get('model', 'gpt-3.5-turbo')
-        self.openai_max_tokens = int(kwargs.get('max_tokens', 40))
-        self.openai_temperature = float(kwargs.get('temperature', 0.5))
-        self.openai_prompt = kwargs.get('prompt', self.DEFAULT_PROMPT)
-        if os.path.isfile(self.openai_prompt):
-            with open(self.openai_prompt) as f:
-                self.openai_prompt = f.read()
-
-        logger.debug(
-            f'Initialize OpenAI API with the following parameters:'
-            f' model={self.openai_model}, max_tokens={self.openai_max_tokens}, temperature={self.openai_temperature},'
-            f' prompt={self.openai_prompt}')
-
-    def _get_prompt(self, task_data):
-        if os.path.isfile(self.openai_prompt):
-            # Read the prompt from the file
-            # that allows changing the prompt without restarting the server
-            # use it only for development
-            with open(self.openai_prompt) as f:
-                prompt = f.read()
-        else:
-            prompt = self.openai_prompt
-        return prompt.format(labels=self.labels, **task_data)
-
-    def _get_predicted_label(self, task_data):
-        # Create a prompt for the OpenAI API
-        prompt = self._get_prompt(task_data)
-        # Call OpenAI's API to create a chat completion using the GPT-3 model
-        response = openai.ChatCompletion.create(
-            model=self.openai_model,
-            messages=[
-                {"role": "user", "content": prompt}  # The 'user' role is assigned to the prompt
-            ],
-            max_tokens=self.openai_max_tokens,  # Maximum number of tokens in the response is set to 40
-            n=1,  # We only want one response
-            stop=None,  # There are no specific stop sequences
-            temperature=self.openai_temperature,  # The temperature parameter affects randomness in the output. Lower values (like 0.5) make the output more deterministic.
-        )
-        logger.debug(f'OpenAI response: {response}')
-        # Extract the response text from the ChatCompletion response
-        response_text = response.choices[0].message['content'].strip()
+        # Initialization for the ML backend
+        super(GPTIndicBackend, self).__init__(**kwargs)
 
-        # Extract the matched labels from the response text
-        matched_labels = []
-        for pred in response_text.split("\n"):
-            scores = list(map(lambda l: difflib.SequenceMatcher(None, pred, l).ratio(), self.labels))
-            matched_labels.append(self.labels[scores.index(max(scores))])
-
-        # Return the input_text along with the identified sentiment
-        return matched_labels
+        # Load the pre-trained tokenizer and model from HuggingFace
+        self.tokenizer = AutoTokenizer.from_pretrained("aashay96/indic-gpt")
+        self.model = AutoModelForCausalLM.from_pretrained("aashay96/indic-gpt")
 
     def predict(self, tasks, **kwargs):
         predictions = []
 
         for task in tasks:
-            predicted_labels = self._get_predicted_label(task['data'])
-            result = [{
-                'from_name': self.from_name,
-                'to_name': self.to_name,
-                'type': 'choices',
-                'value': {'choices': predicted_labels}
-            }]
-            predictions.append({'result': result, 'score': 1.0})
+            # Extract prompt from the task data
+            prompt_text = task['data']['prompt']
+            inputs = self.tokenizer.encode(prompt_text, return_tensors="pt")
+
+            # Generate the response using the model
+            outputs = self.model.generate(inputs, max_length=100)
+            response_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+            # Structure the prediction result
+            predictions.append({
+                'result': [{
+                    'from_name': 'instruction',
+                    'to_name': 'prompt',
+                    'type': 'textarea',
+                    'value': {'text': [response_text[len(prompt_text):]]},
+                }],
+                'score': 1.0  # Confidence score
+            })
 
         return predictions
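The same generation path, extracted as a standalone sketch for testing outside Label Studio. The Hindi prompt is an illustrative placeholder standing in for task['data']['prompt'], and max_length=100 mirrors the backend's setting:

from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("aashay96/indic-gpt")
model = AutoModelForCausalLM.from_pretrained("aashay96/indic-gpt")

prompt_text = "भारत एक"  # illustrative prompt
inputs = tokenizer.encode(prompt_text, return_tensors="pt")
outputs = model.generate(inputs, max_length=100)
response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

# The backend returns only the continuation, slicing off the echoed prompt;
# note this assumes decode() reproduces the prompt prefix verbatim, which
# tokenization round-trips do not always guarantee.
print(response_text[len(prompt_text):])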
2 changes: 1 addition & 1 deletion label_studio_ml/examples/openai/requirements.txt
@@ -1,4 +1,4 @@
 gunicorn==20.1.0
 label-studio-ml>=1.0.9
 rq==1.10.1
-openai==0.27.4
+transformers
2 changes: 1 addition & 1 deletion label_studio_ml/examples/segment_anything_model/Dockerfile
@@ -25,7 +25,7 @@ COPY * /app/
 
 
 
-ENV ACCESS_TOKEN=0c5e516d37ed2bc1d11ff5fc59ebaf5e0f756386
+ENV ACCESS_TOKEN=be24dfbee45f8916fc2fee2d6f71da1dc9d5f109
 
 RUN pip install opencv-python
 