Skip to content

Commit

Permalink
also save audio transcriptions as files
Browse files Browse the repository at this point in the history
  • Loading branch information
fatihozturkh2o committed Sep 19, 2024
1 parent 7ff6764 commit c2fb6c3
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 3 deletions.
5 changes: 4 additions & 1 deletion openai_server/agent_prompting.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,7 +504,6 @@ def get_mermaid_renderer_helper():
def get_image_generation_helper():
imagegen_url = os.getenv("IMAGEGEN_OPENAI_BASE_URL", None)
if imagegen_url:
# TODO: When available, get the model from the url
if not os.getenv("IMAGEGEN_OPENAI_MODEL"):
os.environ["IMAGEGEN_OPENAI_MODEL"] = "flux.1-schnell"

Expand All @@ -531,7 +530,10 @@ def get_image_generation_helper():
def get_audio_transcription_helper():
stt_url = os.getenv("STT_OPENAI_BASE_URL", None)
if stt_url:
if not os.getenv("STT_OPENAI_MODEL"):
os.environ["STT_OPENAI_MODEL"] = "whisper-1"
cwd = os.path.abspath(os.getcwd())
base_path = os.getenv("H2OGPT_OPENAI_BASE_FILE_PATH", "./openai_files/")
audio_transcription = f"""\n* Audio transcription using python. Use for transcribing audio files to text.
* For an audio transcription, you are recommended to use the existing pre-built python code, E.g.:
```sh
Expand All @@ -541,6 +543,7 @@ def get_audio_transcription_helper():
```
* usage: python {cwd}/openai_server/agent_tools/audio_transcription.py [-h] --file_path FILE_PATH
* If you make an audio transcription, ensure you use python or shell code properly to generate the text file.
* By default the text file will be saved in the base directory: {base_path}, you can read the text file from there.
"""
else:
audio_transcription = (
Expand Down
21 changes: 19 additions & 2 deletions openai_server/agent_tools/audio_transcription.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@

def main():
parser = argparse.ArgumentParser(description="Get transcription of an audio file")
parser.add_argument("--model", type=str, default="whisper-1", help="Model name")
# Model
parser.add_argument("--model", type=str, required=False, help="Model name")
# File name
parser.add_argument("--file_path", type=str, required=True, help="Path to the audio file")
args = parser.parse_args()
Expand All @@ -14,14 +15,30 @@ def main():
assert stt_url is not None, "STT_OPENAI_BASE_URL environment variable is not set"
stt_api_key = os.getenv('STT_OPENAI_API_KEY', 'EMPTY')

if not args.model:
stt_model = os.getenv('STT_OPENAI_MODEL')
assert stt_model is not None, "STT_OPENAI_MODEL environment variable is not set"
args.model = stt_model

# Read the audio file
audio_file = open(args.file_path, "rb")
client = OpenAI(base_url=stt_url, api_key=stt_api_key)
transcription = client.audio.transcriptions.create(
model=args.model,
file=audio_file
)
print(f"Audio file successfully transcribed: '{transcription.text}'")
# Save the transcription to a text file
base_path = os.getenv("H2OGPT_OPENAI_BASE_FILE_PATH", "./openai_files/")
# Create the directory if it doesn't exist
if not os.path.exists(base_path):
os.makedirs(base_path)
# Get full path with base_path and audio file name. Note file_path includes the full path and the audio name at the end.
full_path = os.path.join(base_path, os.path.basename(args.file_path) + ".txt")
# Write the transcription to a file
with open(full_path, "w") as txt_file:
txt_file.write(transcription.text)
print(f"Transcription successfully saved to the path: {full_path}")
print(f"Audio file successfully transcribed as: '{transcription.text}'")


if __name__ == "__main__":
Expand Down

0 comments on commit c2fb6c3

Please sign in to comment.