Skip to content

Commit

Permalink
phrases now inherently incorporate wildcards. other minor tweaks
Browse files Browse the repository at this point in the history
  • Loading branch information
saltlas committed Jan 23, 2024
1 parent 9802510 commit 2c9d750
Show file tree
Hide file tree
Showing 12 changed files with 41 additions and 24 deletions.
Binary file modified commands/__pycache__/command.cpython-311.pyc
Binary file not shown.
Binary file modified commands/__pycache__/rotatecommand.cpython-311.pyc
Binary file not shown.
7 changes: 4 additions & 3 deletions commands/command.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import uuid
from utils import jsonserializer, wildcards
import json
from utils import serializer, wildcards

class Command:
"""Base class for all commands dictating default logic.
Expand Down Expand Up @@ -43,7 +42,7 @@ def action(self, timestamp, keyword):
"phrase": keyword,
"timestamp": timestamp
}
return json.dumps(event, default=jsonserializer.json_serial)
return self.serialize(event)


def finish(self):
Expand All @@ -68,4 +67,6 @@ def check_current_keyword(self, keyword):
else:
return self.current_keyword == keyword

def serialize(self, data):
    """Return ``data`` encoded by the shared ``serializer.json_serialize`` helper."""
    payload = serializer.json_serialize(data)
    return payload

21 changes: 15 additions & 6 deletions googlething.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@
RATE = 16000
CHUNK = int(RATE / 10) # 100ms

# result stability threshold for interim mode
RESULT_STABILITY_THRESHOLD = 0.7


class ResumableMicrophoneStream:
"""Opens a recording stream as a generator yielding the audio chunks."""
Expand Down Expand Up @@ -238,7 +241,7 @@ def listen_print_loop(processor: object, mode: object, responses: object, stream


if mode == "interim":
if (result.stability == 0.0 or result.stability > 0.7) and num_chars_printed < len(transcript): # only considering relatively stable results longer than the previous transcript we've encountered
if (result.stability == 0.0 or result.stability > RESULT_STABILITY_THRESHOLD) and num_chars_printed < len(transcript): # only considering relatively stable results longer than the previous transcript we've encountered

for word in transcript[num_chars_printed:].split():
print(word)
Expand All @@ -262,7 +265,7 @@ def listen_print_loop(processor: object, mode: object, responses: object, stream

if not result.is_final:

if mode == "interim" and result.stability > 0.7:
if mode == "interim" and result.stability > RESULT_STABILITY_THRESHOLD:
stream.is_final_end_time = stream.result_end_time
stream.last_transcript_was_final = True
else:
Expand All @@ -286,6 +289,7 @@ def listen_print_loop(processor: object, mode: object, responses: object, stream

if re.search(r"\b(exit|quit)\b", transcript, re.I):
print("Exiting..")
stream.closed = True
processor.close()
break

Expand All @@ -308,12 +312,12 @@ def main() -> None:
phrases = project_config["phrases"]
transcription_mode = project_config["transcription_mode"]
language_code = project_config["language_code"] # a BCP-47 language tag
wildcards = project_config["wildcards"]
websocket_port = project_config["websocket_port"]


# Create the adaptation client
adaptation_client = speech.AdaptationClient()
phrase_set_response = phrase_utils.init_PhraseSet(adaptation_client, phrases, project_number, project_id, wildcards)
phrase_set_response = phrase_utils.init_PhraseSet(adaptation_client, phrases, project_number, project_id)
phrase_set_name = phrase_set_response.name
speech_adaptation = speech.SpeechAdaptation(phrase_set_references=[phrase_set_name])

Expand All @@ -327,8 +331,13 @@ def main() -> None:
enable_word_time_offsets=True,
)

if transcription_mode == "interim":
allow_interim_results = True
elif transcription_mode == "stable":
allow_interim_results = False

streaming_config = speech.StreamingRecognitionConfig(
config=config, interim_results=True,
config=config, interim_results=allow_interim_results,
)

mic_manager = ResumableMicrophoneStream(RATE, CHUNK)
Expand All @@ -354,7 +363,7 @@ def main() -> None:
init_time = time_utils.get_time()
print("!!", init_time) # debug
if not command_processor:
command_processor = processcommands.CommandProcessor(init_time)
command_processor = processcommands.CommandProcessor(init_time, websocket_port)
else:
command_processor.init_time = init_time

Expand Down
4 changes: 2 additions & 2 deletions processcommands.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ class CommandProcessor:
Receives strings representing voice input, matches them to command keywords
and sends relevant messages to websocket connection."""

def __init__(self, init_time):
def __init__(self, init_time, port):
self.init_time = init_time

# first keyword in each command to match to the Command object containing relevant command logic
Expand All @@ -21,7 +21,7 @@ def __init__(self, init_time):
self.active_command = None # only one active command at a time

# initializing websocket connection
self.websocketclient = WebSocketClient()
self.websocketclient = WebSocketClient(port)
print("client not blocking") # debug


Expand Down
Binary file modified utils/__pycache__/jsonserializer.cpython-311.pyc
Binary file not shown.
Binary file modified utils/__pycache__/phrase_utils.cpython-311.pyc
Binary file not shown.
Binary file modified utils/__pycache__/time_utils.cpython-311.pyc
Binary file not shown.
Binary file modified utils/__pycache__/wildcards.cpython-311.pyc
Binary file not shown.
26 changes: 15 additions & 11 deletions utils/phrase_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,10 @@
from utils import wildcards
import hashlib

def init_PhraseSet(client, phrases, project_number, project_id, wildcards_config):

# get a list of all relevant wildcard words
wildcard_terms = sum([wildcards.wildcardsdict[wildcard] for wildcard in wildcards_config], [])
def init_PhraseSet(client, phrases, project_number, project_id):

# generating a phraseset-specific string of all relevant terms as a way to tell whether two phrasesets are identical
phraseset_string = "-".join(["-".join(phrase.split(" ")) for phrase in sorted(phrases + wildcard_terms)]).lower()
phraseset_string = "-".join(["-".join(phrase.split(" ")) for phrase in sorted(phrases)]).lower()

# hashing that string into a shorter but still unique ID
phraseset_id = str(int(hashlib.sha256(phraseset_string.encode()).hexdigest(), 16) % 10**8)
Expand All @@ -21,6 +18,7 @@ def init_PhraseSet(client, phrases, project_number, project_id, wildcards_config

try:
# if we've already created this phraseset before, it'll be loaded and we don't need to create a new one
#client.delete_phrase_set({"name": f"projects/{project_number}/locations/global/phraseSets/{phraseset_id}"})
response = client.get_phrase_set({"name": f"projects/{project_number}/locations/global/phraseSets/{phraseset_id}"})
print("existing phrase set found, loading...")
return response
Expand All @@ -37,12 +35,18 @@ def init_PhraseSet(client, phrases, project_number, project_id, wildcards_config

# we add individual words in the phrase as well as the phrase itself
# e.g. "rotate", "that", "rotate that"
for word in range(len(split_phrase)):
values.add(split_phrase[word])
values.add(" ".join(split_phrase[:word + 1]))
# adding wildcard words
for term in wildcard_terms:
values.add(term)
for word_index in range(len(split_phrase)):

# wildcard handling (e.g. "select that" and "select this")
if split_phrase[word_index] in wildcards.wildcardsdict.keys():
wordlist = wildcards.wildcardsdict[split_phrase[word_index]]
else:
wordlist = [split_phrase[word_index]]

for word in wordlist:
values.add(word)
values.add(" ".join(split_phrase[:word_index] + [word]))


print(values) # debug

Expand Down
3 changes: 3 additions & 0 deletions utils/jsonserializer.py → utils/serializer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from datetime import datetime
import json

def json_serial(obj):
"""JSON serializer for objects not serializable by default json code.
Expand All @@ -8,3 +9,5 @@ def json_serial(obj):
return obj.isoformat()
raise TypeError ("Type %s not serializable" % type(obj))

def json_serialize(data):
    """Dump ``data`` with ``json.dumps``, passing ``json_serial`` as the ``default``
    hook for objects the json module cannot serialize by itself."""
    encoded = json.dumps(data, default=json_serial)
    return encoded
4 changes: 2 additions & 2 deletions websocketclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@ def on_error(ws, error):
class WebSocketClient:
"""establishes non-blocking websocket client connection and
sends messages down websocket when needed"""
def __init__(self):
def __init__(self, port):


websocket.enableTrace(True)
ws = websocket.WebSocketApp("ws://localhost:8001", on_message = on_message, on_close = on_close, on_error = on_error)
ws = websocket.WebSocketApp(port, on_message = on_message, on_close = on_close, on_error = on_error)

# runs it on new thread to prevent blocking
wst = threading.Thread(target=ws.run_forever)
Expand Down

0 comments on commit 2c9d750

Please sign in to comment.