Phrases now inherently incorporate wildcards; other minor tweaks.

saltlas · Jan 23, 2024 · 2c9d750 · 2c9d750
1 parent 9802510
commit 2c9d750
Show file tree

Hide file tree

Showing 12 changed files with 41 additions and 24 deletions.
diff --git a/commands/__pycache__/command.cpython-311.pyc b/commands/__pycache__/command.cpython-311.pyc
diff --git a/commands/__pycache__/rotatecommand.cpython-311.pyc b/commands/__pycache__/rotatecommand.cpython-311.pyc
diff --git a/commands/command.py b/commands/command.py
@@ -1,6 +1,5 @@
 import uuid
-from utils import jsonserializer, wildcards
-import json
+from utils import serializer, wildcards
 
 class Command:
 	"""Base class for all commands dictating default logic. 
@@ -43,7 +42,7 @@ def action(self, timestamp, keyword):
 				"phrase": keyword,
 				"timestamp": timestamp
 			}
-		return json.dumps(event, default=jsonserializer.json_serial)		
+		return self.serialize(event)	
 
 
 	def finish(self):
@@ -68,4 +67,6 @@ def check_current_keyword(self, keyword):
 		else:
 			return self.current_keyword == keyword
 
+	def serialize(self, data):
+		return serializer.json_serialize(data)
 
diff --git a/googlething.py b/googlething.py
@@ -25,6 +25,9 @@
 RATE = 16000
 CHUNK = int(RATE / 10)  # 100ms
 
+# interim results whose stability exceeds this value are treated as stable (interim mode)
+RESULT_STABILITY_THRESHOLD = 0.7
+
 
 class ResumableMicrophoneStream:
     """Opens a recording stream as a generator yielding the audio chunks."""
@@ -238,7 +241,7 @@ def listen_print_loop(processor: object, mode: object, responses: object, stream
 
 
         if mode == "interim":
-            if (result.stability == 0.0 or result.stability > 0.7) and num_chars_printed < len(transcript): # only considering relatively stable results longer than the previous transcript we've encountered
+            if (result.stability == 0.0 or result.stability > RESULT_STABILITY_THRESHOLD) and num_chars_printed < len(transcript): # only considering relatively stable results longer than the previous transcript we've encountered
 
                 for word in transcript[num_chars_printed:].split():
                     print(word)
@@ -262,7 +265,7 @@ def listen_print_loop(processor: object, mode: object, responses: object, stream
 
         if not result.is_final:
 
-            if mode == "interim" and result.stability > 0.7:
+            if mode == "interim" and result.stability > RESULT_STABILITY_THRESHOLD:
                 stream.is_final_end_time = stream.result_end_time
                 stream.last_transcript_was_final = True
             else:
@@ -286,6 +289,7 @@ def listen_print_loop(processor: object, mode: object, responses: object, stream
 
             if re.search(r"\b(exit|quit)\b", transcript, re.I):
                 print("Exiting..")
+                stream.closed = True
                 processor.close()
                 break
 
@@ -308,12 +312,12 @@ def main() -> None:
     phrases = project_config["phrases"]
     transcription_mode = project_config["transcription_mode"]
     language_code = project_config["language_code"]  # a BCP-47 language tag
-    wildcards = project_config["wildcards"]
+    websocket_port = project_config["websocket_port"]
 
 
     # Create the adaptation client
     adaptation_client = speech.AdaptationClient()
-    phrase_set_response = phrase_utils.init_PhraseSet(adaptation_client, phrases, project_number, project_id, wildcards)
+    phrase_set_response = phrase_utils.init_PhraseSet(adaptation_client, phrases, project_number, project_id)
     phrase_set_name = phrase_set_response.name
     speech_adaptation = speech.SpeechAdaptation(phrase_set_references=[phrase_set_name])
 
@@ -327,8 +331,13 @@ def main() -> None:
         enable_word_time_offsets=True,
     )
 
+    if transcription_mode == "interim":
+        allow_interim_results = True
+    elif transcription_mode == "stable":
+        allow_interim_results = False
+
     streaming_config = speech.StreamingRecognitionConfig(
-        config=config, interim_results=True,
+        config=config, interim_results=allow_interim_results,
     )
 
     mic_manager = ResumableMicrophoneStream(RATE, CHUNK)
@@ -354,7 +363,7 @@ def main() -> None:
             init_time = time_utils.get_time()
             print("!!", init_time) # debug
             if not command_processor:
-                command_processor = processcommands.CommandProcessor(init_time)
+                command_processor = processcommands.CommandProcessor(init_time, websocket_port)
             else:
                 command_processor.init_time = init_time
 

diff --git a/processcommands.py b/processcommands.py
@@ -7,7 +7,7 @@ class CommandProcessor:
 	Receives strings representing voice input, matches them to command keywords
 	and sends relevant messages to websocket connection."""
 
-	def __init__(self, init_time):
+	def __init__(self, init_time, port):
 		self.init_time = init_time
 
 		# first keyword in each command to match to the Command object containing relevant command logic
@@ -21,7 +21,7 @@ def __init__(self, init_time):
 		self.active_command = None # only one active command at a time
 
 		# initializing websocket connection
-		self.websocketclient = WebSocketClient()
+		self.websocketclient = WebSocketClient(port)
 		print("client not blocking") # debug
 
 

diff --git a/utils/__pycache__/jsonserializer.cpython-311.pyc b/utils/__pycache__/jsonserializer.cpython-311.pyc
diff --git a/utils/__pycache__/phrase_utils.cpython-311.pyc b/utils/__pycache__/phrase_utils.cpython-311.pyc
diff --git a/utils/__pycache__/time_utils.cpython-311.pyc b/utils/__pycache__/time_utils.cpython-311.pyc
diff --git a/utils/__pycache__/wildcards.cpython-311.pyc b/utils/__pycache__/wildcards.cpython-311.pyc
diff --git a/utils/phrase_utils.py b/utils/phrase_utils.py
@@ -6,13 +6,10 @@
 from utils import wildcards
 import hashlib
 
-def init_PhraseSet(client, phrases, project_number, project_id, wildcards_config):
-
-	# get a list of all relevant wildcard words
-	wildcard_terms =  sum([wildcards.wildcardsdict[wildcard] for wildcard in wildcards_config], [])
+def init_PhraseSet(client, phrases, project_number, project_id):
 
 	# generating a phraseset-specific string of all relevant terms as a way to tell whether two phrasesets are identical
-	phraseset_string = "-".join(["-".join(phrase.split(" ")) for phrase in sorted(phrases + wildcard_terms)]).lower()
+	phraseset_string = "-".join(["-".join(phrase.split(" ")) for phrase in sorted(phrases)]).lower()
 
 	# hashing that string into a shorter but still unique ID
 	phraseset_id = str(int(hashlib.sha256(phraseset_string.encode()).hexdigest(), 16) % 10**8)
@@ -21,6 +18,7 @@ def init_PhraseSet(client, phrases, project_number, project_id, wildcards_config
 
 	try:
 		# if we've already created this phraseset before, it'll be loaded and we don't need to create a new one
+		#client.delete_phrase_set({"name": f"projects/{project_number}/locations/global/phraseSets/{phraseset_id}"})
 		response = client.get_phrase_set({"name": f"projects/{project_number}/locations/global/phraseSets/{phraseset_id}"})
 		print("existing phrase set found, loading...")
 		return response
@@ -37,12 +35,18 @@ def init_PhraseSet(client, phrases, project_number, project_id, wildcards_config
 
 				# we add individual words in the phrase as well as the phrase itself
 				# e.g. "rotate", "that", "rotate that"
-				for word in range(len(split_phrase)):
-					values.add(split_phrase[word])
-					values.add(" ".join(split_phrase[:word + 1]))
-			# adding wildcard words
-			for term in wildcard_terms:
-				values.add(term)
+				for word_index in range(len(split_phrase)):
+
+					# wildcard handling (e.g. "select that" and "select this")
+					if split_phrase[word_index] in wildcards.wildcardsdict.keys():
+						wordlist = wildcards.wildcardsdict[split_phrase[word_index]]
+					else:
+						wordlist = [split_phrase[word_index]]
+
+					for word in wordlist:
+						values.add(word)
+						values.add(" ".join(split_phrase[:word_index] + [word]))
+
 
 			print(values) # debug
 

diff --git a/utils/jsonserializer.py → utils/serializer.py b/utils/jsonserializer.py → utils/serializer.py
@@ -1,4 +1,5 @@
 from datetime import datetime
+import json
 
 def json_serial(obj):
     """JSON serializer for objects not serializable by default json code. 
@@ -8,3 +9,5 @@ def json_serial(obj):
         return obj.isoformat()
     raise TypeError ("Type %s not serializable" % type(obj))
 
+def json_serialize(data):
+    return json.dumps(data, default=json_serial)    
diff --git a/websocketclient.py b/websocketclient.py
@@ -14,11 +14,11 @@ def on_error(ws, error):
 class WebSocketClient:
     """establishes non-blocking websocket client connection and
     sends messages down websocket when needed"""
-    def __init__(self):
+    def __init__(self, port):
 
 
         websocket.enableTrace(True)
-        ws = websocket.WebSocketApp("ws://localhost:8001", on_message = on_message, on_close = on_close, on_error = on_error)
+        ws = websocket.WebSocketApp(port, on_message = on_message, on_close = on_close, on_error = on_error)
 
         # runs it on new thread to prevent blocking
         wst = threading.Thread(target=ws.run_forever)