Adding cases to assign_cycle_ids() for when cycle_start is not supplied, or for solo sentence types; making cycle_start an input argument.
nabelekt committed Jan 17, 2021
1 parent 1aad718 commit 8df6b5c
Showing 1 changed file with 62 additions and 19 deletions.
81 changes: 62 additions & 19 deletions nmea_data_convert.py
@@ -27,20 +27,38 @@ def parse_and_validate_args():
parser.add_argument("output_method",
choices=['csv', 'db', 'both'],
help="where to output data: CSV files, database, or both")
parser.add_argument("--drop_previous_db_tables", "--dropt",
action="store_true",
help="drop all previous DB tables before importing new data; only applies when output_method is 'db' or 'both'")
parser.add_argument("--cycle_start", "--cs",
help="talker+sentence_type, e.g. 'GNRMC'; used to key off of for sentence merging, and more; "
"must appear once and only once in each cycle, and must be at the beginning of each cycle; "
"must contain date and time information for sentences to be datetime stamped")
parser.add_argument("--num_sentences_per_cycle", "--spc", # Requirement for this is enforced in assign_cycle_ids()
type=int,
help="If the cycle_start argument is not provided, and sentences are not all of type GSV, "
"cycles will be inferred from this argument. Every num_sentences_per_cycle will be given "
"the same cycle_id starting with the first sentence. Sentence merging is based on cycle_id.")
parser.add_argument("--backfill_datetimes", "--bfdt",
action="store_true",
help="backfill datetimes where missing by extrapolating from messages with datetime information")
parser.add_argument("--drop_previous_db_tables", "--dropt",
action="store_true",
help="drop all previous DB tables before importing new data; only applies when output_method is 'db' or 'both'")
args = parser.parse_args()

# Check if input file exists
-    if os.path.isfile(args.filepath):
-        return args
-    else:
-        sys.exit(f"\nFile {args.filepath} does not exist.\n\nExiting.\n")
+    if not os.path.isfile(args.filepath):
+        sys.exit(f"\nERROR: File '{args.filepath}' does not exist.\n\nExiting.\n")

+    # Check if num_sentences_per_cycle value is valid; argparse's type=int already guarantees an integer
+    if args.num_sentences_per_cycle is not None and args.num_sentences_per_cycle < 1:
+        sys.exit(f"\nERROR: num_sentences_per_cycle argument '{args.num_sentences_per_cycle}' is invalid; "
+                 "must be a positive integer.\n\nExiting.\n")

+    return args


def open_file(filepath):
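
The two new options are ordinary argparse flags with short aliases. A minimal standalone sketch of how they parse (argument names come from this diff; the file path and values are made up):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("filepath")
    parser.add_argument("output_method", choices=['csv', 'db', 'both'])
    parser.add_argument("--cycle_start", "--cs")
    parser.add_argument("--num_sentences_per_cycle", "--spc", type=int)

    # Same as running: python nmea_data_convert.py data.nmea csv --cs GNRMC --spc 4
    args = parser.parse_args(['data.nmea', 'csv', '--cs', 'GNRMC', '--spc', '4'])
    print(args.cycle_start, args.num_sentences_per_cycle)  # GNRMC 4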
@@ -120,15 +138,41 @@ def datetime_stamp_sentences(sentences, cycle_start='GNRMC'):


# To properly assign cycle IDs, the cycle_start talker+sentence_type must appear once and only once in each cycle
-def assign_cycle_ids(dts_sentences, cycle_start='GNRMC'):
+def assign_cycle_ids(dts_sentences, args):
# TODO: Check database for highest cycle_id and start there so that IDs will be unique across runs of this script

-    cycle_id = -1
+    cycle_start = args.cycle_start
+    if not cycle_start:  # Default to 'GNRMC' when no cycle_start argument was supplied
+        cycle_start = 'GNRMC'

-    for sen_idx, dts_sentence in enumerate(dts_sentences):
-        if dts_sentence.sentence.talker + dts_sentence.sentence.sentence_type == cycle_start:
-            cycle_id += 1
-        dts_sentences[sen_idx].cycle_id = cycle_id
+    unique_talker_and_type_pairs = {dts_sentence.sentence.talker + dts_sentence.sentence.sentence_type for dts_sentence in dts_sentences}

+    if cycle_start in unique_talker_and_type_pairs:  # If sentences contain cycle_start sentences
+        cycle_id = -1
+        for sen_idx, dts_sentence in enumerate(dts_sentences):
+            if dts_sentence.sentence.talker + dts_sentence.sentence.sentence_type == cycle_start:
+                cycle_id += 1
+            dts_sentences[sen_idx].cycle_id = cycle_id

+    elif len(unique_talker_and_type_pairs) == 1 and \
+            (('GPGSV' in unique_talker_and_type_pairs) or ('GLGSV' in unique_talker_and_type_pairs)):  # If sentences are all 'GPGSV' or all 'GLGSV' sentences
+        cycle_id = -1
+        for sen_idx, dts_sentence in enumerate(dts_sentences):
+            if int(dts_sentence.sentence.msg_num) == 1:  # A new GSV cycle starts at message number 1
+                cycle_id += 1
+            dts_sentences[sen_idx].cycle_id = cycle_id

+    else:
+        num_sentences_per_cycle = args.num_sentences_per_cycle
+        if num_sentences_per_cycle is None:
+            sys.exit("\n\nERROR: If an argument for cycle_start is not provided, and sentences are not exclusively "
+                     "GPGSV or exclusively GLGSV sentences, then the num_sentences_per_cycle argument must be provided.\n\nExiting.\n")
+        cycle_id = -1
+        cycle_start_idxs = range(0, len(dts_sentences), num_sentences_per_cycle)
+        for sen_idx, dts_sentence in enumerate(dts_sentences):
+            if sen_idx in cycle_start_idxs:  # Start a new cycle every num_sentences_per_cycle sentences
+                cycle_id += 1
+            dts_sentences[sen_idx].cycle_id = cycle_id

return dts_sentences
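
The fixed-size fallback amounts to integer division of the sentence index. A toy illustration (not repo code), assuming num_sentences_per_cycle is 3 and seven sentences:

    num_sentences_per_cycle = 3
    sentence_count = 7  # stand-in for len(dts_sentences)
    cycle_ids = [idx // num_sentences_per_cycle for idx in range(sentence_count)]
    print(cycle_ids)  # [0, 0, 0, 1, 1, 1, 2]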

@@ -286,11 +330,10 @@ def sentences_to_dataframes(sentence_sets):
sentence_is_merged = sentence_sets[set_idx][0].sentence_is_merged_from_multiple

fields = sentence_sets[set_idx][0].sentence.fields
+        # If first sentence is not a merged sentence, make sure that fields allow for merged sentences
        if (sentence_type == 'GSV') and (not sentence_is_merged):
-            # If first sentence is not a merged sentence, make sure that fields allow for merged sentences
            fields = expand_GSV_fields(fields)
        elif (sentence_type == 'GSA') and (not sentence_is_merged):
-            # If first sentence is not a merged sentence, make sure that fields allow for merged sentences
            fields = expand_GSA_fields(fields)
columns = [column_tuple[1] for column_tuple in fields]
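
The expand_GSV_fields() body is outside this hunk, but the idea is that one GSV sentence carries at most four satellites, so a sentence merged from a whole cycle needs the per-satellite columns repeated. A rough sketch with made-up field tuples, shaped like the (description, name) pairs that columns are read from above:

    per_satellite = [('SV PRN', 'sv_prn'), ('Elevation', 'elevation'),
                     ('Azimuth', 'azimuth'), ('SNR', 'snr')]
    fields = [('Number of messages', 'num_messages'), ('Message number', 'msg_num')]
    for sat_num in range(1, 5):  # up to four satellites per GSV sentence
        fields += [(desc, f'{name}_{sat_num}') for desc, name in per_satellite]
    columns = [column_tuple[1] for column_tuple in fields]
    print(columns[:4])  # ['num_messages', 'msg_num', 'sv_prn_1', 'elevation_1']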

@@ -541,11 +584,11 @@ def expand_GSA_fields(fields):


# Do data processing that we will always want to do
-def process_data_common(sentences, cycle_start='GNRMC'):
+def process_data_common(sentences, args):

-    dts_sentences = datetime_stamp_sentences(sentences, cycle_start)
+    dts_sentences = datetime_stamp_sentences(sentences, args.cycle_start)
    # 'dts' -> 'datetime stamped'
-    dts_sentences = assign_cycle_ids(dts_sentences, cycle_start='GNRMC')
+    dts_sentences = assign_cycle_ids(dts_sentences, args)
sentence_sets = categorize_sentences(dts_sentences)
sentence_sets = merge_groups(sentence_sets)
sentence_dfs = sentences_to_dataframes(sentence_sets)
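
Passing the whole args namespace, rather than individual keyword arguments, lets cycle_start and num_sentences_per_cycle travel together down to assign_cycle_ids(). A sketch of the call under assumed defaults (field names from this diff):

    from argparse import Namespace

    args = Namespace(cycle_start='GNRMC', num_sentences_per_cycle=None,
                     backfill_datetimes=False, drop_previous_db_tables=False)
    # sentence_dfs = process_data_common(sentences, args)  # 'sentences' comes from open_file()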
@@ -563,7 +606,7 @@ def main():
print("done.")

print("\nProcessing data... ", end="")
-    sentence_dfs = process_data_common(sentences, cycle_start='GNRMC')  # Cycle starts with 'RMC' sentence
+    sentence_dfs = process_data_common(sentences, args)  # Cycle starts with 'GNRMC' sentence by default
if args.backfill_datetimes:
backfill_datetimes(sentence_dfs, verbose=True)
derive_data(sentence_dfs)
