diff --git a/gcn_classic_text_to_json/conversion.py b/gcn_classic_text_to_json/conversion.py index fbb7eca..9de01cd 100644 --- a/gcn_classic_text_to_json/conversion.py +++ b/gcn_classic_text_to_json/conversion.py @@ -33,6 +33,9 @@ def parse_trigger_links(link, prefix, regex_string): The webpage with the trigger links listed. prefix: string The prefix to be added to the incomplete link. + regex_string: string + Regex string to search for while looking through links. + The prefix to be added to the incomplete link. regex_string: string Regex string to search for while looking through links. @@ -123,14 +126,18 @@ def text_to_json(notice, keywords_dict): notice_ra = keywords_dict["standard"]["ra"] ra_data = notice[notice_ra].split() - if ra_data[0] != "Undefined": + if ra_data[0] == "Undefined": + output["ra"] = None + else: output["ra"] = float(ra_data[0][:-1]) if "dec" in keywords_dict["standard"]: notice_dec = keywords_dict["standard"]["dec"] dec_data = notice[notice_dec].split() - if dec_data[0] != "Undefined": + if dec_data[0] == "Undefined": + output["dec"] = None + else: output["dec"] = float(dec_data[0][:-1]) if "additional" in keywords_dict: diff --git a/gcn_classic_text_to_json/notices/lvc/README.md b/gcn_classic_text_to_json/notices/lvc/README.md new file mode 100644 index 0000000..e1fbbf4 --- /dev/null +++ b/gcn_classic_text_to_json/notices/lvc/README.md @@ -0,0 +1,24 @@ +# LVC Text Conversion + +Parses through all the webpages with LVC text notices and creates a JSON with GCN schema keywords. Creates a `lvc_jsons` directory inside an `output` directory and saves jsons as `LVC_{serial_number}.json` where serial_number is a random iterating number with no association to the notices. 
+ +### Uses the following fields from the core schema for text notice fields +- `id` → TRIGGER_NUM +- `alert_datetime` → NOTICE_DATE +- `trigger_time` → TRIGGER_DATE, TRIGGER_TIME +- `record_number` → SEQUENCE_NUM +- `healpix_url` → SKYMAP_FITS_URL +- `far` → FAR + +### Defines the following new fields for the text notice fields +- `eventpage_url` → EVENTPAGE_URL +- `group` → GROUP_TYPE +- `search` → SEARCH_TYPE +- `pipeline` → PIPELINE_TYPE +- `central_frequency` → CENTRAL_FREQ +- `duration` → DURATION +- `chirp_mass` → CHIRP_MASS +- `eta` → ETA +- `max_dist` → MAX_DIST +- `classification` → PROB_BNS, PROB_NSBH, PROB_BBH, PROB_TERRES +- `properties` → PROB_NS, PROB_REMNANT, PROB_MassGap diff --git a/gcn_classic_text_to_json/notices/alexis/__init__.py b/gcn_classic_text_to_json/notices/lvc/__init__.py similarity index 100% rename from gcn_classic_text_to_json/notices/alexis/__init__.py rename to gcn_classic_text_to_json/notices/lvc/__init__.py diff --git a/gcn_classic_text_to_json/notices/snews/__main__.py b/gcn_classic_text_to_json/notices/lvc/__main__.py similarity index 56% rename from gcn_classic_text_to_json/notices/snews/__main__.py rename to gcn_classic_text_to_json/notices/lvc/__main__.py index e2a387d..5bdf3ed 100644 --- a/gcn_classic_text_to_json/notices/snews/__main__.py +++ b/gcn_classic_text_to_json/notices/lvc/__main__.py @@ -1,4 +1,4 @@ from . import conversion if __name__ == "__main__": - conversion.create_all_snews_jsons() + conversion.create_all_lvc_jsons() diff --git a/gcn_classic_text_to_json/notices/lvc/conversion.py b/gcn_classic_text_to_json/notices/lvc/conversion.py new file mode 100644 index 0000000..e57a499 --- /dev/null +++ b/gcn_classic_text_to_json/notices/lvc/conversion.py @@ -0,0 +1,143 @@ +import email +import json +import os + +import requests + +from ... 
import conversion  # completes the relative `from ...` import begun on the preceding line

# Keyword mapping handed to `conversion.text_to_json` for regular LVC notices.
# NOTE: the name `input` shadows the builtin; it is kept because it is part of
# this module's existing interface.
input = {
    "standard": {
        "alert_datetime": "NOTICE_DATE",
        "trigger_datetime": ["TRIGGER_DATE", "TRIGGER_TIME"],
    },
    "additional": {
        "record_number": ("SEQUENCE_NUM", "int"),
        "far": ("FAR", "float"),
        "healpix_url": ("SKYMAP_FITS_URL", "string"),
        "eventpage_url": ("EVENTPAGE_URL", "string"),
    },
}

# Reduced keyword mapping used for Retraction notices.
input_retract = {
    "standard": {
        "alert_datetime": "NOTICE_DATE",
        "trigger_datetime": ["TRIGGER_DATE", "TRIGGER_TIME"],
    },
    "additional": {
        "record_number": ("SEQUENCE_NUM", "int"),
    },
}

_SCHEMA_URL = (
    "https://gcn.nasa.gov/schema/main/gcn/notices/classic/lvc/alert.schema.json"
)

# (text notice keyword, JSON key) pairs whose value starts with a float.
_SCALAR_FIELDS = (
    ("CENTRAL_FREQ", "central_frequency"),
    ("DURATION", "duration"),
    ("CHIRP_MASS", "chirp_mass"),
    ("ETA", "eta"),
    ("MAX_DIST", "max_dist"),
)
_CLASSIFICATION_FIELDS = (
    ("PROB_BNS", "BNS"),
    ("PROB_NSBH", "NSBH"),
    ("PROB_BBH", "BBH"),
    ("PROB_TERRES", "Terrestrial"),
)
_PROPERTY_FIELDS = (
    ("PROB_NS", "HasNS"),
    ("PROB_REMNANT", "HasRemnant"),
    ("PROB_MassGap", "HasMassGap"),
)


def _leading_float(value):
    """Return the first whitespace-separated token of `value` as a float."""
    return float(value.split()[0])


def _collect_floats(notice, fields):
    """Return {json_key: float} for every (text_key, json_key) pair present in `notice`."""
    return {
        json_key: _leading_float(notice[text_key])
        for text_key, json_key in fields
        if text_key in notice
    }


def text_to_json_lvc(notice, input):
    """Convert an LVC text notice into a JSON-ready dictionary.

    Calls `conversion.text_to_json` for the fields described by the keyword
    mapping and then adds the LVC-specific fields that are present in the
    notice. Retraction notices are converted with the module-level
    `input_retract` mapping and receive only the schema, id and notice type
    on top of that.

    Parameters
    -----------
    notice: dict
        The text notice that is being parsed.
    input: dict
        Mapping between text notice keywords and JSON keywords

    Returns
    -------
    dictionary
        A dictionary compliant with the associated schema for the mission."""
    # The notice type is the second token of the NOTICE_TYPE value.
    notice_type = notice["NOTICE_TYPE"].split()[1]

    if notice_type == "Retraction":
        output_dict = conversion.text_to_json(notice, input_retract)
        output_dict["$schema"] = _SCHEMA_URL
        output_dict["id"] = [notice["TRIGGER_NUM"]]
        output_dict["notice_type"] = "Retraction"
        return output_dict

    output_dict = conversion.text_to_json(notice, input)

    output_dict["$schema"] = _SCHEMA_URL
    output_dict["mission"] = "LVK"
    output_dict["messenger"] = "GW"

    output_dict["id"] = [notice["TRIGGER_NUM"]]
    output_dict["notice_type"] = notice_type

    # Only the last token of each of these values is kept.
    output_dict["group"] = notice["GROUP_TYPE"].split()[-1]
    output_dict["search"] = notice["SEARCH_TYPE"].split()[-1]
    output_dict["pipeline"] = notice["PIPELINE_TYPE"].split()[-1]

    output_dict.update(_collect_floats(notice, _SCALAR_FIELDS))

    classification = _collect_floats(notice, _CLASSIFICATION_FIELDS)
    if "Terrestrial" in classification:
        output_dict["p_astro"] = 1 - classification["Terrestrial"]
    if classification:
        output_dict["classification"] = classification

    properties = _collect_floats(notice, _PROPERTY_FIELDS)
    if properties:
        output_dict["properties"] = properties

    return output_dict


def create_all_lvc_jsons():
    """Creates a `lvc_jsons` directory and fills it with the json for all LVC triggers.

    Every notice found on every trigger page is written to its own
    `LVC_{n}.json` file, where `n` is a running counter over all notices, so
    that several notices on one page do not overwrite each other."""
    output_path = "./output/lvc_jsons/"
    os.makedirs(output_path, exist_ok=True)

    archive_link = "https://gcn.gsfc.nasa.gov/lvc_events.html"
    prefix = "https://gcn.gsfc.nasa.gov/"
    search_string = "notices_l/.*lvc"
    links_set = conversion.parse_trigger_links(archive_link, prefix, search_string)

    sernum = 0
    for link in links_set:
        data = requests.get(link).text

        # Skip the first line of the page.
        start_idx = data.find("\n") + 1
        while True:
            end_idx = data.find("\n \n ", start_idx)
            notice_message = email.message_from_string(data[start_idx:end_idx].strip())
            notice_dict = dict(notice_message)

            # Merge repeated COMMENTS headers into one value; a notice without
            # any COMMENTS header simply has no such key.
            comments = notice_message.get_all("COMMENTS", [])
            if comments:
                notice_dict["COMMENTS"] = "\n".join(comments)

            output = text_to_json_lvc(notice_dict, input)

            sernum += 1
            with open(f"{output_path}LVC_{sernum}.json", "w") as f:
                json.dump(output, f)

            # A run of slashes appears to separate consecutive notices on a
            # page (TODO confirm); the next notice starts on the line after it.
            temp_start_idx = data.find("///////////", end_idx)
            if temp_start_idx == -1:
                break
            start_idx = data.find("\n", temp_start_idx)