KONUS and NEAR Text conversions #19

Open · wants to merge 2 commits into `main`
8 changes: 6 additions & 2 deletions gcn_classic_text_to_json/conversion.py
@@ -123,14 +123,18 @@ def text_to_json(notice, keywords_dict):
         notice_ra = keywords_dict["standard"]["ra"]
         ra_data = notice[notice_ra].split()

-        if ra_data[0] != "Undefined":
+        if ra_data[0] == "Undefined":
+            output["ra"] = None
+        else:
             output["ra"] = float(ra_data[0][:-1])

     if "dec" in keywords_dict["standard"]:
         notice_dec = keywords_dict["standard"]["dec"]
         dec_data = notice[notice_dec].split()

-        if dec_data[0] != "Undefined":
+        if dec_data[0] == "Undefined":
+            output["dec"] = None
+        else:
             output["dec"] = float(dec_data[0][:-1])

     if "additional" in keywords_dict:
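For illustration, a minimal sketch of the changed branch: an undefined coordinate now serialises as an explicit `null` rather than a missing key. The "Undefined" input here is a hypothetical notice fragment; names follow the diff above.

```python
# Minimal sketch of the new behaviour, assuming a notice whose RA keyword
# reads "Undefined" (hypothetical input; names follow the diff above).
output = {}
ra_data = "Undefined".split()

if ra_data[0] == "Undefined":
    output["ra"] = None  # written out as "ra": null instead of omitting the key
else:
    output["ra"] = float(ra_data[0][:-1])

assert output == {"ra": None}
```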
16 changes: 16 additions & 0 deletions gcn_classic_text_to_json/notices/konus/README.md
@@ -0,0 +1,16 @@
# KONUS Text Conversion

Parses the tables in the multiple webpages associated with KONUS triggers and creates a `KONUS_{sernum}.json` file for each trigger in a `konus_jsons` directory inside an `output` directory, where `sernum` is an iterative number with no relation to the triggers. An illustrative example of the output appears at the end of this README.

### Uses the following fields from the core schema for text notice fields
- `id` → Trig#
- `trigger_time` → Trig_Date, Trig_Time
- `classification` → Event

### Defines the following new fields for the text notice fields
- `lightcurve_image_url` → GIF
- `lightcurve_textfile_url` → Text
- `detector_number` → Det

## Caveats
- In the tables I have parsed, some of the fields are simply empty. I've elected to skip these and not add the fields to the JSONs, as that keeps validation simpler.
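
For illustration, a single `KONUS_{sernum}.json` produced by this conversion might look like the following; every value, including the URL paths, is made up:

```json
{
  "$schema": "https://gcn.nasa.gov/schema/main/gcn/notices/classic/konus/alert.schema.json",
  "trigger_time": "2021-01-02T03:04:05Z",
  "detector_number": 1,
  "classification": {"GRB": 1},
  "id": [123456],
  "lightcurve_image_url": "https://gcn.gsfc.nasa.gov/konus/example.gif",
  "lightcurve_textfile_url": "https://gcn.gsfc.nasa.gov/konus/example.txt"
}
```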
Empty file.
4 changes: 4 additions & 0 deletions gcn_classic_text_to_json/notices/konus/__main__.py
@@ -0,0 +1,4 @@
from . import conversion

if __name__ == "__main__":
conversion.parse_all_konus_webpages()
97 changes: 97 additions & 0 deletions gcn_classic_text_to_json/notices/konus/conversion.py
@@ -0,0 +1,97 @@
import json
import os
import re

import requests
from bs4 import BeautifulSoup


def create_all_konus_jsons(link, sernum):
"""Parses through the table of KONUS triggers in `link` to create their respective JSONs
and creates a konus_jsons directory inside an output directory.

Parameters
----------
link: string
The link to be parsed.
sernum: int
An iterative number for saving the JSONs. This number has no relation with the data in the JSONs.

Returns
-------
    sernum: int
        The updated sernum, to be passed to the next call of this function."""
output_path = "./output/konus_jsons/"
if not os.path.exists(output_path):
os.makedirs(output_path)

file = requests.get(link)
data = file.text

soup = BeautifulSoup(data, "html.parser")

rows = soup.find_all("tr")

for row in rows[1:]:
output_dict = {
"$schema": "https://gcn.nasa.gov/schema/main/gcn/notices/classic/konus/alert.schema.json"
}

cols = row.find_all("td")

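        # Trig_Date appears as YYYYMMDD and Trig_Time as an HH:MM:SS token
        # (assumed); combine them into an ISO 8601 UTC timestamp.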
trigger_date = cols[0].text.strip()
trigger_time = cols[2].text.split()[0]
output_dict["trigger_time"] = (
f"{trigger_date[:4]}-{trigger_date[4:6]}-{trigger_date[-2:]}T{trigger_time}Z"
)

        # Some rows leave the detector column blank (or whitespace-only); skip those.
        if cols[3].text.strip():
            output_dict["detector_number"] = int(cols[3].text)

        if cols[4].text.strip():
            output_dict["classification"] = {cols[4].text.strip(): 1}

        if cols[5].text.strip():
            output_dict["id"] = [int(cols[5].text)]

incomplete_image_link = cols[7].find("a").get("href")
output_dict["lightcurve_image_url"] = (
f"https://gcn.gsfc.nasa.gov/{incomplete_image_link}"
)

incomplete_textfile_link = cols[9].find("a").get("href")
output_dict["lightcurve_textfile_url"] = (
f"https://gcn.gsfc.nasa.gov/{incomplete_textfile_link}"
)

with open(f"{output_path}KONUS_{sernum}.json", "w") as f:
json.dump(output_dict, f)
sernum += 1

return sernum


def parse_all_konus_webpages():
"""The main konus webpage links to muliple webpages with more links.
This function finds them and calls create_all_konus_triggers for each"""

main_link = "https://gcn.gsfc.nasa.gov/konus_grbs.html"
file = requests.get(main_link)
data = file.text

soup = BeautifulSoup(data, "html.parser")

    search_string = re.compile(r"grbs\.html")
html_tags = soup.find_all("a", attrs={"href": search_string})

html_links = []

for tag in html_tags:
incomplete_link = tag.get("href")
html_links.append(f"https://gcn.gsfc.nasa.gov/{incomplete_link}")

html_links.append(main_link)

sernum = 1
for link in html_links:
sernum = create_all_konus_jsons(link, sernum)
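
As a sanity check, the generated files can be validated against the schema they declare. A minimal sketch (not part of this PR) using the third-party `jsonschema` package, assuming the `$schema` URLs resolve to raw JSON schema documents:

```python
# Validation sketch: assumes `jsonschema` and `requests` are installed and
# that each $schema URL serves the raw schema document.
import glob
import json

import jsonschema
import requests

for path in glob.glob("./output/konus_jsons/KONUS_*.json"):
    with open(path) as f:
        notice = json.load(f)
    schema = requests.get(notice["$schema"]).json()
    jsonschema.validate(instance=notice, schema=schema)  # raises on mismatch
```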
10 changes: 10 additions & 0 deletions gcn_classic_text_to_json/notices/near/README.md
@@ -0,0 +1,10 @@
# NEAR Text Conversion

Parses the tables in the multiple webpages associated with NEAR triggers and creates a `NEAR_{sernum}.json` file for each trigger in a `near_jsons` directory inside an `output` directory, where `sernum` is an iterative number with no relation to the triggers. An illustrative example of the output appears at the end of this README.

### Uses the following fields from the core schema for text notice fields
- `trigger_time` → no corresponding field name is given in the webpage

### Defines the following new fields for the text notice fields
- `lightcurve_image_url` → no corresponding field name is given in the webpage
- `lightcurve_textfile_url` → no corresponding field name is given in the webpage
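
For illustration, a single `NEAR_{sernum}.json` might look like the following; the timestamp and URL paths are made up:

```json
{
  "$schema": "https://gcn.nasa.gov/schema/main/gcn/notices/classic/near/alert.schema.json",
  "trigger_time": "2000-01-02T03:04:05Z",
  "lightcurve_image_url": "https://gcn.gsfc.nasa.gov/near/example.jpg",
  "lightcurve_textfile_url": "https://gcn.gsfc.nasa.gov/near/example.txt"
}
```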
Empty file.
4 changes: 4 additions & 0 deletions gcn_classic_text_to_json/notices/near/__main__.py
@@ -0,0 +1,4 @@
from . import conversion

if __name__ == "__main__":
conversion.parse_all_near_triggers()
103 changes: 103 additions & 0 deletions gcn_classic_text_to_json/notices/near/conversion.py
@@ -0,0 +1,103 @@
import json
import os
import re

import requests
from bs4 import BeautifulSoup


def create_near_jsons(link, sernum):
"""Parses through the table in `link` and creates JSONs for each row.
Then and creates a near_jsons directory inside an output directory

Parameters
----------
link: string
The link to be parsed.
sernum: int
An iterative number for saving the JSONs. This number has no relation with the data in the JSONs.

Returns
-------
    sernum: int
        The updated sernum, to be passed to the next call of this function."""
output_path = "./output/near_jsons/"
if not os.path.exists(output_path):
os.makedirs(output_path)

file = requests.get(link)
data = file.text

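    # The trigger list begins after a long HTML comment separator;
    # each row of the table is an <LI> entry.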
start_idx = data.find("<!XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX>")
start_idx = data.find("<LI>", start_idx)

while start_idx != -1:
output_dict = {
"$schema": "https://gcn.nasa.gov/schema/main/gcn/notices/classic/near/alert.schema.json"
}
end_idx = data.find("\n", start_idx)

row_data = data[start_idx:end_idx].split()

trigger_date_data = row_data[1]
trigger_time = row_data[4]

        # The date column is YYMMDD: a leading "99" means 1999, anything else 20xx.
        century = "19" if trigger_date_data[:2] == "99" else "20"
        output_dict["trigger_time"] = (
            f"{century}{trigger_date_data[:2]}-{trigger_date_data[2:4]}"
            f"-{trigger_date_data[-2:]}T{trigger_time}Z"
        )

        # Each row carries three anchors (PostScript, JPEG and text lightcurves);
        # start each search one character past the previous match so the same
        # "<A" tag is not found twice.
        postscript_url_start_idx = data.find("<A", start_idx)
        jpeg_url_start_idx = data.find("<A", postscript_url_start_idx + 1)
        jpeg_url_end_idx = data.find(">", jpeg_url_start_idx)
        textfile_url_start_idx = data.find("<A", jpeg_url_start_idx + 1)
        textfile_url_end_idx = data.find(">", textfile_url_start_idx)

        # Keep only the HREF value, dropping the "<A HREF=" markup and any quotes.
        jpeg_tag = data[jpeg_url_start_idx:jpeg_url_end_idx]
        jpeg_url_incomplete = jpeg_tag.split("=", 1)[1].strip('"')
        output_dict["lightcurve_image_url"] = (
            f"https://gcn.gsfc.nasa.gov/{jpeg_url_incomplete}"
        )

        textfile_tag = data[textfile_url_start_idx:textfile_url_end_idx]
        textfile_url_incomplete = textfile_tag.split("=", 1)[1].strip('"')
        output_dict["lightcurve_textfile_url"] = (
            f"https://gcn.gsfc.nasa.gov/{textfile_url_incomplete}"
        )

with open(f"{output_path}NEAR_{sernum}.json", "w") as f:
json.dump(output_dict, f)

sernum += 1
start_idx = data.find("<LI>", end_idx)

return sernum


def parse_all_near_triggers():
"""The main near webpage links to muliple webpages with more links.
This function finds them and calls create_all_konus_triggers for each"""
main_link = "https://gcn.gsfc.nasa.gov/near_grbs.html"
file = requests.get(main_link)
data = file.text

soup = BeautifulSoup(data, "html.parser")

    search_string = re.compile(r"grbs\.html")
html_tags = soup.find_all("a", attrs={"href": search_string})

html_links = []

for tag in html_tags:
incomplete_link = tag.get("href")
html_links.append(f"https://gcn.gsfc.nasa.gov/{incomplete_link}")

html_links.append(main_link)

sernum = 1
for link in html_links:
sernum = create_near_jsons(link, sernum)
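
Both converters can also be driven from a single script instead of their `__main__` modules; a minimal sketch, assuming the `gcn_classic_text_to_json` package is importable:

```python
# Run both conversions back to back (module paths as in this PR).
from gcn_classic_text_to_json.notices.konus import conversion as konus_conversion
from gcn_classic_text_to_json.notices.near import conversion as near_conversion

konus_conversion.parse_all_konus_webpages()
near_conversion.parse_all_near_triggers()
```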