KONUS and NEAR Text conversions #19

Open · wants to merge 2 commits into `main`
8 changes: 6 additions & 2 deletions gcn_classic_text_to_json/conversion.py
@@ -123,14 +123,18 @@ def text_to_json(notice, keywords_dict):
         notice_ra = keywords_dict["standard"]["ra"]
         ra_data = notice[notice_ra].split()

-        if ra_data[0] != "Undefined":
+        if ra_data[0] == "Undefined":
+            output["ra"] = None
+        else:
             output["ra"] = float(ra_data[0][:-1])

     if "dec" in keywords_dict["standard"]:
         notice_dec = keywords_dict["standard"]["dec"]
         dec_data = notice[notice_dec].split()

-        if dec_data[0] != "Undefined":
+        if dec_data[0] == "Undefined":
+            output["dec"] = None
+        else:
             output["dec"] = float(dec_data[0][:-1])

     if "additional" in keywords_dict:
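For illustration, a minimal sketch of the changed branch: an undefined coordinate now serialises as an explicit `null` rather than a missing key. The "Undefined" input here is a hypothetical notice fragment; names follow the diff above.

```python
# Minimal sketch of the new behaviour, assuming a notice whose RA keyword
# reads "Undefined" (hypothetical input; names follow the diff above).
output = {}
ra_data = "Undefined".split()

if ra_data[0] == "Undefined":
    output["ra"] = None  # written out as "ra": null instead of omitting the key
else:
    output["ra"] = float(ra_data[0][:-1])

assert output == {"ra": None}
```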
16 changes: 16 additions & 0 deletions gcn_classic_text_to_json/notices/konus/README.md
@@ -0,0 +1,16 @@
# KONUS Text Conversion

Parses the tables in the multiple webpages associated with KONUS triggers and creates a `KONUS_{sernum}.json` file for each trigger in a `konus_jsons` directory inside an `output` directory, where `sernum` is an iterative number with no relation to the triggers. An illustrative example of the output appears at the end of this README.

### Uses the following fields from the core schema for text notice fields
- `id` → Trig#
- `trigger_time` → Trig_Date, Trig_Time
- `classification` → Event

### Defines the following new fields for the text notice fields
- `lightcurve_image_url` → GIF
- `lightcurve_textfile_url` → Text
- `detector_number` → Det

## Caveats
- In the tables I have parsed, some of the fields are simply empty. I've elected to skip these and not add the fields to the JSONs, as that keeps validation simpler.
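
For illustration, a single `KONUS_{sernum}.json` produced by this conversion might look like the following; every value, including the URL paths, is made up:

```json
{
  "$schema": "https://gcn.nasa.gov/schema/main/gcn/notices/classic/konus/alert.schema.json",
  "trigger_time": "2021-01-02T03:04:05Z",
  "detector_number": 1,
  "classification": {"GRB": 1},
  "id": [123456],
  "lightcurve_image_url": "https://gcn.gsfc.nasa.gov/konus/example.gif",
  "lightcurve_textfile_url": "https://gcn.gsfc.nasa.gov/konus/example.txt"
}
```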
Empty file.
4 changes: 4 additions & 0 deletions gcn_classic_text_to_json/notices/konus/__main__.py
@@ -0,0 +1,4 @@
from . import conversion

if __name__ == "__main__":
conversion.parse_all_konus_webpages()
97 changes: 97 additions & 0 deletions gcn_classic_text_to_json/notices/konus/conversion.py
@@ -0,0 +1,97 @@
import json
import os
import re

import requests
from bs4 import BeautifulSoup


def create_all_konus_jsons(link, sernum):
"""Parses through the table of KONUS triggers in `link` to create their respective JSONs
and creates a konus_jsons directory inside an output directory.

Parameters
----------
link: string
The link to be parsed.
sernum: int
An iterative number for saving the JSONs. This number has no relation with the data in the JSONs.

Returns
-------
    sernum: int
        The updated sernum, to be passed to the next call of this function."""
output_path = "./output/konus_jsons/"
if not os.path.exists(output_path):
os.makedirs(output_path)

file = requests.get(link)
data = file.text

soup = BeautifulSoup(data, "html.parser")

rows = soup.find_all("tr")

for row in rows[1:]:
output_dict = {
"$schema": "https://gcn.nasa.gov/schema/main/gcn/notices/classic/konus/alert.schema.json"
}

cols = row.find_all("td")

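        # Trig_Date appears as YYYYMMDD and Trig_Time as an HH:MM:SS token
        # (assumed); combine them into an ISO 8601 UTC timestamp.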
trigger_date = cols[0].text.strip()
trigger_time = cols[2].text.split()[0]
output_dict["trigger_time"] = (
f"{trigger_date[:4]}-{trigger_date[4:6]}-{trigger_date[-2:]}T{trigger_time}Z"
)

        # Some rows leave the detector column blank (or whitespace-only); skip those.
        if cols[3].text.strip():
            output_dict["detector_number"] = int(cols[3].text)

        if cols[4].text.strip():
            output_dict["classification"] = {cols[4].text.strip(): 1}

        if cols[5].text.strip():
            output_dict["id"] = [int(cols[5].text)]

incomplete_image_link = cols[7].find("a").get("href")
output_dict["lightcurve_image_url"] = (
f"https://gcn.gsfc.nasa.gov/{incomplete_image_link}"
)

incomplete_textfile_link = cols[9].find("a").get("href")
output_dict["lightcurve_textfile_url"] = (
f"https://gcn.gsfc.nasa.gov/{incomplete_textfile_link}"
)

with open(f"{output_path}KONUS_{sernum}.json", "w") as f:
json.dump(output_dict, f)
sernum += 1

return sernum


def parse_all_konus_webpages():
"""The main konus webpage links to muliple webpages with more links.
This function finds them and calls create_all_konus_triggers for each"""

main_link = "https://gcn.gsfc.nasa.gov/konus_grbs.html"
file = requests.get(main_link)
data = file.text

soup = BeautifulSoup(data, "html.parser")

    search_string = re.compile(r"grbs\.html")
html_tags = soup.find_all("a", attrs={"href": search_string})

html_links = []

for tag in html_tags:
incomplete_link = tag.get("href")
html_links.append(f"https://gcn.gsfc.nasa.gov/{incomplete_link}")

html_links.append(main_link)

sernum = 1
for link in html_links:
sernum = create_all_konus_jsons(link, sernum)
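
As a sanity check, the generated files can be validated against the schema they declare. A minimal sketch (not part of this PR) using the third-party `jsonschema` package, assuming the `$schema` URLs resolve to raw JSON schema documents:

```python
# Validation sketch: assumes `jsonschema` and `requests` are installed and
# that each $schema URL serves the raw schema document.
import glob
import json

import jsonschema
import requests

for path in glob.glob("./output/konus_jsons/KONUS_*.json"):
    with open(path) as f:
        notice = json.load(f)
    schema = requests.get(notice["$schema"]).json()
    jsonschema.validate(instance=notice, schema=schema)  # raises on mismatch
```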
10 changes: 10 additions & 0 deletions gcn_classic_text_to_json/notices/near/README.md
@@ -0,0 +1,10 @@
# NEAR Text Conversion

Parses the tables in the multiple webpages associated with NEAR triggers and creates a `NEAR_{sernum}.json` file for each trigger in a `near_jsons` directory inside an `output` directory, where `sernum` is an iterative number with no relation to the triggers. An illustrative example of the output appears at the end of this README.

### Uses the following fields from the core schema for text notice fields
- `trigger_time` → no corresponding field name is given in the webpage

### Defines the following new fields for the text notice fields
- `lightcurve_image_url` → no corresponding field name is given in the webpage
- `lightcurve_textfile_url` → no corresponding field name is given in the webpage
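
For illustration, a single `NEAR_{sernum}.json` might look like the following; the timestamp and URL paths are made up:

```json
{
  "$schema": "https://gcn.nasa.gov/schema/main/gcn/notices/classic/near/alert.schema.json",
  "trigger_time": "2000-01-02T03:04:05Z",
  "lightcurve_image_url": "https://gcn.gsfc.nasa.gov/near/example.jpg",
  "lightcurve_textfile_url": "https://gcn.gsfc.nasa.gov/near/example.txt"
}
```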
Empty file.
4 changes: 4 additions & 0 deletions gcn_classic_text_to_json/notices/near/__main__.py
@@ -0,0 +1,4 @@
from . import conversion

if __name__ == "__main__":
conversion.parse_all_near_triggers()
103 changes: 103 additions & 0 deletions gcn_classic_text_to_json/notices/near/conversion.py
@@ -0,0 +1,103 @@
import json
import os
import re

import requests
from bs4 import BeautifulSoup


def create_near_jsons(link, sernum):
"""Parses through the table in `link` and creates JSONs for each row.
Then and creates a near_jsons directory inside an output directory

Parameters
----------
link: string
The link to be parsed.
sernum: int
An iterative number for saving the JSONs. This number has no relation with the data in the JSONs.

Returns
-------
    sernum: int
        The updated sernum, to be passed to the next call of this function."""
output_path = "./output/near_jsons/"
if not os.path.exists(output_path):
os.makedirs(output_path)

file = requests.get(link)
data = file.text

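    # The trigger list begins after a long HTML comment separator;
    # each row of the table is an <LI> entry.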
start_idx = data.find("<!XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX>")
start_idx = data.find("<LI>", start_idx)

while start_idx != -1:
output_dict = {
"$schema": "https://gcn.nasa.gov/schema/main/gcn/notices/classic/near/alert.schema.json"
}
end_idx = data.find("\n", start_idx)

row_data = data[start_idx:end_idx].split()

trigger_date_data = row_data[1]
trigger_time = row_data[4]

        # The date column is YYMMDD: a leading "99" means 1999, anything else 20xx.
        century = "19" if trigger_date_data[:2] == "99" else "20"
        output_dict["trigger_time"] = (
            f"{century}{trigger_date_data[:2]}-{trigger_date_data[2:4]}"
            f"-{trigger_date_data[-2:]}T{trigger_time}Z"
        )

        # Each row carries three anchors (PostScript, JPEG and text lightcurves);
        # start each search one character past the previous match so the same
        # "<A" tag is not found twice.
        postscript_url_start_idx = data.find("<A", start_idx)
        jpeg_url_start_idx = data.find("<A", postscript_url_start_idx + 1)
        jpeg_url_end_idx = data.find(">", jpeg_url_start_idx)
        textfile_url_start_idx = data.find("<A", jpeg_url_start_idx + 1)
        textfile_url_end_idx = data.find(">", textfile_url_start_idx)

        # Keep only the HREF value, dropping the "<A HREF=" markup and any quotes.
        jpeg_tag = data[jpeg_url_start_idx:jpeg_url_end_idx]
        jpeg_url_incomplete = jpeg_tag.split("=", 1)[1].strip('"')
        output_dict["lightcurve_image_url"] = (
            f"https://gcn.gsfc.nasa.gov/{jpeg_url_incomplete}"
        )

        textfile_tag = data[textfile_url_start_idx:textfile_url_end_idx]
        textfile_url_incomplete = textfile_tag.split("=", 1)[1].strip('"')
        output_dict["lightcurve_textfile_url"] = (
            f"https://gcn.gsfc.nasa.gov/{textfile_url_incomplete}"
        )

with open(f"{output_path}NEAR_{sernum}.json", "w") as f:
json.dump(output_dict, f)

sernum += 1
start_idx = data.find("<LI>", end_idx)

return sernum


def parse_all_near_triggers():
"""The main near webpage links to muliple webpages with more links.
This function finds them and calls create_all_konus_triggers for each"""
main_link = "https://gcn.gsfc.nasa.gov/near_grbs.html"
file = requests.get(main_link)
data = file.text

soup = BeautifulSoup(data, "html.parser")

    search_string = re.compile(r"grbs\.html")
html_tags = soup.find_all("a", attrs={"href": search_string})

html_links = []

for tag in html_tags:
incomplete_link = tag.get("href")
html_links.append(f"https://gcn.gsfc.nasa.gov/{incomplete_link}")

html_links.append(main_link)

sernum = 1
for link in html_links:
sernum = create_near_jsons(link, sernum)
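
Both converters can also be driven from a single script instead of their `__main__` modules; a minimal sketch, assuming the `gcn_classic_text_to_json` package is importable:

```python
# Run both conversions back to back (module paths as in this PR).
from gcn_classic_text_to_json.notices.konus import conversion as konus_conversion
from gcn_classic_text_to_json.notices.near import conversion as near_conversion

konus_conversion.parse_all_konus_webpages()
near_conversion.parse_all_near_triggers()
```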