Skip to content

Commit

Permalink
Extract the first URL in gh-data.py
Browse files Browse the repository at this point in the history
Issue descriptions sometimes have extra text around a URL, or put the URL inside a markdown link.
  • Loading branch information
zcorpan authored Dec 2, 2024
1 parent 24e3d10 commit 1348c86
Showing 1 changed file with 12 additions and 0 deletions.
12 changes: 12 additions & 0 deletions gh-data.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,16 @@ def process_labels(labels):
}


def get_url(text):
    """Return the first http(s) URL found in *text*, or "" if there is none.

    The URL may be surrounded by free text, wrapped in a markdown link
    ``[label](url)`` or written as an autolink ``<url>``. The match stops
    at whitespace, ``)`` or ``>`` so the closing delimiter of those forms
    is never included.

    Args:
        text: The string to search.

    Returns:
        The first URL with trailing sentence punctuation removed, or the
        empty string when *text* contains no http(s) URL.
    """
    m = re.search(r"\bhttps?://[^\s)>]+", text)
    if m is None:
        return ""
    # URLs embedded in prose are often directly followed by punctuation
    # ("see https://example.com/page, ..." or "... https://example.com.");
    # strip the whole trailing run, not only a single comma.
    return m.group().rstrip(".,;:!?")

def process_body(issue):
lines = issue["body"].splitlines()

Expand Down Expand Up @@ -100,6 +110,8 @@ def process_body(issue):
if line.lower().startswith(text_title):
value = line[len(text_title) :].strip()
value = re.sub(r"\[[^\]]+\]\(([^\)]+)\)", r"\1", value)
if key in ("url", "explainer", "caniuse", "bug", "webkit"):
value = get_url(value)
if value != "" and value.lower() != "n/a":
body[key] = value
break
Expand Down

0 comments on commit 1348c86

Please sign in to comment.