Skip to content

Commit

Permalink
Merge pull request #441 from CoCo-Japan-pan/check-brackets-in-url
Browse files Browse the repository at this point in the history
remove brackets in embedded urls
  • Loading branch information
koba-e964 authored Oct 9, 2024
2 parents c420326 + 3b8645d commit adbff12
Showing 1 changed file with 13 additions and 1 deletion.
14 changes: 13 additions & 1 deletion onlinejudge_verify/languages/special_comments.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def list_doxygen_annotations(path: pathlib.Path) -> Dict[str, str]:

@functools.lru_cache(maxsize=None)
def list_embedded_urls(path: pathlib.Path) -> List[str]:
pattern = re.compile(r"""['"`]?https?://\S*""") # use a broad pattern. There are no needs to make match strict.
pattern = re.compile(r"""['"`<\(]?https?://\S*""") # use a broad pattern. There are no needs to make match strict.
with open(path, 'rb') as fh:
content = fh.read().decode()
urls = []
Expand All @@ -70,5 +70,17 @@ def list_embedded_urls(path: pathlib.Path) -> List[str]:
# Remove quotes and trailing superfluous chars around the URL
url = url[1:end_quote_pos]
break
# The URL may be written like `[atcoder](https://atcoder.jp/)` or `<https://atcoder.jp/>` in Markdown syntax.
# In this case, the broad pattern also captures the surrounding brackets, so strip them here.
for (lbracket, rbracket) in (('<', '>'), ('(', ')')):
    if url.startswith(lbracket):
        end_bracket_pos = url.rfind(rbracket)
        if end_bracket_pos == -1:
            # No closing bracket, e.g. `<https://atcoder.jp/`: only drop the opening one.
            # `str.rfind` reports "not found" as -1 (never 0 here, because index 0 holds the
            # opening bracket), so comparing against 0 would fall through to the slice below
            # and cut off the last character of the URL.
            url = url[1:]
        else:
            # Remove both brackets and any trailing superfluous chars after the closing one.
            url = url[1:end_bracket_pos]
        break
urls.append(url)
return sorted(set(urls))

0 comments on commit adbff12

Please sign in to comment.