Skip to content

Commit

Permalink
Add script to scrape OSGL inspirations.md file #2706
Browse files Browse the repository at this point in the history
  • Loading branch information
cxong committed Sep 14, 2024
1 parent 0847abc commit 81a2acb
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 3 deletions.
13 changes: 12 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ tenacity = "^8.2.2"

[tool.poetry.group.dev.dependencies]
httpx = "^0.27.0"
mistletoe = "^1.4.0"

[build-system]
requires = ["poetry-core>=1.0.0"]
Expand Down
46 changes: 46 additions & 0 deletions scripts/scrape_osgl_inspirations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import re

import httpx
from mistletoe import Document
from mistletoe.block_token import Heading, List
from scripts.utils import games, originals

INSPIRATION_PATTERN = re.compile(r"(.+) \[\d+\]")
INSPIRED_PATTERN = re.compile(r"Inspired entries: (.+)")


def main():
    """Report OSGL inspirations and inspired games missing from the local data.

    Downloads ``inspirations.md`` from the opensourcegames repository, parses
    it with mistletoe, and maps the name in each level 2 heading to the names
    listed after ``Inspired entries:`` in the list that follows the heading.
    It then prints a ``Missing original`` line for every heading name that is
    neither a ``name`` nor one of the ``names`` yielded by ``originals()``,
    and a ``Missing clone`` line for every inspired entry that is not a
    ``name`` yielded by ``games()``.

    Raises:
        httpx.HTTPStatusError: If the download returns an error status.
        ValueError: If a level 2 heading does not match ``INSPIRATION_PATTERN``.
        TypeError: If a block that is not a heading is not a list either.
    """
    resp = httpx.get("https://raw.githubusercontent.com/Trilarion/opensourcegames/master/inspirations.md")
    # Without this check an error page would be parsed as Markdown, produce an
    # empty mapping, and make the script report that nothing is missing.
    resp.raise_for_status()
    doc = Document(resp.text)

    # Only look at level 2 headings; headings of any other level are dropped,
    # while every non-heading block is kept.
    children = [child for child in doc.children if not isinstance(child, Heading) or child.level == 2]

    inspiration = None
    osgl_games = {}
    for child in children:
        if isinstance(child, Heading):
            heading_text = child.children[0].content
            heading_match = INSPIRATION_PATTERN.match(heading_text)
            if heading_match is None:
                raise ValueError(f"Unexpected heading format: {heading_text!r}")
            inspiration = heading_match.group(1)
            continue
        # Explicit check rather than ``assert`` so it still runs under ``-O``.
        if not isinstance(child, List):
            raise TypeError(f"Expected a list block, got {type(child).__name__}")
        for subchild in child.children:
            # First inline token of the first block inside the list item.
            text = subchild.children[0].children[0].content
            if matches := INSPIRED_PATTERN.match(text):
                osgl_games[inspiration] = matches.group(1).split(", ")

    # Find games and clones from OSGC
    osgc_originals = set()
    for original in originals():
        osgc_originals.add(original["name"])
        osgc_originals.update(original.get("names", []))
    for game in osgl_games:
        if game not in osgc_originals:
            print(f"Missing original: {game}")

    osgc_games = {game["name"] for game in games()}
    for game, inspireds in osgl_games.items():
        for inspired in inspireds:
            if inspired not in osgc_games:
                print(f"Missing clone: {inspired} (inspired by {game})")


if __name__ == "__main__":
main()
6 changes: 4 additions & 2 deletions scripts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,19 @@

import yaml

PROJECT_ROOT_PATH = Path(__file__).parent.parent.resolve()


def originals() -> Iterable[dict]:
    """Yield every entry from the ``.yaml`` files in the ``originals`` directory.

    The directory is resolved against ``PROJECT_ROOT_PATH`` rather than the
    current working directory, so the result does not depend on where the
    calling script is started from. Only regular files directly inside the
    directory whose suffix is ``.yaml`` are read.

    Yields:
        Each item of the top-level sequence loaded from each YAML file.
    """
    for p in (PROJECT_ROOT_PATH / "originals").iterdir():
        if p.is_file() and p.suffix == ".yaml":
            # Load inside a context manager so the handle is closed before
            # yielding; the consumer may pause this generator indefinitely.
            with open(p, encoding="utf-8") as f:
                entries = yaml.safe_load(f)
            for original in entries:
                yield original


def games() -> Iterable[dict]:
for p in Path('games').iterdir():
for p in (PROJECT_ROOT_PATH / "games").iterdir():
if p.is_file() and p.suffix == ".yaml":
games = yaml.safe_load(open(p, encoding="utf-8"))
for game in games:
Expand Down

0 comments on commit 81a2acb

Please sign in to comment.