-
Notifications
You must be signed in to change notification settings - Fork 0
/
build_database.py
66 lines (57 loc) · 2.05 KB
/
build_database.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
from datetime import timezone
import git
import pathlib
import sqlite_utils
import re
# Absolute path of the directory containing this script; it is the path handed
# to build_database() when the file is run directly.
root = pathlib.Path(__file__).parent.resolve()
# Regex for a "date: YYYY-MM-DD" line; group 1 captures the date itself.
pattern = r'date:\s+(\d{4}-\d{2}-\d{2})'
def created_changed_times(repo_path, ref="main"):
    """Map each file touched in the history of *ref* to its timestamps.

    Args:
        repo_path: Location of the git repository to inspect.
        ref: Git ref whose commits are walked (defaults to ``"main"``).

    Returns:
        A dict keyed by the file path reported in each commit's stats. Each
        value is a dict with ``created`` / ``created_utc`` (taken from the
        first processed commit that touched the file) and ``updated`` /
        ``updated_utc`` (taken from the last one), all as ISO 8601 strings.
        The ``*_utc`` variants are the same instant converted to UTC.
    """
    times_by_file = {}
    repo = git.Repo(repo_path, odbt=git.GitDB)

    # Materialise the history and flip it so commits are processed in the
    # opposite order to the one iter_commits yields (presumably newest-first,
    # which makes this oldest-first -- the "created" logic relies on that).
    history = list(repo.iter_commits(ref))
    history.reverse()

    for commit in history:
        when = commit.committed_datetime
        local_stamp = when.isoformat()
        utc_stamp = when.astimezone(timezone.utc).isoformat()

        for changed_path in commit.stats.files:
            entry = times_by_file.get(changed_path)
            if entry is None:
                # First time this path is seen: record its creation time.
                entry = {"created": local_stamp, "created_utc": utc_stamp}
                times_by_file[changed_path] = entry
            # Every commit touching the path overwrites the update time, so
            # the last processed commit wins.
            entry["updated"] = local_stamp
            entry["updated_utc"] = utc_stamp

    return times_by_file
def build_database(repo_path):
    """Build ``til.db`` in *repo_path* from the Markdown notes it contains.

    Every file matching ``*/*.md`` directly under *repo_path* is read with
    this fixed layout:

    * line 1 - skipped
    * line 2 - searched for ``date: YYYY-MM-DD`` (see module-level ``pattern``)
    * lines 3-4 - skipped
    * line 5 - the title, with leading ``#`` characters and whitespace removed
    * remainder - the body

    One row per file is inserted into the ``til`` table (primary key
    ``path``), combined with that file's created/updated timestamps from
    ``created_changed_times``. Full-text search over ``title`` and ``body``
    is enabled unless a ``til_fts`` table already exists.

    Args:
        repo_path: ``pathlib.Path`` of the git working tree holding the
            notes; the database file is written to ``repo_path / "til.db"``.

    Raises:
        KeyError: If a Markdown file has no entry in the git history
            returned by ``created_changed_times`` (e.g. it was never
            committed on the inspected ref).
    """
    all_times = created_changed_times(repo_path)
    db = sqlite_utils.Database(repo_path / "til.db")
    table = db.table("til", pk="path")

    # Glob the same tree whose git history and database location are used,
    # rather than a module global that may point somewhere else.
    for filepath in repo_path.glob("*/*.md"):
        # The context manager guarantees the handle is closed; UTF-8 is
        # stated explicitly so decoding does not depend on the locale.
        with filepath.open(encoding="utf-8") as fp:
            fp.readline()
            date_line = fp.readline()
            fp.readline()
            fp.readline()
            title = fp.readline().lstrip("#").strip()
            body = fp.read().strip()

        # A note whose second line carries no parseable date is stored with
        # a NULL date instead of aborting the build with an AttributeError.
        match = re.search(pattern, date_line)
        extracted_date = match.group(1) if match else None

        # as_posix() keeps "/" separators on every platform, which the URL,
        # the topic split and the git-history lookup below all rely on.
        path = filepath.relative_to(repo_path).as_posix()
        url = "https://github.com/bpugh/til/blob/main/{}".format(path)
        record = {
            "path": path.replace("/", "_"),
            "topic": path.split("/")[0],
            "title": title,
            "date": extracted_date,
            "url": url,
            "body": body,
        }
        record.update(all_times[path])
        table.insert(record)

    if "til_fts" not in db.table_names():
        table.enable_fts(["title", "body"])
# When executed as a script (not imported), build the database for the
# directory this file lives in.
if __name__ == "__main__":
    build_database(root)