-
Notifications
You must be signed in to change notification settings - Fork 93
134 lines (132 loc) · 4.57 KB
/
build.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# Runs on every push to main: rebuilds tils.db and the README, uploads the
# database to S3 and deploys it as a Datasette instance on Fly.
name: Build README and deploy Datasette
on:
  push:
    branches:
      - main
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Check out repo
        uses: actions/checkout@v4
        # We need full history to introspect created/updated:
        with:
          fetch-depth: 0
          # The repository lands in ./main; later steps `cd main` first.
          path: main
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML keeps the version as a string rather than a float.
          python-version: "3.11"
      - uses: actions/cache@v4
        name: Configure pip caching
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-
      - name: Cache Playwright browsers
        uses: actions/cache@v4
        with:
          path: ~/.cache/ms-playwright/
          key: ${{ runner.os }}-browsers
      - name: Install Python dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r main/requirements.txt
      - name: Install Playwright dependencies
        run: |
          shot-scraper install
      - name: Download previous database unless REBUILD in commit message
        # Written as a block scalar because a plain YAML value cannot start
        # with "!" (it would be read as a tag).
        if: |-
          !contains(github.event.head_commit.message, 'REBUILD')
        run: curl --fail -o main/tils.db https://s3.amazonaws.com/til.simonwillison.net/tils.db
        # A failed download must not fail the job.
        continue-on-error: true
      - name: Build database
        env:
          MARKDOWN_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |-
          cd main
          python build_database.py
      - name: Soundness check
        # The step fails unless the response for "/" contains the expected
        # string (grep exits non-zero when nothing matches).
        run: |-
          cd main
          datasette . --get / | grep "Simon Willison: TIL"
      - name: Generate missing screenshots
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        run: |-
          cd main
          python generate_screenshots.py
          sqlite-utils vacuum tils.db
      - name: Calculate embeddings and document similarity
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        # NOTE(review): if no .md file changed between HEAD~10 and HEAD, the
        # first `similar` call below receives no IDs - confirm that
        # openai-to-sqlite handles that case as intended.
        run: |-
          cd main
          # Fetch embeddings for documents that need them
          openai-to-sqlite embeddings tils.db \
            --sql 'select path, title, topic, body from til'
          # Now calculate and save similarities
          if sqlite-utils rows tils.db similarities --limit 1; then
            # Table exists already, so only calculate new similarities
            openai-to-sqlite similar tils.db \
              $(git diff --name-only HEAD~10 HEAD | grep '/.*\.md$' | sed 's/\//_/g') \
              --save --recalculate-for-matches --print
          else
            # Table does not exist, calculate for everything
            openai-to-sqlite similar tils.db --all --save
          fi
      - name: Create sqlite-vec index for embeddings
        # The vec_tils table is dropped, recreated and refilled from the
        # embeddings table on every run.
        run: |-
          cd main
          sqlite-utils tils.db 'drop table if exists vec_tils;'
          sqlite-utils tils.db 'create virtual table vec_tils using vec0(
            embedding float[1536]
          );'
          sqlite-utils tils.db 'insert into vec_tils(rowid, embedding)
            select rowid, embedding from embeddings;
          '
      - name: Update README
        run: |-
          cd main
          python update_readme.py --rewrite
          cat README.md
      - name: Commit and push if README changed
        # `git diff --quiet` exits non-zero when the working tree differs, so
        # the add/commit only happens when there is something to commit.
        run: |-
          cd main
          git diff
          git config --global user.email "readme-bot@example.com"
          git config --global user.name "README-bot"
          git diff --quiet || (git add README.md && git commit -m "Updated README")
          git push
      - name: Upload latest tils.db to the S3 bucket
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        run: |-
          s3-credentials put-object til.simonwillison.net tils.db main/tils.db
      - name: Install Fly
        run: |
          curl -L https://fly.io/install.sh | sh
      - name: Deploy Datasette using Fly
        env:
          FLY_API_TOKEN: ${{ secrets.FLY_TOKEN }}
        # /home/runner/.fly/bin/ is appended to PATH for this command only;
        # presumably that is where the install step above puts the Fly CLI.
        run: |-
          cd main
          PATH=$PATH:/home/runner/.fly/bin/ datasette publish fly tils.db \
            --app simonw-tils \
            --metadata metadata.yaml \
            --static static:static \
            --install datasette-template-sql \
            --install "datasette-sitemap>=1.0" \
            --install "datasette-atom>=0.7" \
            --install datasette-json-html \
            --install beautifulsoup4 \
            --install "datasette-debug-asgi>=1.1" \
            --install "datasette-graphql>=2.2" \
            --install datasette-block-robots \
            --install datasette-llm-embed \
            --install datasette-sqlite-vec \
            --plugins-dir plugins \
            --template-dir templates