# openai-to-sqlite>=0.4.1 #1374
# This file contains bidirectional Unicode text that may be interpreted or compiled differently from what appears below. To review it, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters.
# Workflow: on every push to main, rebuild the TIL SQLite database, refresh
# screenshots and embeddings, regenerate the README, upload the database to
# S3 and publish the Datasette site to Vercel.
name: Build README and deploy Datasette

on:
  push:
    branches:
      - main

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      # The repository is checked out into ./main, so later steps either
      # `cd main` or prefix paths with main/.
      - name: Check out repo
        uses: actions/checkout@v3
        # We need full history to introspect created/updated:
        with:
          fetch-depth: 0
          path: main

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          # Quoted so the version is always read as a string, never a float.
          python-version: "3.11"

      # Cache key changes whenever any requirements.txt changes; restore-keys
      # lets an older pip cache for the same OS be reused as a starting point.
      - name: Configure pip caching
        uses: actions/cache@v3
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Cache Playwright browsers
        uses: actions/cache@v3
        with:
          path: ~/.cache/ms-playwright/
          key: ${{ runner.os }}-browsers

      - name: Install Python dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r main/requirements.txt

      - name: Install Playwright dependencies
        run: |
          shot-scraper install

      # Skipped when the head commit message contains REBUILD, which forces
      # the database to be built from scratch. continue-on-error keeps the
      # run going if the download fails (curl --fail exits non-zero on HTTP
      # errors).
      - name: Download previous database unless REBUILD in commit message
        if: |-
          !contains(github.event.head_commit.message, 'REBUILD')
        run: curl --fail -o main/tils.db https://s3.amazonaws.com/til.simonwillison.net/tils.db
        continue-on-error: true

      - name: Build database
        env:
          MARKDOWN_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |-
          cd main
          python build_database.py

      # Fetches the homepage in-process (--get /) and greps for the site
      # title; grep's non-zero exit fails the step if the title is missing.
      - name: Soundness check
        run: |-
          cd main
          datasette . --get / | grep "Simon Willison: TIL"

      - name: Generate missing screenshots
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        run: |-
          cd main
          python generate_screenshots.py
          sqlite-utils vacuum tils.db

      - name: Calculate embeddings and document similarity
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: |-
          cd main
          # Fetch embeddings for documents that need them
          openai-to-sqlite embeddings tils.db \
            --sql 'select path, title, topic, body from til'
          # Now calculate and save similarities
          if sqlite-utils rows tils.db similarities --limit 1; then
            # Table exists already, so only calculate new similarities
            # for the .md files touched in the last 10 commits
            # (slashes in each path are rewritten to underscores)
            openai-to-sqlite similar tils.db \
              $(git diff --name-only HEAD~10 HEAD | grep '/.*\.md$' | sed 's/\//_/g') \
              --save --recalculate-for-matches --print
          else
            # Table does not exist, calculate for everything
            openai-to-sqlite similar tils.db --all --save
          fi

      - name: Update README
        run: |-
          cd main
          python update_readme.py --rewrite
          cat README.md

      # Commits README.md only when the working tree differs from HEAD
      # (git diff --quiet exits non-zero on changes); git push always runs.
      # NOTE(review): the user.email value below looks like an
      # e-mail-obfuscation placeholder rather than a real address -- confirm
      # the intended bot address before relying on it.
      - name: Commit and push if README changed
        run: |-
          cd main
          git diff
          git config --global user.email "[email protected]"
          git config --global user.name "README-bot"
          git diff --quiet || (git add README.md && git commit -m "Updated README")
          git push

      # Runs from the workspace root, hence the main/ prefix on the local path.
      - name: Upload latest tils.db to the S3 bucket
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        run: |-
          s3-credentials put-object til.simonwillison.net tils.db main/tils.db

      - name: Deploy Datasette using Vercel
        env:
          NOW_TOKEN: ${{ secrets.NOW_TOKEN }}
        run: |-
          cd main
          # Token is quoted so an empty or unusual value cannot be split
          # into, or swallowed as, a neighbouring argument.
          datasette publish vercel tils.db \
            --token "$NOW_TOKEN" \
            --project simon-til \
            --scope datasette \
            --metadata metadata.yaml \
            --static static:static \
            --install datasette-template-sql \
            --install "datasette-sitemap>=1.0" \
            --install "datasette-atom>=0.7" \
            --install datasette-json-html \
            --install beautifulsoup4 \
            --install "datasette-debug-asgi>=1.1" \
            --install "datasette-graphql>=2.2" \
            --install datasette-block-robots \
            --install datasette-llm-embed \
            --plugins-dir plugins \
            --template-dir templates \
            --public