# Fix #5

	name: Integration test with benchmark

	on: [push]

	env:
	TESSDATA_PREFIX: "/usr/share/tesseract-ocr/5/tessdata"
	TORCH_DEVICE: "cpu"
	OCR_ENGINE: "tesseract" # So we don't have to install ghostscript, which takes a long time

	jobs:
	build:
	runs-on: ubuntu-latest
	steps:
	- uses: actions/checkout@v3
	- name: Set up Python 3.11
	uses: actions/setup-python@v4
	with:
	python-version: 3.11
	- name: Install system dependencies
	run: cat scripts/install/apt-requirements.txt \| xargs sudo apt-get install -y
	- name: Install python dependencies
	run: \|
	pip install poetry
	poetry install
	poetry uninstall torch
	poetry run pip install torch --index-url https://download.pytorch.org/whl/cpu
	- name: Download benchmark data
	run: \|
	wget https://drive.google.com/uc?export=download&id=1ktVDYPEeyHlKLaF56FnHjI5VjVnYa1xL -O benchmark_data.zip
	unzip benchmark_data.zip
	- name: Run benchmark test
	run: \|
	poetry run python benchmark.py benchmark_data/pdfs benchmark_data/references report.json
	poetry run python scripts/verify_benchmark_scores.py report.json

# Provide feedback