diff --git a/.editorconfig b/.editorconfig
index 6389927..f5ef29b 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -6,3 +6,6 @@ trim_trailing_whitespace = true
 
 [*.js]
 indent_size = 2
+
+[*.yml]
+indent_size = 2
diff --git a/.github/workflows/analyze.yml b/.github/workflows/analyze.yml
new file mode 100644
index 0000000..023ca32
--- /dev/null
+++ b/.github/workflows/analyze.yml
@@ -0,0 +1,86 @@
+# Runs generate_task_sheets.py on every pull request and on manual dispatch,
+# then uploads the `out` directory as a build artifact.
+name: Analyze
+
+env:
+  # Pattern used when no `pattern` input is supplied.
+  TEST_PATTERN: '*justice.gov/*'
+
+on:
+  pull_request: {}
+  workflow_dispatch:
+    inputs:
+      threshold:
+        description: 'Threshold'
+        required: false
+        type: string
+      pattern:
+        description: 'Pattern'
+        required: false
+        type: string
+        default: ''
+      from:
+        description: 'From Time'
+        required: false
+        type: string
+        default: ''
+      to:
+        description: 'To Time'
+        required: false
+        type: string
+        default: ''
+
+jobs:
+  analyze:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+          cache: pip
+
+      - name: Install System Dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y --no-install-recommends \
+            gcc g++ pkg-config libxml2-dev libxslt-dev libz-dev
+
+      - name: Install Python Dependencies
+        run: pip install -r requirements.txt
+
+      - name: Download NLTK Corpora
+        run: |
+          python -m nltk.downloader stopwords
+
+      - name: Analyze!
+        # Inputs are handed to the script through environment variables
+        # instead of being expanded inside the shell text, so a quote or
+        # shell metacharacter in an input cannot change the command. The
+        # `||` fallbacks apply whenever an input is empty or not provided.
+        env:
+          ARG_AFTER: ${{ inputs.from || '240' }}
+          ARG_BEFORE: ${{ inputs.to || '0' }}
+          ARG_THRESHOLD: ${{ inputs.threshold || '0.25' }}
+          ARG_PATTERN: ${{ inputs.pattern || env.TEST_PATTERN }}
+        run: |
+          # FIXME: set up readability running in a background process.
+          # We probably need to bring the code over from
+          # web-monitoring-changed-terms-analysis
+          python generate_task_sheets.py \
+            --output out \
+            --after "$ARG_AFTER" \
+            --before "$ARG_BEFORE" \
+            --threshold "$ARG_THRESHOLD" \
+            --pattern "$ARG_PATTERN" \
+            --skip-readability
+
+      - name: Upload Results
+        uses: actions/upload-artifact@v4
+        with:
+          name: output
+          path: out
+          if-no-files-found: error
+          # TODO: what's appropriate retention here?
+          # retention-days: 1