Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add Dockerfile to build image #12

Merged
merged 5 commits into from
Jun 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@

# Base image: Debian Buster slim variant with Python 3.8 preinstalled.
FROM python:3.8-slim-buster
LABEL maintainer="[email protected]"

# Install TLS root certificates (needed for outbound HTTPS link checks)
# and libexpat (XML parsing); purge the apt lists to keep the layer small.
RUN apt-get update && apt-get install --yes \
ca-certificates libexpat1 \
&& rm -rf /var/lib/apt/lists/*

# Create an unprivileged user so the app does not run as root.
RUN adduser --uid 1000 --gecos '' --disabled-password linky

# Default PostgreSQL connection settings; host.docker.internal targets a
# database running on the Docker host. Intended to be overridden at run time.
ENV POSTGRES_HOST=host.docker.internal
ENV POSTGRES_PORT=5432
ENV POSTGRES_DB=postgres
ENV POSTGRES_USER=postgres
# NOTE(review): this is a masked placeholder baked into the image via ENV;
# real credentials should be supplied at run time (e.g. --env/--env-file or
# a secrets mechanism), never committed here — confirm deployment practice.
ENV POSTGRES_PASSWORD=******

WORKDIR /home/link-liveliness-assessment

# Give the unprivileged user ownership of the work directory.
RUN chown --recursive linky:linky .

# initially copy only the requirements files
# (done before copying the source tree so the dependency-install layer is
# cached and only rebuilt when requirements.txt changes)
COPY --chown=linky \
requirements.txt \
./

# Upgrade pip, then install the app dependencies plus the binary
# psycopg2 wheel (avoids needing a compiler/libpq headers in the image).
RUN pip install -U pip && \
python3 -m pip install \
-r requirements.txt \
psycopg2-binary

# Copy the rest of the project into the work directory.
COPY --chown=linky . .

# Run from the src/ directory where the application entry points live.
WORKDIR /home/link-liveliness-assessment/src

# Port served by the API process (see docker-compose.yml port mapping).
EXPOSE 8000

# Drop root privileges for the container's runtime processes.
USER linky
16 changes: 16 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
services:
# Batch job: crawls the catalogue and records link liveliness results.
linkchecker:
build:
context: .
dockerfile: Dockerfile
container_name: linkchecker
command: python linkchecker.py
# HTTP API exposing the stored link-check results.
# Both services build from the same Dockerfile/context, so they share
# one image definition and differ only in the command they run.
api:
build:
context: .
dockerfile: Dockerfile
command: python api.py
ports:
# host:container — the API listens on 8000 (see EXPOSE in the Dockerfile).
- "8000:8000"
# NOTE(review): depends_on only orders container startup; it does not
# wait for the linkchecker run to finish or for data to exist — confirm
# the API tolerates an empty/partial database on first start.
depends_on:
- linkchecker
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ pydantic
uvicorn
pandas
asyncpg
linkchecker
databases
Binary file modified src/__pycache__/api.cpython-311.pyc
Binary file not shown.
17 changes: 6 additions & 11 deletions src/linkchecker.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import requests
import math
import time
import csv
import re
import os

Expand Down Expand Up @@ -108,8 +107,11 @@ def run_linkchecker(urls):
for url in urls:
# Run LinkChecker Docker command with specified user and group IDs for each URL
process = subprocess.Popen([
"docker", "run", "--rm", "-i", "-u", "1000:1000", "ghcr.io/linkchecker/linkchecker:latest",
"--verbose", "--check-extern", "--recursion-level=1", "--output=csv",
"linkchecker",
"--verbose",
"--check-extern",
"--recursion-level=1",
"--output=csv",
url + "?f=html"
], stdout=subprocess.PIPE, stderr=subprocess.PIPE)

Expand All @@ -126,8 +128,7 @@ def main():
conn, cur = setup_database()

print("Time started processing links.")
print("Loading Catalogue links...")
filename = "soil_catalogue_link.csv"
print("Loading EJP SOIL Catalogue links...")
total_pages, numbers_returned = get_pagination_info(catalogue_json_url)

# Base URL
Expand All @@ -150,13 +151,7 @@ def main():
]

# Filter out links with the specified formats
print(all_links)
print(formats_to_remove)
filtered_links = {link for link in all_links if not any(format_to_remove in (link or "") for format_to_remove in formats_to_remove)}

# Remove the existing file if it exists
if os.path.exists(filename):
os.remove(filename)

# Specify the fields to include in the CSV file
fields_to_include = ['urlname', 'parentname', 'baseref', 'valid', 'result', 'warning', 'info', 'url', 'name']
Expand Down
Loading