Add docker-compose file
vgkoles committed Jun 7, 2024
1 parent dcbbf3e · commit 3cf0967
Showing 4 changed files with 25 additions and 12 deletions.
8 changes: 3 additions & 5 deletions Dockerfile
@@ -8,11 +8,11 @@ RUN apt-get update && apt-get install --yes \
 
 RUN adduser --uid 1000 --gecos '' --disabled-password linky
 
-ENV POSTGRES_HOST=localhost
+ENV POSTGRES_HOST=host.docker.internal
 ENV POSTGRES_PORT=5432
 ENV POSTGRES_DB=postgres
 ENV POSTGRES_USER=postgres
-ENV POSTGRES_PASSWORD=******
+ENV POSTGRES_PASSWORD=w4qu+0sj
 
 WORKDIR /home/link-liveliness-assessment

@@ -34,6 +34,4 @@ WORKDIR /home/link-liveliness-assessment/src
 
 EXPOSE 8000
 
-USER linky
-
 ENTRYPOINT [ "python3", "-m", "uvicorn", "api:app", "--reload", "--host", "0.0.0.0", "--port", "8000" ]
+USER linky
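
Switching POSTGRES_HOST from localhost to host.docker.internal lets the containerized app reach a Postgres instance on the Docker host. That name resolves out of the box on Docker Desktop (macOS/Windows), but on a plain Linux engine it usually needs an explicit host-gateway mapping, which this commit does not add. A minimal sketch of that mapping (an assumption about the deployment, not part of the diff):

    # Hypothetical compose override for Linux hosts, where
    # host.docker.internal is not resolvable by default (Docker Engine >= 20.10)
    services:
      api:
        extra_hosts:
          - "host.docker.internal:host-gateway"

Note also that ENV bakes the database password into the image; at runtime it can be overridden with -e POSTGRES_PASSWORD=... or a compose env_file.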
16 changes: 16 additions & 0 deletions docker-compose.yml
@@ -0,0 +1,16 @@
+services:
+  linkchecker:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    container_name: linkchecker
+    command: python linkchecker.py
+  api:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    command: python api.py
+    ports:
+      - "8000:8000"
+    depends_on:
+      - linkchecker
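
A caveat when reading this file together with the Dockerfile above: a compose command: replaces the image's CMD but is passed as extra arguments to an existing ENTRYPOINT, and the Dockerfile defines one (uvicorn). As written, python linkchecker.py and python api.py would be appended to the uvicorn invocation rather than run directly. A sketch of one way around that, assuming the intent is to run each script as the container's main process:

    # Hypothetical adjustment, not part of this commit: override the
    # entrypoint so the per-service command actually takes effect.
    services:
      linkchecker:
        entrypoint: ["python", "linkchecker.py"]
      api:
        entrypoint: ["python", "api.py"]

With the services defined, docker compose up --build builds the image and starts both containers, publishing the API on host port 8000.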
1 change: 1 addition & 0 deletions requirements.txt
@@ -5,4 +5,5 @@ pydantic
 uvicorn
 pandas
 asyncpg
+linkchecker
 databases
12 changes: 5 additions & 7 deletions src/linkchecker.py
@@ -104,8 +104,11 @@ def run_linkchecker(urls):
     for url in urls:
         # Run LinkChecker Docker command with specified user and group IDs for each URL
         process = subprocess.Popen([
-            "docker", "run", "--rm", "-i", "-u", "1000:1000", "ghcr.io/linkchecker/linkchecker:latest",
-            "--verbose", "--check-extern", "--recursion-level=1", "--output=csv",
+            "linkchecker",
+            "--verbose",
+            "--check-extern",
+            "--recursion-level=1",
+            "--output=csv",
             url + "?f=html"
         ], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
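
The checker now invokes the linkchecker CLI pulled in via requirements.txt instead of running the ghcr.io image, so no Docker socket is needed inside the container. The commit shows only the invocation; a sketch of how the CSV report on stdout might be consumed (LinkChecker's CSV output is ';'-delimited with '#'-prefixed comment lines; the parsing itself is an assumption, not code from this repo):

    import csv
    import subprocess

    def check_url(url: str) -> list[dict]:
        # Same invocation as the diff above, capturing the CSV report.
        process = subprocess.Popen(
            ["linkchecker", "--verbose", "--check-extern",
             "--recursion-level=1", "--output=csv", url + "?f=html"],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, _ = process.communicate()
        # Drop '#'-prefixed comment lines before handing rows to the parser.
        lines = [line for line in stdout.decode().splitlines()
                 if line and not line.startswith("#")]
        return list(csv.DictReader(lines, delimiter=";"))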

@@ -123,7 +126,6 @@ def main():
 
     print("Time started processing links.")
     print("Loading EJP SOIL Catalogue links...")
-    filename = "soil_catalogue_link.csv"
     total_pages, numbers_returned = get_pagination_info(ejp_catalogue_json_url)
 
     # Base URL
@@ -147,10 +149,6 @@
 
     # Filter out links with the specified formats
     filtered_links = {link for link in all_links if not any(format_to_remove in link for format_to_remove in formats_to_remove)}
-
-    # Remove the existing file if it exists
-    if os.path.exists(filename):
-        os.remove(filename)
 
     # Specify the fields to include in the CSV file
     fields_to_include = ['urlname', 'parentname', 'baseref', 'valid', 'result', 'warning', 'info', 'url', 'name']
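
The writer that consumes fields_to_include is truncated in this view. A plausible sketch of how such a field list typically drives the CSV output, assuming DictWriter-style rows (hypothetical helper, not code from this commit):

    import csv

    def write_report(rows: list[dict], path: str) -> None:
        fields_to_include = ['urlname', 'parentname', 'baseref', 'valid',
                             'result', 'warning', 'info', 'url', 'name']
        with open(path, 'w', newline='') as f:
            # extrasaction='ignore' silently drops any columns not listed.
            writer = csv.DictWriter(f, fieldnames=fields_to_include,
                                    extrasaction='ignore')
            writer.writeheader()
            writer.writerows(rows)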