Skip to content

Commit

Permalink
Merge branch 'main' into vgole001/issue10
Browse files Browse the repository at this point in the history
  • Loading branch information
pvgenuchten committed Jun 28, 2024
2 parents 9260ddd + 3472b1d commit f86d7a8
Show file tree
Hide file tree
Showing 5 changed files with 280 additions and 45 deletions.
4 changes: 3 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,6 @@ WORKDIR /home/link-liveliness-assessment/src

EXPOSE 8000

USER linky
USER linky

ENTRYPOINT [ "python3", "-m", "uvicorn", "api:app", "--reload", "--host", "0.0.0.0", "--port", "8000" ]
26 changes: 26 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,31 @@ python -m uvicorn api:app --reload --host 0.0.0.0 --port 8000
```
To view the FastAPI service documentation, open [http://127.0.0.1:8000/docs]


# Get current URL Status History
This endpoint returns the history of a specific URL.
Let's say we have the status of a specific URL recorded over time:

| id | url | validation_result | timestamp |
|-----|------------------------|-------------------|-------------------------|
| 1 | https://example.com | 200 OK | 2023-01-01 10:00:00+00 |
| 2 | https://wikipedia.com | 404 Not Found | 2023-01-01 10:00:05+00 |
| 3 | https://example.com | 200 OK | 2023-01-02 11:00:00+00 |
| 4 | https://wikipedia.com | 500 Server Error | 2023-01-02 11:00:05+00 |
| 5 | https://wikipedia.com | 200 OK | 2023-01-02 11:00:10+00 |

Running the `/Single_url_status_history` endpoint for
https://wikipedia.com with limit = 2 will fetch the following result:

| id | url | validation_result | timestamp |
|-----|------------------------|-------------------|-------------------------|
| 1 | https://wikipedia.com | 500 Server Error | 2023-01-02 11:00:05+00 |
| 2 | https://wikipedia.com | 404 Not Found | 2023-01-01 10:00:05+00 |

This is the URL's history in descending datetime order.

# Docker

## Deploy `linky` at a path

You can set `ROOTPATH` env var to run the api at a path (default is at root)
Expand All @@ -70,6 +95,7 @@ export ROOTPATH=/linky
```

## Docker

A Docker instance must be running for the linkchecker command to work.

## CI/CD
Expand Down
Binary file modified src/__pycache__/api.cpython-311.pyc
Binary file not shown.
135 changes: 125 additions & 10 deletions src/api.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from fastapi import FastAPI, HTTPException
from fastapi import FastAPI, HTTPException, Query
from dotenv import load_dotenv
from databases import Database
from typing import List
from typing import List, Optional
from pydantic import BaseModel
from urllib.parse import unquote
from datetime import datetime
import asyncpg
import logging
import os

# Load environment variables from .env file
Expand All @@ -17,19 +19,37 @@
os.environ.get("POSTGRES_PORT") + "/" + os.environ.get("POSTGRES_DB")

database = Database(DATABASE_URL)
rootpath = os.environ.get("ROOTPATH") or "/"

# FastAPI app instance
rootpath=os.environ.get("ROOTPATH") or "/"
app = FastAPI(root_path=rootpath)
app = FastAPI(
title="Linkchecker-Liveness",
summary="Evaluate the status of URLs from OGC data catalogues",
root_path=rootpath
)
logger = logging.getLogger(__name__)


# Define response model
class StatusResponse(BaseModel):
    """Response model for a single row of linkchecker output.

    Field names mirror the ``linkchecker_output`` table columns so rows
    can be unpacked directly into the model.
    """
    id: int
    # Nullable text columns; default to None so a missing column in a
    # fetched row does not fail validation (robust under pydantic v2,
    # where Optional fields without a default are required).
    urlname: Optional[str] = None
    parentname: Optional[str] = None
    valid: Optional[str] = None
    warning: Optional[str] = None

# Model to get the availability history of a specific url
class URLAvailabilityResponse(BaseModel):
    """One validation-history record joined with linkchecker output.

    Field names must match the SQL column aliases used by the history
    endpoints (``url``, ``parent_url``, ``validation_valid``, ``result``,
    ``warning``, ``last_checked``) so rows can be unpacked directly via
    ``URLAvailabilityResponse(**dict(row))``.
    """
    url: Optional[str] = None
    # Fixed: was misspelled "perent_url", which never matched the query
    # alias "parent_url" and therefore was always null in responses.
    parent_url: Optional[str] = None
    validation_valid: Optional[str] = None
    result: Optional[str] = None
    warning: Optional[str] = None
    # Fixed: was camelCase "lastChecked"; the query aliases this column
    # as "last_checked", so the field was always null in responses.
    last_checked: Optional[datetime] = None

class DeprecatedUrlsResponse(BaseModel):
    """Response model for the deprecated-URLs endpoint.

    Holds a single URL flagged as deprecated in the url_status table.
    """
    url: Optional[str]

# Define status lists
REDIRECTION_STATUSES = [
"301 Moved Permanently",
Expand Down Expand Up @@ -110,6 +130,101 @@ async def get_timeout_urls():
data = await fetch_data(query=query)
return data

@app.get("/Single_url_status_history", response_model=List[URLAvailabilityResponse])
async def get_current_url_status_history(
        url: str = Query(..., description="URL to get availability for"),
        limit: int = Query(100, ge=1, le=1000, description="Maximum number of results (default: 100, min: 1, max: 1000)")) -> List[URLAvailabilityResponse]:
    """Return the latest validation status for a specific URL.

    Raises HTTP 500 with the underlying error message if the database
    query or row-to-model conversion fails.

    NOTE(review): the subquery keeps only rn = 1 (the newest
    validation_history row per URL), so this returns at most one
    history entry per matching linkchecker_output row — not the full
    descending history with `limit` entries that the README describes.
    Confirm which behavior is intended.
    """
    query = """
    SELECT
        lo.urlname AS url,
        lo.parentname AS parent_url,
        lo.result AS result,
        lo.warning AS warning,
        vh.validation_result AS validation_valid,
        vh.timestamp AS last_checked
    FROM
        linkchecker_output lo
    JOIN (
        SELECT
            url,
            validation_result,
            timestamp,
            ROW_NUMBER() OVER (PARTITION BY url ORDER BY timestamp DESC) as rn
        FROM
            validation_history
    ) vh ON lo.urlname = vh.url AND vh.rn = 1
    WHERE (lo.urlname = :url)
    LIMIT :limit
    """

    try:
        results = await fetch_data(query=query, values={'url': url, 'limit': limit})
        logger.info(f"Query returned {len(results)} results.")

        # Unpack DB records into the response model; the SELECT aliases
        # must match the model's field names.
        response_data = [URLAvailabilityResponse(**dict(row)) for row in results]

        return response_data
    except Exception as e:
        logger.error(f"Error occurred: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/All_url_status_history", response_model=List[URLAvailabilityResponse])
async def get_all_url_status_history(
        limit: int = Query(100, ge=1, le=1000, description="Maximum number of results (default: 100, min: 1, max: 1000)")) -> List[URLAvailabilityResponse]:
    """Return the most recent validation status for every known URL.

    Only the newest validation_history row per URL (rn = 1) is joined
    to its linkchecker output; results are ordered newest-first and
    capped at `limit`. Raises HTTP 500 on database errors.
    """
    query = """
    SELECT
        lo.urlname AS url,
        lo.parentname AS parent_url,
        lo.result AS result,
        lo.warning AS warning,
        vh.validation_result AS validation_valid,
        vh.timestamp AS last_checked
    FROM
        linkchecker_output lo
    JOIN (
        SELECT
            url,
            validation_result,
            timestamp,
            ROW_NUMBER() OVER (PARTITION BY url ORDER BY timestamp DESC) as rn
        FROM
            validation_history
    ) vh ON lo.urlname = vh.url AND vh.rn = 1
    ORDER BY
        vh.timestamp DESC
    LIMIT :limit
    """

    values = {"limit": limit}

    try:
        results = await fetch_data(query=query, values=values)
        # Use the module-level logger (was the root `logging` module,
        # inconsistent with the other endpoints).
        logger.info(f"Query returned {len(results)} results.")

        # Convert each record to a plain dict before unpacking, matching
        # the sibling endpoint (bare **row relies on the record type
        # implementing the mapping protocol).
        response_data = [URLAvailabilityResponse(**dict(row)) for row in results]

        return response_data
    except Exception as e:
        logger.error(f"Error occurred: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))

# NOTE(review): the route path contains a space; left unchanged because
# renaming it would break existing clients, but consider
# '/Deprecated_urls' for consistency with the other endpoints.
@app.get('/Deprecated URLs', response_model=List[DeprecatedUrlsResponse])
async def get_deprecated_urls():
    """Return all URLs flagged as deprecated in the url_status table.

    Raises HTTP 500 with the underlying error message on database
    failure.
    """
    query = """
    SELECT
        us.url AS url
    FROM
        url_status us
    WHERE us.deprecated = TRUE
    """
    try:
        data = await fetch_data(query=query)
        return data
    except Exception as e:
        # Use the module-level logger (was the root `logging` module,
        # inconsistent with the other endpoints).
        logger.error(f"Error occurred: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))

# Start the application
@app.on_event('startup')
async def startup():
Expand Down
Loading

0 comments on commit f86d7a8

Please sign in to comment.