Get url availability over time
vgkoles committed Jun 19, 2024
1 parent 7bde6bb commit 991bfab
Showing 3 changed files with 126 additions and 9 deletions.
Binary file modified src/__pycache__/api.cpython-311.pyc
Binary file not shown.
107 changes: 99 additions & 8 deletions src/api.py
@@ -1,10 +1,12 @@
from fastapi import FastAPI, HTTPException
from fastapi import FastAPI, HTTPException, Query
from dotenv import load_dotenv
from databases import Database
from typing import List
from typing import List, Optional
from pydantic import BaseModel
from urllib.parse import unquote
from datetime import datetime
import asyncpg
import logging
import os

# Load environment variables from .env file
@@ -20,15 +22,25 @@

# FastAPI app instance
app = FastAPI()
logger = logging.getLogger(__name__)

# Define response model
class StatusResponse(BaseModel):
id: int # Example column, adjust based on your actual table schema
urlname: str
parentname: str
valid: str
warning: str

id: int
urlname: Optional[str]
parentname: Optional[str]
valid: Optional[str]
warning: Optional[str]

# Model for the availability history of a specific URL
class URLAvailabilityResponse(BaseModel):
url: Optional[str]
parent_url: Optional[str]
validation_valid: Optional[str]
result: Optional[str]
warning: Optional[str]
last_checked: Optional[datetime]

# Define status lists
REDIRECTION_STATUSES = [
"301 Moved Permanently",
@@ -98,6 +110,85 @@ async def get_status_for_url(item):
data = await fetch_data(query=query, values={'item': decoded_item })
return data

@app.get("/Single_url_status_history", response_model=List[URLAvailabilityResponse])
async def get_current_url_status_history(
url: str = Query(..., description="URL to get availability"),
limit: int = Query(100, ge=1, le=1000, description="Maximum number of results (default: 100, min: 1, max: 1000)")) -> List[URLAvailabilityResponse]:
query = """
SELECT
lo.urlname AS url,
lo.parentname AS parent_url,
lo.result AS result,
lo.warning AS warning,
vh.validation_result AS validation_valid,
vh.timestamp AS last_checked
FROM
linkchecker_output lo
JOIN (
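-- The window function ranks validation_history rows per URL by timestamp; rn = 1 keeps only the latest check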
SELECT
url,
validation_result,
timestamp,
ROW_NUMBER() OVER (PARTITION BY url ORDER BY timestamp DESC) as rn
FROM
validation_history
) vh ON lo.urlname = vh.url AND vh.rn = 1
WHERE (lo.urlname = :url)
LIMIT :limit
"""

try:
results = await fetch_data(query=query, values={'url': url, 'limit': limit})
logger.info(f"Query returned {len(results)} results.")

response_data = [URLAvailabilityResponse(**dict(row)) for row in results]

return response_data
except Exception as e:
logger.error(f"Error occurred: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
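
As a usage note, the new endpoint can be exercised with a short client script. The following is a minimal sketch, assuming the API is served on localhost:8000 and that the queried URL exists in linkchecker_output (both illustrative assumptions, not part of this commit):

import requests

resp = requests.get(
    "http://localhost:8000/Single_url_status_history",
    params={"url": "https://example.com/dataset", "limit": 10},  # hypothetical URL
)
resp.raise_for_status()
for entry in resp.json():
    print(entry["url"], entry["validation_valid"], entry["last_checked"])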

@app.get("/All_url_status_history", response_model=List[URLAvailabilityResponse])
async def get_all_url_status_history(
limit: int = Query(100, ge=1, le=1000, description="Maximum number of results (default: 100, min: 1, max: 1000)")) -> List[URLAvailabilityResponse]:

query = """
SELECT
lo.urlname AS url,
lo.parentname AS parent_url,
lo.result AS result,
lo.warning AS warning,
vh.validation_result AS validation_valid,
vh.timestamp AS last_checked
FROM
linkchecker_output lo
JOIN (
SELECT
url,
validation_result,
timestamp,
ROW_NUMBER() OVER (PARTITION BY url ORDER BY timestamp DESC) as rn
FROM
validation_history
) vh ON lo.urlname = vh.url AND vh.rn = 1
ORDER BY
vh.timestamp DESC
LIMIT :limit
"""

values = {"limit": limit}

try:
results = await fetch_data(query=query, values=values)
logger.info(f"Query returned {len(results)} results.")

response_data = [URLAvailabilityResponse(**dict(row)) for row in results]

return response_data
except Exception as e:
logger.error(f"Error occurred: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
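
The aggregate endpoint can be exercised the same way, under the same assumptions:

import requests

resp = requests.get("http://localhost:8000/All_url_status_history", params={"limit": 100})
resp.raise_for_status()
print(len(resp.json()), "records returned")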

# Start the application
@app.on_event('startup')
async def startup():
28 changes: 27 additions & 1 deletion src/linkchecker.py
Original file line number Diff line number Diff line change
@@ -54,7 +54,23 @@ def setup_database():
)
"""
cur.execute(create_table_query)


# Check if the validation_history table exists
cur.execute("SELECT EXISTS(SELECT 1 FROM information_schema.tables WHERE table_name = 'validation_history')")
validation_history_table_exists = cur.fetchone()[0]

if not validation_history_table_exists:
# Create the validation_history table if it doesn't exist
create_validation_history_table = """
CREATE TABLE validation_history (
id SERIAL PRIMARY KEY,
url TEXT NOT NULL,
validation_result TEXT NOT NULL,
timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
"""
cur.execute(create_validation_history_table)

# Commit the changes
conn.commit()

@@ -121,6 +137,14 @@ def run_linkchecker(urls):
# Wait for the process to finish
process.wait()

def insert_validation_history(conn, url, validation_result):
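"""Record one validation result for a URL; timestamp defaults to CURRENT_TIMESTAMP."""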
with conn.cursor() as cur:
cur.execute(
"INSERT INTO validation_history (url, validation_result) VALUES (%s, %s)",
(url, validation_result)
)
conn.commit()

def main():
start_time = time.time() # Start timing
# Set up the database and create the table
@@ -172,6 +196,8 @@ def main():
"""
cur.execute(insert_query, filtered_values)
conn.commit()

insert_validation_history(conn, filtered_values[0], filtered_values[3])

print("LinkChecker output written to PostgreSQL database")

