diff --git a/src/api.py b/src/api.py index b5b0720..07d610b 100644 --- a/src/api.py +++ b/src/api.py @@ -119,6 +119,17 @@ async def get_status_for_url(item): data = await fetch_data(query=query, values={'item': decoded_item }) return data +# Endpoint to retrieve URLs that timed out. Timeout is set to 5 seconds currently +@app.get('/Timeout_URLs', response_model=List[StatusResponse]) +async def get_timeout_urls(): + query = """ + SELECT DISTINCT * + FROM linkchecker_output + WHERE valid LIKE '%ReadTimeout%' OR valid LIKE '%ConnectTimeout%' + """ + data = await fetch_data(query=query) + return data + @app.get("/Single_url_status_history", response_model=List[URLAvailabilityResponse]) async def get_current_url_status_history( url: str = Query(..., description="URL to get avalability"), diff --git a/src/linkchecker.py b/src/linkchecker.py index a656cb9..8e1a7fc 100644 --- a/src/linkchecker.py +++ b/src/linkchecker.py @@ -20,8 +20,8 @@ base = os.environ.get("OGCAPI_URL") or "https://demo.pycsw.org/gisdata" collection = os.environ.get("OGCAPI_COLLECTION") or "metadata:main" -# Remove comment' -catalogue_json_url = base + "collections/" + collection + "/items?f=json" +# format catalogue path with f-string +catalogue_json_url= f"{base}/collections/{collection}/items?f=json" def setup_database(): conn = psycopg2.connect( @@ -124,6 +124,7 @@ def run_linkchecker(urls): "--verbose", "--check-extern", "--recursion-level=1", + "--timeout=5", "--output=csv", url + "?f=html" ], stdout=subprocess.PIPE, stderr=subprocess.PIPE)