Skip to content

Commit

Permalink
Merge branch 'vgole001/issue7'
Browse files Browse the repository at this point in the history
  • Loading branch information
pvgenuchten committed Jul 8, 2024
2 parents c0d2c27 + e07e1a1 commit 1824c64
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 105 deletions.
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ python -m uvicorn api:app --reload --host 0.0.0.0 --port 8000
```
To view the FastAPI service, open [http://127.0.0.1:8000/docs]


# Get current URL Status History
This endpoint returns the history of a specific URL.
Let's say we have the status of a specific URL over time
Expand All @@ -85,7 +84,7 @@ https://wikipedia.com and setting limit = 2 it will fetch the following result:
This is the URL's history in descending order by datetime

# Docker
=======

## Deploy `linky` at a path

You can set `ROOTPATH` env var to run the api at a path (default is at root)
Expand Down
Binary file modified src/__pycache__/api.cpython-311.pyc
Binary file not shown.
150 changes: 47 additions & 103 deletions src/linkchecker.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,44 +33,48 @@ def setup_database():
)
cur = conn.cursor()

# Create or truncate linkchecker_output table
cur.execute("DROP TABLE IF EXISTS linkchecker_output")
create_table_query = """
CREATE TABLE linkchecker_output (
id SERIAL PRIMARY KEY,
urlname TEXT,
parentname TEXT,
baseref TEXT,
valid TEXT,
result TEXT,
warning TEXT,
info TEXT,
url TEXT,
name TEXT
)
"""
cur.execute(create_table_query)

# Create validation_history table if it doesn't exist
cur.execute("""
CREATE TABLE IF NOT EXISTS validation_history (
id SERIAL PRIMARY KEY,
url TEXT NOT NULL,
validation_result TEXT NOT NULL,
timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
""")

# Create url_status table if it doesn't exist
cur.execute("""
CREATE TABLE IF NOT EXISTS url_status (
url TEXT PRIMARY KEY,
consecutive_failures INTEGER DEFAULT 0,
deprecated BOOLEAN DEFAULT FALSE,
last_checked TIMESTAMP
)
""")
# Check if the table exists
cur.execute("SELECT EXISTS(SELECT 1 FROM information_schema.tables WHERE table_name = 'linkchecker_output')")
table_exists = cur.fetchone()[0]

if table_exists:
# If the table exists, truncate it and reset the primary key sequence
cur.execute("TRUNCATE TABLE linkchecker_output RESTART IDENTITY")
else:
# If the table does not exist, create it
create_table_query = """
CREATE TABLE linkchecker_output (
id SERIAL PRIMARY KEY,
urlname TEXT,
parentname TEXT,
baseref TEXT,
valid TEXT,
result TEXT,
warning TEXT,
info TEXT,
url TEXT,
name TEXT
)
"""
cur.execute(create_table_query)

# Check if the validation_history table exists
cur.execute("SELECT EXISTS(SELECT 1 FROM information_schema.tables WHERE table_name = 'validation_history')")
validation_history_table_exists = cur.fetchone()[0]

if not validation_history_table_exists:
# Create the validation_history table if it doesn't exist
create_validation_history_table = """
CREATE TABLE validation_history (
id SERIAL PRIMARY KEY,
url TEXT NOT NULL,
validation_result TEXT NOT NULL,
timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
"""
cur.execute(create_validation_history_table)

# Commit the changes
conn.commit()
return conn, cur

Expand Down Expand Up @@ -135,73 +139,13 @@ def run_linkchecker(urls):
# Wait for the process to finish
process.wait()

def insert_validation_history(conn, url, validation_result, is_valid):
def insert_validation_history(conn, url, validation_result):
with conn.cursor() as cur:
# Insert new record in validation_history
cur.execute("""
INSERT INTO validation_history (url, validation_result)
VALUES (%s, %s)
""", (url, validation_result))

# Get current status
cur.execute("SELECT consecutive_failures, deprecated FROM url_status WHERE url = %s", (url,))
result = cur.fetchone()

if result:
consecutive_failures, deprecated = result
if not is_valid:
consecutive_failures += 1
else:
consecutive_failures = 0

deprecated = deprecated or (consecutive_failures >= MAX_FAILURES)

# Update url_status
cur.execute("""
UPDATE url_status
SET consecutive_failures = %s,
deprecated = %s,
last_checked = CURRENT_TIMESTAMP
WHERE url = %s
""", (consecutive_failures, deprecated, url))
else:
# Insert new url_status if not exists
cur.execute("""
INSERT INTO url_status (url, consecutive_failures, deprecated, last_checked)
VALUES (%s, %s, %s, CURRENT_TIMESTAMP)
""", (url, 0 if is_valid else 1, False))

cur.execute(
"INSERT INTO validation_history (url, validation_result) VALUES (%s, %s)",
(url, validation_result)
)
conn.commit()

def is_valid_status(valid_string):
    """Return True when *valid_string* starts with a 2xx/3xx status code.

    Only the first whitespace-separated token is examined.

    Args:
        valid_string: Status text such as "200 OK" or "404 Not Found".

    Returns:
        bool: True when the leading token is a decimal number in the range
        200-399 (inclusive); False otherwise, including for empty or
        whitespace-only input.
    """
    parts = valid_string.split()
    # A blank string splits to an empty list; report "not valid" instead of
    # indexing into it and raising IndexError.
    if not parts:
        return False
    code = parts[0]
    # isdecimal() admits only characters int() can parse; isdigit() also
    # accepts e.g. superscript digits, which make int() raise ValueError.
    return code.isdecimal() and 200 <= int(code) < 400

def get_active_urls(conn):
    """Return the URLs that are not flagged as deprecated.

    Args:
        conn: Open database connection whose ``cursor()`` can be used as a
            context manager.

    Returns:
        None when ``validation_history`` holds no rows; otherwise a list of
        the ``url`` values from ``url_status`` whose ``deprecated`` flag is
        false.
    """
    with conn.cursor() as cur:
        cur.execute("SELECT COUNT(*) FROM validation_history")
        if cur.fetchone()[0] == 0:
            return None  # The table is empty

        # The deprecated flag is a column of url_status. validation_history
        # (id, url, validation_result, timestamp) has no such column, so
        # filtering that table on it cannot work.
        cur.execute("SELECT url FROM url_status WHERE NOT deprecated")
        return [row[0] for row in cur.fetchall()]

def get_all_urls(conn):
    """Fetch the url of every row stored in the validation_history table.

    Args:
        conn: Open database connection whose ``cursor()`` can be used as a
            context manager.

    Returns:
        None when the table has no rows; otherwise a list containing the
        ``url`` value of each row, one entry per row.
    """
    with conn.cursor() as cursor:
        cursor.execute("SELECT COUNT(*) FROM validation_history")
        row_count = cursor.fetchone()[0]

        # Guard clause: an empty table is reported as None, not as [].
        if row_count == 0:
            return None

        cursor.execute("SELECT url FROM validation_history")
        urls = []
        for record in cursor.fetchall():
            urls.append(record[0])
        return urls

def main():
start_time = time.time() # Start timing
Expand Down Expand Up @@ -265,7 +209,7 @@ def main():
cur.execute(insert_query, filtered_values)
conn.commit()

insert_validation_history(conn, filtered_values[0], filtered_values[3], is_valid)
insert_validation_history(conn, filtered_values[0], filtered_values[3])

print("LinkChecker output written to PostgreSQL database")

Expand Down

0 comments on commit 1824c64

Please sign in to comment.