diff --git a/database/search-index.db b/database/search-index.db
index 0194df1..842ebb0 100644
Binary files a/database/search-index.db and b/database/search-index.db differ
diff --git a/main.py b/main.py
index b74336f..e0798fd 100644
--- a/main.py
+++ b/main.py
@@ -1,4 +1,5 @@
 import streamlit as st
+import time
 from manager.insert import insert_data
 from search.index import Search_Data
@@ -29,11 +30,12 @@
 
 if submitted2 and AddForm == True:
     name = st.text_input('Enter website name:')
     address = st.text_input('Enter website address:')
-
+
     if name and address:
-        insert_data(name, address)
-        st.success('Data inserted successfully!')
-        st.session_state.add_state = False
+        with st.spinner('Checking the given information...'):
+            time.sleep(1)
+            insert_data(name, address)
+        st.session_state.add_state = False
 elif submitted2 and not AddForm:
     st.session_state.add_state = True
\ No newline at end of file
diff --git a/manager/__pycache__/insert.cpython-312.pyc b/manager/__pycache__/insert.cpython-312.pyc
index 2b6a5bc..25f2329 100644
Binary files a/manager/__pycache__/insert.cpython-312.pyc and b/manager/__pycache__/insert.cpython-312.pyc differ
diff --git a/manager/insert.py b/manager/insert.py
index 82016d4..ddc2a99 100644
--- a/manager/insert.py
+++ b/manager/insert.py
@@ -1,8 +1,45 @@
 import streamlit as st
-
 import sqlite3
 import requests
 from bs4 import BeautifulSoup
+import json
+
+GOOGLE_SAFE_BROWSING_API_KEY = 'API_KEY'
+
+def content_exists(text_content, address):
+    with sqlite3.connect('./database/search-index.db') as conn:
+        cursor = conn.cursor()
+        if text_content:
+            cursor.execute('''SELECT COUNT(*) FROM information WHERE text = ?''', (text_content,))
+        else:
+            cursor.execute('''SELECT COUNT(*) FROM information WHERE address = ?''', (address,))
+        count = cursor.fetchone()[0]
+        return count > 0
+
+def is_content_safe(text_content):
+    url = 'https://safebrowsing.googleapis.com/v4/threatMatches:find?key=' + GOOGLE_SAFE_BROWSING_API_KEY
+    payload = {
+        "client": {
+            "clientId": "your-client-id",
+            "clientVersion": "1.5.2"
+        },
+        "threatInfo": {
+            "threatTypes": ["MALWARE", "SOCIAL_ENGINEERING", "UNWANTED_SOFTWARE", "POTENTIALLY_HARMFUL_APPLICATION"],
+            "platformTypes": ["ANY_PLATFORM"],
+            "threatEntryTypes": ["URL"],
+            "threatEntries": [{"url": text_content}]
+        }
+    }
+    headers = {
+        'Content-Type': 'application/json'
+    }
+
+    response = requests.post(url, headers=headers, data=json.dumps(payload))
+    if response.status_code != 200:
+        # Fail closed: if the Safe Browsing API call fails, do not assume the URL is safe.
+        return False
+    data = response.json()
+    return not data.get('matches')
 
 def insert_data(name, address):
     try:
@@ -11,7 +48,15 @@ def insert_data(name, address):
         soup = BeautifulSoup(response.text, 'html.parser')
         text_content = "\n".join([p.text for p in soup.find_all('p')])
     except requests.RequestException as e:
-        st.text("Error accessing or parsing the website:", e)
+        st.error(f"Error accessing or parsing the website: {e}")
+        return
+
+    if content_exists(text_content, address):
+        st.warning("Content already exists in the database.")
+        return
+
+    if not is_content_safe(address):
+        st.warning("Unsafe content detected. Not inserting into the database.")
         return
 
     with sqlite3.connect('./database/search-index.db') as conn:
@@ -20,5 +65,6 @@
             cursor.execute('''INSERT INTO information (name, address, text)
                               VALUES (?, ?, ?)''', (name, address, text_content))
             conn.commit()
+            st.success("Data inserted successfully.")
     except sqlite3.Error as e:
-        st.text("Error inserting data into the database:", e)
+        st.error(f"Error inserting data into the database: {e}")