From 267afb514a3882570433080ff26b57abf99c57bc Mon Sep 17 00:00:00 2001 From: Phil1436 Date: Wed, 1 Nov 2023 13:53:14 +0100 Subject: [PATCH 001/254] comment torch out --- requirements.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 19f3bad0..1f7dff92 100644 --- a/requirements.txt +++ b/requirements.txt @@ -191,13 +191,17 @@ tokenizers==0.10.3 # transformers tomli==2.0.1 # via setuptools-scm -torch==1.10.0 +# ------------------------------------------------------------------------ +# torch==1.10.0 +# ------------------------------------------------------------------------ # via # aset (setup.py) # sentence-transformers # stanza # torchvision -torchvision==0.11.1 +# ------------------------------------------------------------------------ +# torchvision==0.11.1 +# ------------------------------------------------------------------------ # via sentence-transformers tqdm==4.64.1 # via From b7d82d8bfe34839e5b12a0455150e87a213e8fe0 Mon Sep 17 00:00:00 2001 From: Phil1436 Date: Wed, 1 Nov 2023 13:53:25 +0100 Subject: [PATCH 002/254] init docker files --- .dockerignore | 4 ++++ Dockerfile | 28 ++++++++++++++++++++++++++++ docker-compose.yaml | 9 +++++++++ 3 files changed, 41 insertions(+) create mode 100644 .dockerignore create mode 100644 Dockerfile create mode 100644 docker-compose.yaml diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..4729ef67 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,4 @@ +.git +.gitignore +LICENSE.md +README.md \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..80d3c2a4 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,28 @@ +FROM python:3.9 + +USER root +RUN mkdir /home/wannadb +WORKDIR /home/wannadb +COPY . . + +EXPOSE 8080 + +# Create virtual environment +RUN python -m venv venv +RUN source venv/bin/activate +RUN export PYTHONPATH="." + +# Install dependencies +RUN pip install --upgrade pip +RUN pip install --use-pep517 -r requirements.txt + +# installing torch manually +RUN pip install torch==1.10.0 +RUN pip install torchvision==0.11.1 + +# Run tests +RUN pip install --use-pep517 pytest +RUN pytest + +# Keep container running +RUN while true; do sleep 1000 \ No newline at end of file diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 00000000..7467ed6f --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,9 @@ +version: '3.6' +services: + wannadb: + build: + context: . + dockerfile: Dockerfile + restart: always + ports: + - 8080:8080 From 203a1148506ff0dcbda6bc908d3900ae3d691bab Mon Sep 17 00:00:00 2001 From: IPowerW Date: Wed, 1 Nov 2023 18:19:28 +0100 Subject: [PATCH 003/254] fix source with . --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 80d3c2a4..751d8f78 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,7 +9,7 @@ EXPOSE 8080 # Create virtual environment RUN python -m venv venv -RUN source venv/bin/activate +RUN . venv/bin/activate RUN export PYTHONPATH="." # Install dependencies From e53ec6e89a48fe532ccfcf325b04e591ac4f1d9a Mon Sep 17 00:00:00 2001 From: IPowerW Date: Wed, 1 Nov 2023 18:26:33 +0100 Subject: [PATCH 004/254] Revert "comment torch out" This reverts commit 267afb514a3882570433080ff26b57abf99c57bc. 
--- requirements.txt | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/requirements.txt b/requirements.txt index 1f7dff92..19f3bad0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -191,17 +191,13 @@ tokenizers==0.10.3 # transformers tomli==2.0.1 # via setuptools-scm -# ------------------------------------------------------------------------ -# torch==1.10.0 -# ------------------------------------------------------------------------ +torch==1.10.0 # via # aset (setup.py) # sentence-transformers # stanza # torchvision -# ------------------------------------------------------------------------ -# torchvision==0.11.1 -# ------------------------------------------------------------------------ +torchvision==0.11.1 # via sentence-transformers tqdm==4.64.1 # via From 5daf56a5af633b98e0be69aa37ca6644ad42395d Mon Sep 17 00:00:00 2001 From: Phil1436 Date: Wed, 1 Nov 2023 19:40:20 +0100 Subject: [PATCH 005/254] added tty to keep container up --- docker-compose.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/docker-compose.yaml b/docker-compose.yaml index 7467ed6f..480c7ea5 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -5,5 +5,6 @@ services: context: . dockerfile: Dockerfile restart: always + tty: true ports: - 8080:8080 From 47d169e18377e9aa003c1f0dfeca4e925acac1cf Mon Sep 17 00:00:00 2001 From: Phil1436 Date: Wed, 1 Nov 2023 19:40:32 +0100 Subject: [PATCH 006/254] added venv dir --- .dockerignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.dockerignore b/.dockerignore index 4729ef67..8a616341 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,4 +1,5 @@ .git .gitignore LICENSE.md -README.md \ No newline at end of file +README.md +venv/ From 1194a12830031126205a81e88043b0c928945abc Mon Sep 17 00:00:00 2001 From: Phil1436 Date: Wed, 1 Nov 2023 19:40:59 +0100 Subject: [PATCH 007/254] exclude pytest | workaround for source cmd --- Dockerfile | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 80d3c2a4..2a227f07 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,7 +9,9 @@ EXPOSE 8080 # Create virtual environment RUN python -m venv venv -RUN source venv/bin/activate +#RUN sudo ./venv/bin/activate +RUN ["/bin/bash", "-c", "source venv/bin/activate"] +#RUN source venv/bin/activate RUN export PYTHONPATH="." # Install dependencies @@ -22,7 +24,7 @@ RUN pip install torchvision==0.11.1 # Run tests RUN pip install --use-pep517 pytest -RUN pytest +#RUN pytest # Keep container running -RUN while true; do sleep 1000 \ No newline at end of file +#RUN while true; do sleep 1000 done \ No newline at end of file From 34c1728ae14c6182195f62c01af3b65aeba08420 Mon Sep 17 00:00:00 2001 From: IPowerW Date: Wed, 1 Nov 2023 22:25:47 +0100 Subject: [PATCH 008/254] add flask setup --- Dockerfile | 36 +++++++++++++++++++----------------- backend/app.py | 7 +++++++ backend/routes.py | 12 ++++++++++++ entrypoint.sh | 12 ++++++++++++ requirements.txt | 2 ++ 5 files changed, 52 insertions(+), 17 deletions(-) create mode 100644 backend/app.py create mode 100644 backend/routes.py create mode 100644 entrypoint.sh diff --git a/Dockerfile b/Dockerfile index 2a227f07..2fb824b7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,28 +3,30 @@ FROM python:3.9 USER root RUN mkdir /home/wannadb WORKDIR /home/wannadb -COPY . . 
- -EXPOSE 8080 - -# Create virtual environment -RUN python -m venv venv -#RUN sudo ./venv/bin/activate -RUN ["/bin/bash", "-c", "source venv/bin/activate"] -#RUN source venv/bin/activate -RUN export PYTHONPATH="." +COPY requirements.txt requirements.txt # Install dependencies -RUN pip install --upgrade pip -RUN pip install --use-pep517 -r requirements.txt +RUN pip install --use-pep517 torch==1.10.0 -# installing torch manually -RUN pip install torch==1.10.0 -RUN pip install torchvision==0.11.1 +RUN pip install --use-pep517 -r requirements.txt +################################## +## do not change above ## +## changes above cause ## +## long loading times ## +################################## # Run tests RUN pip install --use-pep517 pytest #RUN pytest -# Keep container running -#RUN while true; do sleep 1000 done \ No newline at end of file +#copy the rest +COPY . . + +COPY entrypoint.sh /entrypoint.sh +RUN chmod +x /entrypoint.sh + +EXPOSE 8080 +EXPOSE 5000 + +# Define the entrypoint.sh +CMD ["/entrypoint.sh"] \ No newline at end of file diff --git a/backend/app.py b/backend/app.py new file mode 100644 index 00000000..05b62393 --- /dev/null +++ b/backend/app.py @@ -0,0 +1,7 @@ +from flask import Flask + +app = Flask(__name__) + +@app.route("/") +def hello_world(): + return "

<p>Hello, World!</p>

" diff --git a/backend/routes.py b/backend/routes.py new file mode 100644 index 00000000..bb07eb12 --- /dev/null +++ b/backend/routes.py @@ -0,0 +1,12 @@ +import sys + +# Add a dummy package to sys.modules to force Flask to be imported as a package +sys.modules['flask'] = sys.modules[__name__] + +from flask import Flask + +app = Flask(__name__) + +@app.route("/") +def hello_world(): + return "

<p>Hello, World!</p>

" diff --git a/entrypoint.sh b/entrypoint.sh new file mode 100644 index 00000000..c5830315 --- /dev/null +++ b/entrypoint.sh @@ -0,0 +1,12 @@ +#!/bin/sh + +# Create and activate the virtual environment +python -m venv venv +. venv/bin/activate +export PYTHONPATH="." + +pytest + +flask --app backend/app.py run + +sleep infinity diff --git a/requirements.txt b/requirements.txt index 19f3bad0..aec66f44 100644 --- a/requirements.txt +++ b/requirements.txt @@ -231,3 +231,5 @@ wasabi==0.10.1 # The following packages are considered to be unsafe in a requirements file: # setuptools + +flask==3.0.0 \ No newline at end of file From ad7f1d57b04f8131e10c517fd6632174b1ec6565 Mon Sep 17 00:00:00 2001 From: IPowerW Date: Wed, 8 Nov 2023 13:41:37 +0100 Subject: [PATCH 009/254] init flask integration --- Dockerfile | 12 +++++------- README.md | 35 ++++++++++++++++++++++++++++------- app.py | 11 +++++++++++ backend/app.py | 7 ------- backend/routes.py | 12 ------------ docker-compose.yaml | 18 +++++++++--------- entrypoint.sh | 4 +--- requirements.txt | 4 +++- 8 files changed, 57 insertions(+), 46 deletions(-) create mode 100644 app.py delete mode 100644 backend/app.py delete mode 100644 backend/routes.py diff --git a/Dockerfile b/Dockerfile index 2fb824b7..2a70bc15 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,11 +3,12 @@ FROM python:3.9 USER root RUN mkdir /home/wannadb WORKDIR /home/wannadb -COPY requirements.txt requirements.txt -# Install dependencies +# install torch RUN pip install --use-pep517 torch==1.10.0 +# Install dependencies +COPY requirements.txt requirements.txt RUN pip install --use-pep517 -r requirements.txt ################################## ## do not change above ## @@ -22,11 +23,8 @@ RUN pip install --use-pep517 pytest #copy the rest COPY . . -COPY entrypoint.sh /entrypoint.sh -RUN chmod +x /entrypoint.sh +RUN chmod +x entrypoint.sh -EXPOSE 8080 -EXPOSE 5000 # Define the entrypoint.sh -CMD ["/entrypoint.sh"] \ No newline at end of file +ENTRYPOINT "/home/wannadb/entrypoint.sh" \ No newline at end of file diff --git a/README.md b/README.md index e9df914c..c3e922e0 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,26 @@ +# Start the docker + +beim ersten mal + +``` +docker compose build +``` + +zum weiter arbeiten + +``` +docker compose up +``` + +danach sollte das backend gestartet sein + +ihr könnt mit `code` den container attachen und dann im docker arbeiten + +git functioniert erst wenn ihr gh installiert und gh auth macht +anschließend könnt ihr wie gewohn arbeiten + +ein docker rebuild ist nur nötig wenn sich dependencies geändert haben + # WannaDB: Ad-hoc SQL Queries over Text Collections ![Document collection and corresponding table.](header_image.svg) @@ -114,25 +137,23 @@ series = {SIGMOD '22} WannaDB is dually licensed under both AGPLv3 for the free usage by end users or the embedding in Open Source projects, and a commercial license for the integration in industrial projects and closed-source tool chains. More details can be found in [our licence agreement](LICENSE.md). - ## Availability of Code & Datasets We publish the source code four our system as discussed in the papers here. Additionally, we publish code to reproduce our experiments in a separate repository (coming soon). Unfortunately, we cannot publish the datasets online due to copyright issues. We will send them via email on request to everyone interested and hope they can be of benefit for other research, too. 
- ## Implementation details -The core of WannaDB (extraction and matching) was previously developed by us under the name [ASET (Ad-hoc Structured Exploration of Text Collections)](https://link.tuda.systems/aset). To better reflect the whole application cycle vision we present with this paper, we switchted the name to WannaDB. +The core of WannaDB (extraction and matching) was previously developed by us under the name [ASET (Ad-hoc Structured Exploration of Text Collections)](https://link.tuda.systems/aset). To better reflect the whole application cycle vision we present with this paper, we switchted the name to WannaDB. ### Repository structure This repository is structured as follows: -* `wannadb`, `wannadb_parsql`, and `wannadb_ui` contain the implementation of ASET and the GUI. -* `scripts` contains helpers, like a stand-alone preprocessing script. -* `tests` contains pytest tests. +- `wannadb`, `wannadb_parsql`, and `wannadb_ui` contain the implementation of ASET and the GUI. +- `scripts` contains helpers, like a stand-alone preprocessing script. +- `tests` contains pytest tests. ### Architecture: Core @@ -140,7 +161,7 @@ The core implementation of WannaDB is in the `wannadb` package and implemented a **Data model** -`data` contains WannaDB's data model. The entities are `InformationNugget`s, `Attribute`s, `Document`s, and the `DocumentBase`. +`data` contains WannaDB's data model. The entities are `InformationNugget`s, `Attribute`s, `Document`s, and the `DocumentBase`. A nugget is an information piece obtained from a document. An attribute is a table column that gets populated with information from the documents. A document is a textual document, and the document base is a collection of documents and provides facilities for `BSON` serialization, consistency checks, and data access. diff --git a/app.py b/app.py new file mode 100644 index 00000000..14e2c17c --- /dev/null +++ b/app.py @@ -0,0 +1,11 @@ +from flask import Flask +from flask_cors import CORS + + +app = Flask(__name__) +CORS(app) + + +@app.route('/') +def hello_world(): # put application's code here + return 'Hello World!' diff --git a/backend/app.py b/backend/app.py deleted file mode 100644 index 05b62393..00000000 --- a/backend/app.py +++ /dev/null @@ -1,7 +0,0 @@ -from flask import Flask - -app = Flask(__name__) - -@app.route("/") -def hello_world(): - return "

<p>Hello, World!</p>

" diff --git a/backend/routes.py b/backend/routes.py deleted file mode 100644 index bb07eb12..00000000 --- a/backend/routes.py +++ /dev/null @@ -1,12 +0,0 @@ -import sys - -# Add a dummy package to sys.modules to force Flask to be imported as a package -sys.modules['flask'] = sys.modules[__name__] - -from flask import Flask - -app = Flask(__name__) - -@app.route("/") -def hello_world(): - return "

<p>Hello, World!</p>

" diff --git a/docker-compose.yaml b/docker-compose.yaml index 480c7ea5..2f434142 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,10 +1,10 @@ -version: '3.6' +version: "3.6" services: - wannadb: - build: - context: . - dockerfile: Dockerfile - restart: always - tty: true - ports: - - 8080:8080 + wannadb: + build: + context: . + dockerfile: Dockerfile + restart: always + tty: true + ports: + - "8000:8000" diff --git a/entrypoint.sh b/entrypoint.sh index c5830315..4126afb1 100644 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -7,6 +7,4 @@ export PYTHONPATH="." pytest -flask --app backend/app.py run - -sleep infinity +gunicorn -w 4 --bind 0.0.0.0:8000 app:app \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index aec66f44..ad992121 100644 --- a/requirements.txt +++ b/requirements.txt @@ -232,4 +232,6 @@ wasabi==0.10.1 # The following packages are considered to be unsafe in a requirements file: # setuptools -flask==3.0.0 \ No newline at end of file +flask==3.0.0 +Flask_Cors==4.0.0 +gunicorn==21.2.0 \ No newline at end of file From bbf0ea2904266e415368e0c0c9eeb631b6bdf590 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 27 Nov 2023 16:37:29 +0100 Subject: [PATCH 010/254] add postgres docker --- Dockerfile => Dockerfile-backend | 0 docker-compose.yaml | 25 ++++++++++++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) rename Dockerfile => Dockerfile-backend (100%) diff --git a/Dockerfile b/Dockerfile-backend similarity index 100% rename from Dockerfile rename to Dockerfile-backend diff --git a/docker-compose.yaml b/docker-compose.yaml index 2f434142..ea96b483 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -3,8 +3,31 @@ services: wannadb: build: context: . - dockerfile: Dockerfile + dockerfile: Dockerfile-backend restart: always tty: true ports: - "8000:8000" + depends_on: + - postgres + + + postgres: + image: postgres + container_name: postgres-container + environment: + POSTGRES_PASSWORD: 0 + POSTGRES_DB: userManagement + networks: + - mynetwork + ports: + - "5432:5432" + volumes: + - pgdata:/var/lib/postgresql/data + +networks: + mynetwork: + driver: bridge + +volumes: + pgdata: \ No newline at end of file From 1682353bb5d93af5129bc670812d6bd7537b6fbe Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 27 Nov 2023 16:38:01 +0100 Subject: [PATCH 011/254] add config ( in the beginning mainly for jwt ) --- config.py | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 config.py diff --git a/config.py b/config.py new file mode 100644 index 00000000..ff7af9d4 --- /dev/null +++ b/config.py @@ -0,0 +1,7 @@ +jwtkey = "secret" + + +class JWTFormat: + def __init__(self, user: str,id:int): + self.user = user + self.id = id From ba58deaf1c20d6315dc9adf9c5116ab7f92b94e5 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 27 Nov 2023 16:39:35 +0100 Subject: [PATCH 012/254] update requirements.txt --- requirements.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index ad992121..aee477a1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -234,4 +234,7 @@ wasabi==0.10.1 flask==3.0.0 Flask_Cors==4.0.0 -gunicorn==21.2.0 \ No newline at end of file +gunicorn==21.2.0 +psycopg2~=2.9.9 +bcrypt~=4.0.1 +PyJWT~=2.8.0 \ No newline at end of file From 685101829e502ba520b0effd34fca1d7a6e1286e Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 27 Nov 2023 16:40:10 +0100 Subject: [PATCH 013/254] add queries.py and transactions.py for db abstraction --- postgres/queries.py | 99 
+++++++++++++++++++++++++++++++ postgres/transactions.py | 123 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 222 insertions(+) create mode 100644 postgres/queries.py create mode 100644 postgres/transactions.py diff --git a/postgres/queries.py b/postgres/queries.py new file mode 100644 index 00000000..539115c4 --- /dev/null +++ b/postgres/queries.py @@ -0,0 +1,99 @@ +import psycopg2 +from psycopg2 import sql +import bcrypt + +# Replace these values with your own +DB_NAME = "userManagement" +DB_USER = "postgres" +DB_PASSWORD = "0" +DB_HOST = "127.0.0.1" +DB_PORT = "5432" + + +def connectPG(): + try: + conn = psycopg2.connect( + dbname=DB_NAME, + user=DB_USER, + password=DB_PASSWORD, + host=DB_HOST, + port=DB_PORT) + return conn + except Exception as e: + print("Connection failed because: \n", e) + + +def execute_select_query(query, params=None): + conn = None + cur = None + try: + conn = connectPG() + cur = conn.cursor() + + cur.execute(query, params) + result = cur.fetchall() + + return result if result else None + + except Exception as e: + print(f"Query execution failed for query:\n" + f"{query} \n" + f"Params: {params} \n" + f"Error: {e}") + finally: + if conn: + conn.close() + if cur: + cur.close() + + +def getUserID(user: str): + select_query = sql.SQL("SELECT id FROM users WHERE username = %s;") + return execute_select_query(select_query, (user,)) + + +def getOrganisationID(organisation_name: str): + select_query = sql.SQL("SELECT id FROM organisations WHERE name = %s;") + return execute_select_query(select_query, (organisation_name,)) + + +def getMemberIDsFromOrganisationID(organisationID: int): + select_query = sql.SQL("SELECT userid FROM membership WHERE organisationid = %s;") + return execute_select_query(select_query, (organisationID,)) + + +def getOrganisationIDsFromUserId(userID: int): + select_query = sql.SQL("SELECT organisationid FROM membership WHERE userid = %s;") + return execute_select_query(select_query, (userID,)) + + +def checkPassword(user: str, password: str) -> bool: + select_query = sql.SQL("SELECT password FROM users WHERE username = %s;") + result = execute_select_query(select_query, (user,)) + try: + if result[0]: + stored_password = result[0].tobytes() # sketchy conversion but works + return bcrypt.checkpw(password.encode('utf-8'), stored_password) + + return False + + except Exception as e: + print("checkPassword failed because: \n", e) + return False + + +def checkOrganisationAuthorisation(organisationName: str, userName: str) -> int: + select_query = sql.SQL("SELECT membership from membership " + "where userid == (SELECT id from users where username == (%s)) " + "and " + "organisationid == (Select id from organisations where name == (%s))") + + result = execute_select_query(select_query, (organisationName, userName)) + try: + if result[0]: + authorisation = result[0] + return int(authorisation) # sketchy conversion but works + + except Exception as e: + print("checkOrganisationAuthorisation failed because: \n", e) + return 99 diff --git a/postgres/transactions.py b/postgres/transactions.py new file mode 100644 index 00000000..77aa125d --- /dev/null +++ b/postgres/transactions.py @@ -0,0 +1,123 @@ +import psycopg2 +from psycopg2 import sql +import bcrypt +import jwt +from config import jwtkey, JWTFormat +from postgres.queries import checkPassword + +# Replace these values with your own +DB_NAME = "userManagement" +DB_USER = "postgres" +DB_PASSWORD = "0" +DB_HOST = "127.0.0.1" +DB_PORT = "5432" + + +def connectPG(): + try: + conn = 
psycopg2.connect( + dbname=DB_NAME, + user=DB_USER, + password=DB_PASSWORD, + host=DB_HOST, + port=DB_PORT) + return conn + except Exception as e: + print("Connection failed because: \n", e) + + +def execute_query(query, params=None, commit=False): + conn = None + cur = None + try: + conn = connectPG() + cur = conn.cursor() + + cur.execute(query, params) + + if commit: + conn.commit() + + except Exception as e: + raise Exception(f"Query execution failed for query: {query} \nParams: {params} \nError: {e}") + + finally: + if conn: + conn.close() + if cur: + cur.close() + + +def addUser(user: str, password: str): + try: + pwBytes = password.encode('utf-8') + salt = bcrypt.gensalt() + pwHash = bcrypt.hashpw(pwBytes, salt) + + insert_data_query = sql.SQL("INSERT INTO users (username, password) VALUES (%s, %s);") + data_to_insert = (user, pwHash) + execute_query(insert_data_query, data_to_insert, commit=True) + + + except Exception as e: + print("addUser failed because: \n", e) + + +def changePassword(user: str, old_password: str, new_password: str): + try: + if old_password == new_password: + raise Exception("same password") + + pwcheck = checkPassword(user, old_password) + if not pwcheck: + raise Exception("wrong password") + + pwBytes = new_password.encode('utf-8') + salt = bcrypt.gensalt() + pwHash = bcrypt.hashpw(pwBytes, salt) + + update_query = sql.SQL("UPDATE users SET password = %s WHERE username = %s;") + execute_query(update_query, (pwHash, user), commit=True) + + except Exception as e: + print("changePassword failed because: \n", e) + + +def deleteUser(user: str, password: str): + try: + pwcheck = checkPassword(user, password) + if not pwcheck: + raise Exception("wrong password") + + delete_query = sql.SQL("DELETE FROM users WHERE username = %s;") + execute_query(delete_query, (user,), commit=True) + + except Exception as e: + print("deleteUser failed because: \n", e) + + +def addOrganisation(organisationName: str, sessionToken: str): + try: + token: JWTFormat = jwt.decode(sessionToken, jwtkey, algorithm="HS256") + userid = token.id + + insert_query = sql.SQL("with a as (INSERT INTO organisations (name) VALUES (%s) returning id) " + "INSERT INTO membership (userid,organisationid) select (%s),id from a") + execute_query(insert_query, (organisationName, userid), commit=True) + + + except Exception as e: + print("addOrganisation failed because: \n", e) + + +def addUserTooOrganisation(organisationName: str, sessionToken: str, newUser: str): + try: + token: JWTFormat = jwt.decode(sessionToken, jwtkey, algorithm="HS256") + userid = token.id + + insert_query = sql.SQL("with a as (INSERT INTO organisations (name) VALUES (%s) returning id) " + "INSERT INTO membership (userid,organisationid) select (%s),id from a") + execute_query(insert_query, (organisationName, userid), commit=True) + + except Exception as e: + print("addOrganisation failed because: \n", e) From f8a80aec209cae578b5a563b60c5b938a3e3cf52 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 28 Nov 2023 14:32:10 +0100 Subject: [PATCH 014/254] add addUserToOrganisation and Authorisation --- config.py | 11 ++++++++++- postgres/transactions.py | 31 +++++++++++++++++++++++++------ 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/config.py b/config.py index ff7af9d4..cd0c04ad 100644 --- a/config.py +++ b/config.py @@ -1,7 +1,16 @@ +from enum import Enum + + +class Authorisation(Enum): + Owner = 0 + Admin = 1 + Member = 10 + + jwtkey = "secret" class JWTFormat: - def __init__(self, user: str,id:int): + def __init__(self, 
user: str, id: int): self.user = user self.id = id diff --git a/postgres/transactions.py b/postgres/transactions.py index 77aa125d..a029ffde 100644 --- a/postgres/transactions.py +++ b/postgres/transactions.py @@ -2,7 +2,7 @@ from psycopg2 import sql import bcrypt import jwt -from config import jwtkey, JWTFormat +from config import jwtkey, JWTFormat, Authorisation from postgres.queries import checkPassword # Replace these values with your own @@ -110,14 +110,33 @@ def addOrganisation(organisationName: str, sessionToken: str): print("addOrganisation failed because: \n", e) -def addUserTooOrganisation(organisationName: str, sessionToken: str, newUser: str): +def addUserToOrganisation(organisationName: str, sessionToken: str, newUser: str): try: token: JWTFormat = jwt.decode(sessionToken, jwtkey, algorithm="HS256") userid = token.id - insert_query = sql.SQL("with a as (INSERT INTO organisations (name) VALUES (%s) returning id) " - "INSERT INTO membership (userid,organisationid) select (%s),id from a") - execute_query(insert_query, (organisationName, userid), commit=True) + # Combine the two queries into a single query + insert_query = sql.SQL(""" + WITH org AS ( + SELECT userid, organisationid + FROM membership + WHERE organisationid = (SELECT id FROM organisations WHERE name = %s) + ), user_info AS ( + SELECT id + FROM users + WHERE username = %s + ) + INSERT INTO membership (userid, organisationid) + SELECT %s, org.organisationid + FROM org, user_info, membership AS m + WHERE org.organisationid = m.organisationid + AND user_info.id = m.userid + AND m.authorisation >= %s + AND %s >= %s + """) + + execute_query(insert_query, (organisationName, newUser, userid, userid, str(Authorisation.Admin.value), userid), + commit=True) except Exception as e: - print("addOrganisation failed because: \n", e) + print("addUserToOrganisation failed because: \n", e) From 7e14258b8aa21815121a062fcdfc8b3d0b5fb1a6 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 28 Nov 2023 14:49:11 +0100 Subject: [PATCH 015/254] add removeUserFromOrganisation and adjUserAuthorisation adj reformat --- postgres/transactions.py | 61 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 57 insertions(+), 4 deletions(-) diff --git a/postgres/transactions.py b/postgres/transactions.py index a029ffde..59575a6c 100644 --- a/postgres/transactions.py +++ b/postgres/transactions.py @@ -58,7 +58,6 @@ def addUser(user: str, password: str): data_to_insert = (user, pwHash) execute_query(insert_data_query, data_to_insert, commit=True) - except Exception as e: print("addUser failed because: \n", e) @@ -105,7 +104,6 @@ def addOrganisation(organisationName: str, sessionToken: str): "INSERT INTO membership (userid,organisationid) select (%s),id from a") execute_query(insert_query, (organisationName, userid), commit=True) - except Exception as e: print("addOrganisation failed because: \n", e) @@ -115,7 +113,6 @@ def addUserToOrganisation(organisationName: str, sessionToken: str, newUser: str token: JWTFormat = jwt.decode(sessionToken, jwtkey, algorithm="HS256") userid = token.id - # Combine the two queries into a single query insert_query = sql.SQL(""" WITH org AS ( SELECT userid, organisationid @@ -135,8 +132,64 @@ def addUserToOrganisation(organisationName: str, sessionToken: str, newUser: str AND %s >= %s """) - execute_query(insert_query, (organisationName, newUser, userid, userid, str(Authorisation.Admin.value), userid), + execute_query(insert_query, (organisationName, newUser, userid, userid, + str(Authorisation.Admin.value), userid), 
commit=True) except Exception as e: print("addUserToOrganisation failed because: \n", e) + + +def removeUserFromOrganisation(organisationName: str, sessionToken: str, userToRemove: str): + try: + token: JWTFormat = jwt.decode(sessionToken, jwtkey, algorithm="HS256") + userid = token.id + + delete_query = sql.SQL(""" + DELETE FROM membership + USING ( + SELECT userid, organisationid + FROM membership + WHERE organisationid = (SELECT id FROM organisations WHERE name = %s) + ) AS org + WHERE membership.organisationid = org.organisationid + AND membership.userid = (SELECT id FROM users WHERE username = %s) + AND membership.authorisation >= %s + AND %s >= %s + """) + + execute_query(delete_query, (organisationName, userToRemove, userid, userid, + str(Authorisation.Admin.value), userid), + commit=True) + + except Exception as e: + print("removeUserFromOrganisation failed because: \n", e) + + +def adjUserAuthorisation(organisationName: str, sessionToken: str, userToAdjust: str, newAuthorisation: int): + try: + token: JWTFormat = jwt.decode(sessionToken, jwtkey, algorithm="HS256") + author_userid = token.id + + # Combine the two queries into a single query + update_query = sql.SQL(""" + UPDATE membership + SET authorisation = %s + FROM ( + SELECT userid, organisationid, authorisation + FROM membership + WHERE organisationid = (SELECT id FROM organisations WHERE name = %s) + ) AS org + WHERE membership.organisationid = org.organisationid + AND membership.userid = (SELECT id FROM users WHERE username = %s) + AND org.authorisation >= %s -- Ensure the admin has higher or equal authorization + AND org.authorisation > %s -- Ensure the admin has higher authorization than Member + AND org.authorisation >= %s -- Ensure the new authorization is not higher than admin's + """) + + execute_query(update_query, (newAuthorisation, organisationName, userToAdjust, + str(Authorisation.Admin.value), str(Authorisation.Member.value), author_userid), + commit=True) + + except Exception as e: + print("adjUserAuthorisation failed because: \n", e) From 6a88ea1164a1217d81fca096896ecd07470283a0 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 28 Nov 2023 23:42:02 +0100 Subject: [PATCH 016/254] add dev build --- Dockerfile-backend => Dockerfile | 14 +++++++++++--- docker-compose.yaml | 10 ++++++---- 2 files changed, 17 insertions(+), 7 deletions(-) rename Dockerfile-backend => Dockerfile (72%) diff --git a/Dockerfile-backend b/Dockerfile similarity index 72% rename from Dockerfile-backend rename to Dockerfile index 2a70bc15..8cacfe5b 100644 --- a/Dockerfile-backend +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.9 +FROM python:3.9 as build USER root RUN mkdir /home/wannadb @@ -23,8 +23,16 @@ RUN pip install --use-pep517 pytest #copy the rest COPY . . -RUN chmod +x entrypoint.sh +FROM build as dev + +#CMD [ "python", "app.py" ] +CMD ["flask", "--app", "app", "--debug", "run","--host","0.0.0.0", "--port", "8000" ] + +FROM build as prod + +RUN chmod +x entrypoint.sh # Define the entrypoint.sh -ENTRYPOINT "/home/wannadb/entrypoint.sh" \ No newline at end of file +CMD ["sh","./entrypoint.sh"] + diff --git a/docker-compose.yaml b/docker-compose.yaml index ea96b483..84075b19 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -3,14 +3,16 @@ services: wannadb: build: context: . 
- dockerfile: Dockerfile-backend + dockerfile: Dockerfile + target: dev restart: always tty: true ports: - - "8000:8000" + - 8000:8000 depends_on: - postgres - + volumes: + - ./:/home/wannadb postgres: image: postgres @@ -30,4 +32,4 @@ networks: driver: bridge volumes: - pgdata: \ No newline at end of file + pgdata: From 32a9c526e9b46575eb2fc1e3965c6f8abb841a41 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 28 Nov 2023 23:42:52 +0100 Subject: [PATCH 017/254] add new routing --- app.py | 20 ++++++++++++++++++-- flask_app/endpoints.py | 9 +++++++++ flask_app/user.py | 24 ++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 2 deletions(-) create mode 100644 flask_app/endpoints.py create mode 100644 flask_app/user.py diff --git a/app.py b/app.py index 14e2c17c..f4a18b74 100644 --- a/app.py +++ b/app.py @@ -1,11 +1,27 @@ +# app.py from flask import Flask from flask_cors import CORS +from flask_app.endpoints import main_routes +from flask_app.user import user_management + app = Flask(__name__) CORS(app) +# Register the blueprints +app.register_blueprint(main_routes) +app.register_blueprint(user_management) + + @app.route('/') -def hello_world(): # put application's code here - return 'Hello World!' +def hello_world(): + return 'Hello' + + +if __name__ == '__main__': + app.run(host='0.0.0.0', port=8000, debug=True) + + + diff --git a/flask_app/endpoints.py b/flask_app/endpoints.py new file mode 100644 index 00000000..394a2e6c --- /dev/null +++ b/flask_app/endpoints.py @@ -0,0 +1,9 @@ +# main_routes.py +from flask import Blueprint + +main_routes = Blueprint('main_routes', __name__) + + +@main_routes.route('/api') +def hello_world(): + return "Hello a" diff --git a/flask_app/user.py b/flask_app/user.py new file mode 100644 index 00000000..09df5f9a --- /dev/null +++ b/flask_app/user.py @@ -0,0 +1,24 @@ +# main_routes.py +from flask import Blueprint, request, jsonify + +from config import User, encode +from postgres.transactions import addUser + +user_management = Blueprint('user_management', __name__) + + +@user_management.route('/registerrrrrrrrrrrrrrrrrr', methods=['POST']) +def register(): + data = request.get_json() + username = data.get('username') + password = data.get('password') + + _id = addUser(username, password) + + if id: + user = User(username, _id) + token = encode(user) + return jsonify({'message': 'User registered successfully', + 'token': token}) + + return jsonify({'message': 'User registered successfully'}) From 1cd952f371a7dcd43afd36b90c1ac8bea7a92f7f Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 28 Nov 2023 23:43:38 +0100 Subject: [PATCH 018/254] add jwt token and user type --- config.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/config.py b/config.py index cd0c04ad..4b914779 100644 --- a/config.py +++ b/config.py @@ -1,4 +1,7 @@ from enum import Enum +from dataclasses import dataclass + +import jwt class Authorisation(Enum): @@ -10,7 +13,18 @@ class Authorisation(Enum): jwtkey = "secret" -class JWTFormat: - def __init__(self, user: str, id: int): - self.user = user - self.id = id +def encode(obj): + return jwt.encode(obj, jwtkey, algorithm="HS256") + + +def decode(string: str): + token: User = jwt.decode(string, jwtkey, algorithm="HS256") + return User(token.user, token.id) + + +@dataclass +class User: + user: str + id: int + + From 573b2a34495d4da60e7660348c3b461bb2e77754 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 28 Nov 2023 23:44:09 +0100 Subject: [PATCH 019/254] add jwt token and user type --- 
postgres/transactions.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/postgres/transactions.py b/postgres/transactions.py index 59575a6c..681d3cb4 100644 --- a/postgres/transactions.py +++ b/postgres/transactions.py @@ -2,7 +2,7 @@ from psycopg2 import sql import bcrypt import jwt -from config import jwtkey, JWTFormat, Authorisation +from config import jwtkey, User, Authorisation from postgres.queries import checkPassword # Replace these values with your own @@ -38,6 +38,9 @@ def execute_query(query, params=None, commit=False): if commit: conn.commit() + result = cur.fetchall() + return result if result else None + except Exception as e: raise Exception(f"Query execution failed for query: {query} \nParams: {params} \nError: {e}") @@ -54,12 +57,14 @@ def addUser(user: str, password: str): salt = bcrypt.gensalt() pwHash = bcrypt.hashpw(pwBytes, salt) - insert_data_query = sql.SQL("INSERT INTO users (username, password) VALUES (%s, %s);") + insert_data_query = sql.SQL("INSERT INTO users (username, password) VALUES (%s, %s) returning id;") data_to_insert = (user, pwHash) - execute_query(insert_data_query, data_to_insert, commit=True) + return int(execute_query(insert_data_query, data_to_insert, commit=True)) except Exception as e: print("addUser failed because: \n", e) + finally: + return def changePassword(user: str, old_password: str, new_password: str): @@ -97,7 +102,7 @@ def deleteUser(user: str, password: str): def addOrganisation(organisationName: str, sessionToken: str): try: - token: JWTFormat = jwt.decode(sessionToken, jwtkey, algorithm="HS256") + token: User = jwt.decode(sessionToken, jwtkey, algorithm="HS256") userid = token.id insert_query = sql.SQL("with a as (INSERT INTO organisations (name) VALUES (%s) returning id) " @@ -110,7 +115,7 @@ def addOrganisation(organisationName: str, sessionToken: str): def addUserToOrganisation(organisationName: str, sessionToken: str, newUser: str): try: - token: JWTFormat = jwt.decode(sessionToken, jwtkey, algorithm="HS256") + token: User = jwt.decode(sessionToken, jwtkey, algorithm="HS256") userid = token.id insert_query = sql.SQL(""" @@ -142,7 +147,7 @@ def addUserToOrganisation(organisationName: str, sessionToken: str, newUser: str def removeUserFromOrganisation(organisationName: str, sessionToken: str, userToRemove: str): try: - token: JWTFormat = jwt.decode(sessionToken, jwtkey, algorithm="HS256") + token: User = jwt.decode(sessionToken, jwtkey, algorithm="HS256") userid = token.id delete_query = sql.SQL(""" @@ -168,7 +173,7 @@ def removeUserFromOrganisation(organisationName: str, sessionToken: str, userToR def adjUserAuthorisation(organisationName: str, sessionToken: str, userToAdjust: str, newAuthorisation: int): try: - token: JWTFormat = jwt.decode(sessionToken, jwtkey, algorithm="HS256") + token: User = jwt.decode(sessionToken, jwtkey, algorithm="HS256") author_userid = token.id # Combine the two queries into a single query From f16ab87a17eafc13cd9b671e374fe3c9e36d431e Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 29 Nov 2023 17:01:42 +0100 Subject: [PATCH 020/254] add network and quote ports --- docker-compose.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index 84075b19..09b60e73 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -8,11 +8,13 @@ services: restart: always tty: true ports: - - 8000:8000 + - "8000:8000" depends_on: - postgres volumes: - ./:/home/wannadb + networks: + - mynetwork postgres: image: 
postgres From 0e33830fbe78df8a22f814cb8b4a0ef0b4a903fa Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 29 Nov 2023 18:06:12 +0100 Subject: [PATCH 021/254] adj naming and adding expiration to token --- config.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/config.py b/config.py index 4b914779..83dd934f 100644 --- a/config.py +++ b/config.py @@ -1,5 +1,7 @@ +import datetime from enum import Enum from dataclasses import dataclass +from typing import Any import jwt @@ -13,18 +15,20 @@ class Authorisation(Enum): jwtkey = "secret" -def encode(obj): +def encode(obj: dict[str, Any]): return jwt.encode(obj, jwtkey, algorithm="HS256") def decode(string: str): - token: User = jwt.decode(string, jwtkey, algorithm="HS256") - return User(token.user, token.id) + token: Token = jwt.decode(string, jwtkey, leeway=datetime.timedelta(minutes=1), algorithm="HS256", verify=True) + return Token(token.user, token.id, token.exp) @dataclass -class User: +class Token: user: str id: int + exp = datetime.datetime.now() + datetime.timedelta(hours=1) - + def dict(self): + return {"user": self.user, "id": self.id} From 32a63ce18d53dbee69748b1b32227dd81636d2df Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 29 Nov 2023 18:07:13 +0100 Subject: [PATCH 022/254] adj naming and reformation --- postgres/queries.py | 64 +++++------------------------ postgres/transactions.py | 89 ++++++++++------------------------------ postgres/util.py | 72 ++++++++++++++++++++++++++++++++ 3 files changed, 104 insertions(+), 121 deletions(-) create mode 100644 postgres/util.py diff --git a/postgres/queries.py b/postgres/queries.py index 539115c4..ee9bdd12 100644 --- a/postgres/queries.py +++ b/postgres/queries.py @@ -1,78 +1,36 @@ -import psycopg2 -from psycopg2 import sql import bcrypt - -# Replace these values with your own -DB_NAME = "userManagement" -DB_USER = "postgres" -DB_PASSWORD = "0" -DB_HOST = "127.0.0.1" -DB_PORT = "5432" - - -def connectPG(): - try: - conn = psycopg2.connect( - dbname=DB_NAME, - user=DB_USER, - password=DB_PASSWORD, - host=DB_HOST, - port=DB_PORT) - return conn - except Exception as e: - print("Connection failed because: \n", e) - - -def execute_select_query(query, params=None): - conn = None - cur = None - try: - conn = connectPG() - cur = conn.cursor() - - cur.execute(query, params) - result = cur.fetchall() - - return result if result else None - - except Exception as e: - print(f"Query execution failed for query:\n" - f"{query} \n" - f"Params: {params} \n" - f"Error: {e}") - finally: - if conn: - conn.close() - if cur: - cur.close() +from flask import jsonify +from psycopg2 import sql +from postgres.util import execute_query def getUserID(user: str): select_query = sql.SQL("SELECT id FROM users WHERE username = %s;") - return execute_select_query(select_query, (user,)) + return execute_query(select_query, (user,)) def getOrganisationID(organisation_name: str): select_query = sql.SQL("SELECT id FROM organisations WHERE name = %s;") - return execute_select_query(select_query, (organisation_name,)) + return execute_query(select_query, (organisation_name,)) def getMemberIDsFromOrganisationID(organisationID: int): select_query = sql.SQL("SELECT userid FROM membership WHERE organisationid = %s;") - return execute_select_query(select_query, (organisationID,)) + return execute_query(select_query, (organisationID,)) def getOrganisationIDsFromUserId(userID: int): select_query = sql.SQL("SELECT organisationid FROM membership WHERE userid = %s;") - return 
execute_select_query(select_query, (userID,)) + return execute_query(select_query, (userID,)) def checkPassword(user: str, password: str) -> bool: + select_query = sql.SQL("SELECT password FROM users WHERE username = %s;") - result = execute_select_query(select_query, (user,)) + result = execute_query(select_query, (user,)) try: if result[0]: - stored_password = result[0].tobytes() # sketchy conversion but works + stored_password = bytes(result[0][0]) # sketchy conversion but works return bcrypt.checkpw(password.encode('utf-8'), stored_password) return False @@ -88,7 +46,7 @@ def checkOrganisationAuthorisation(organisationName: str, userName: str) -> int: "and " "organisationid == (Select id from organisations where name == (%s))") - result = execute_select_query(select_query, (organisationName, userName)) + result = execute_query(select_query, (organisationName, userName)) try: if result[0]: authorisation = result[0] diff --git a/postgres/transactions.py b/postgres/transactions.py index 681d3cb4..da014eb0 100644 --- a/postgres/transactions.py +++ b/postgres/transactions.py @@ -1,54 +1,9 @@ -import psycopg2 -from psycopg2 import sql import bcrypt import jwt -from config import jwtkey, User, Authorisation +from psycopg2 import sql +from config import jwtkey, Token, Authorisation from postgres.queries import checkPassword - -# Replace these values with your own -DB_NAME = "userManagement" -DB_USER = "postgres" -DB_PASSWORD = "0" -DB_HOST = "127.0.0.1" -DB_PORT = "5432" - - -def connectPG(): - try: - conn = psycopg2.connect( - dbname=DB_NAME, - user=DB_USER, - password=DB_PASSWORD, - host=DB_HOST, - port=DB_PORT) - return conn - except Exception as e: - print("Connection failed because: \n", e) - - -def execute_query(query, params=None, commit=False): - conn = None - cur = None - try: - conn = connectPG() - cur = conn.cursor() - - cur.execute(query, params) - - if commit: - conn.commit() - - result = cur.fetchall() - return result if result else None - - except Exception as e: - raise Exception(f"Query execution failed for query: {query} \nParams: {params} \nError: {e}") - - finally: - if conn: - conn.close() - if cur: - cur.close() +from postgres.util import execute_transaction def addUser(user: str, password: str): @@ -59,12 +14,11 @@ def addUser(user: str, password: str): insert_data_query = sql.SQL("INSERT INTO users (username, password) VALUES (%s, %s) returning id;") data_to_insert = (user, pwHash) - return int(execute_query(insert_data_query, data_to_insert, commit=True)) + response = execute_transaction(insert_data_query, data_to_insert, commit=True) + return int(response[0][0]) except Exception as e: print("addUser failed because: \n", e) - finally: - return def changePassword(user: str, old_password: str, new_password: str): @@ -81,7 +35,7 @@ def changePassword(user: str, old_password: str, new_password: str): pwHash = bcrypt.hashpw(pwBytes, salt) update_query = sql.SQL("UPDATE users SET password = %s WHERE username = %s;") - execute_query(update_query, (pwHash, user), commit=True) + execute_transaction(update_query, (pwHash, user), commit=True) except Exception as e: print("changePassword failed because: \n", e) @@ -94,7 +48,7 @@ def deleteUser(user: str, password: str): raise Exception("wrong password") delete_query = sql.SQL("DELETE FROM users WHERE username = %s;") - execute_query(delete_query, (user,), commit=True) + execute_transaction(delete_query, (user,), commit=True) except Exception as e: print("deleteUser failed because: \n", e) @@ -102,12 +56,12 @@ def deleteUser(user: 
str, password: str): def addOrganisation(organisationName: str, sessionToken: str): try: - token: User = jwt.decode(sessionToken, jwtkey, algorithm="HS256") + token: Token = jwt.decode(sessionToken, jwtkey, algorithm="HS256") userid = token.id insert_query = sql.SQL("with a as (INSERT INTO organisations (name) VALUES (%s) returning id) " "INSERT INTO membership (userid,organisationid) select (%s),id from a") - execute_query(insert_query, (organisationName, userid), commit=True) + execute_transaction(insert_query, (organisationName, userid), commit=True) except Exception as e: print("addOrganisation failed because: \n", e) @@ -115,7 +69,7 @@ def addOrganisation(organisationName: str, sessionToken: str): def addUserToOrganisation(organisationName: str, sessionToken: str, newUser: str): try: - token: User = jwt.decode(sessionToken, jwtkey, algorithm="HS256") + token: Token = jwt.decode(sessionToken, jwtkey, algorithm="HS256") userid = token.id insert_query = sql.SQL(""" @@ -137,9 +91,9 @@ def addUserToOrganisation(organisationName: str, sessionToken: str, newUser: str AND %s >= %s """) - execute_query(insert_query, (organisationName, newUser, userid, userid, - str(Authorisation.Admin.value), userid), - commit=True) + execute_transaction(insert_query, (organisationName, newUser, userid, userid, + str(Authorisation.Admin.value), userid), + commit=True) except Exception as e: print("addUserToOrganisation failed because: \n", e) @@ -147,7 +101,7 @@ def addUserToOrganisation(organisationName: str, sessionToken: str, newUser: str def removeUserFromOrganisation(organisationName: str, sessionToken: str, userToRemove: str): try: - token: User = jwt.decode(sessionToken, jwtkey, algorithm="HS256") + token: Token = jwt.decode(sessionToken, jwtkey, algorithm="HS256") userid = token.id delete_query = sql.SQL(""" @@ -163,9 +117,9 @@ def removeUserFromOrganisation(organisationName: str, sessionToken: str, userToR AND %s >= %s """) - execute_query(delete_query, (organisationName, userToRemove, userid, userid, - str(Authorisation.Admin.value), userid), - commit=True) + execute_transaction(delete_query, (organisationName, userToRemove, userid, userid, + str(Authorisation.Admin.value), userid), + commit=True) except Exception as e: print("removeUserFromOrganisation failed because: \n", e) @@ -173,10 +127,9 @@ def removeUserFromOrganisation(organisationName: str, sessionToken: str, userToR def adjUserAuthorisation(organisationName: str, sessionToken: str, userToAdjust: str, newAuthorisation: int): try: - token: User = jwt.decode(sessionToken, jwtkey, algorithm="HS256") + token: Token = jwt.decode(sessionToken, jwtkey, algorithm="HS256") author_userid = token.id - # Combine the two queries into a single query update_query = sql.SQL(""" UPDATE membership SET authorisation = %s @@ -192,9 +145,9 @@ def adjUserAuthorisation(organisationName: str, sessionToken: str, userToAdjust: AND org.authorisation >= %s -- Ensure the new authorization is not higher than admin's """) - execute_query(update_query, (newAuthorisation, organisationName, userToAdjust, - str(Authorisation.Admin.value), str(Authorisation.Member.value), author_userid), - commit=True) + execute_transaction(update_query, (newAuthorisation, organisationName, userToAdjust, + str(Authorisation.Admin.value), str(Authorisation.Member.value), author_userid), + commit=True) except Exception as e: print("adjUserAuthorisation failed because: \n", e) diff --git a/postgres/util.py b/postgres/util.py new file mode 100644 index 00000000..e0c34af7 --- /dev/null +++ 
b/postgres/util.py @@ -0,0 +1,72 @@ +import psycopg2 +from psycopg2 import extensions + +DB_NAME = "userManagement" +DB_USER = "postgres" +DB_PASSWORD = "0" +DB_HOST = "postgres" +#DB_HOST = "127.0.0.1" +DB_PORT = "5432" + + +def connectPG(): + try: + conn = psycopg2.connect( + dbname=DB_NAME, + user=DB_USER, + password=DB_PASSWORD, + host=DB_HOST, + port=DB_PORT) + return conn + except Exception as e: + raise Exception("Connection failed because: \n", e) + + +def execute_transaction(query, params=None, commit=False): + conn = None + cur = None + try: + conn = connectPG() + cur = conn.cursor() + + cur.execute(query, params) + + if commit: + conn.commit() + + result = cur.fetchall() + return result if result else None + + except Exception as e: + raise Exception(f"Query execution failed for transaction: {query} \nParams: {params} \nError: {e}") + + finally: + if conn: + conn.close() + if cur: + cur.close() + + +def execute_query(query, params=None): + conn = None + cur = None + try: + conn = connectPG() + conn.set_isolation_level(extensions.ISOLATION_LEVEL_AUTOCOMMIT) + cur = conn.cursor() + + cur.execute(query, params) + result = cur.fetchall() + + return result if result else None + + except Exception as e: + raise Exception(f"Query execution failed for query:\n" + f"{query} \n" + f"Params: {params} \n" + f"Error: {e}") + finally: + if conn: + conn.close() + if cur: + cur.close() From 9ae18fdcd43f3b37f048065860502bbda0ec1546 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 29 Nov 2023 18:08:04 +0100 Subject: [PATCH 023/254] add register and login --- flask_app/user.py | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/flask_app/user.py b/flask_app/user.py index 09df5f9a..b0aa454f 100644 --- a/flask_app/user.py +++ b/flask_app/user.py @@ -1,13 +1,14 @@ # main_routes.py from flask import Blueprint, request, jsonify -from config import User, encode +from config import Token, encode +from postgres.queries import checkPassword from postgres.transactions import addUser user_management = Blueprint('user_management', __name__) -@user_management.route('/registerrrrrrrrrrrrrrrrrr', methods=['POST']) +@user_management.route('/register', methods=['GET', 'POST']) def register(): data = request.get_json() username = data.get('username') @@ -15,10 +16,29 @@ def register(): _id = addUser(username, password) - if id: - user = User(username, _id) - token = encode(user) + if _id: + user = Token(username, _id) + token = encode(user.dict()) + return jsonify({'message': 'User registered successfully', 'token': token}) - return jsonify({'message': 'User registered successfully'}) + return jsonify({'message': 'User register failed'}) + + +@user_management.route('/login', methods=['GET', 'POST']) +def login(): + data = request.get_json() + username = data.get('username') + password = data.get('password') + + _correct = checkPassword(username, password) + + if _correct: + user = Token(username, _correct) + token = encode(user.dict()) + + return jsonify({'message': 'Log in successfully', + 'token': token}) + else: + return jsonify({'message': 'Wrong Password'}) From b661b4c5207791495708b275c2d737caac989a99 Mon Sep 17 00:00:00 2001 From: Phil1436 Date: Fri, 8 Dec 2023 12:04:22 +0100 Subject: [PATCH 024/254] configured login and register for the frontend --- app.py | 4 +++- flask_app/user.py | 8 ++++++-- postgres/queries.py | 12 ++++++++++-- postgres/transactions.py | 26 ++++++++++++++++++++++++++ 4 files changed, 45 insertions(+), 5 deletions(-) diff --git a/app.py 
b/app.py index f4a18b74..27f13299 100644 --- a/app.py +++ b/app.py @@ -3,6 +3,7 @@ from flask_cors import CORS from flask_app.endpoints import main_routes from flask_app.user import user_management +from postgres.transactions import createTables, dropTables @@ -13,7 +14,8 @@ app.register_blueprint(main_routes) app.register_blueprint(user_management) - +# TODO create table probably not here? +createTables() @app.route('/') def hello_world(): diff --git a/flask_app/user.py b/flask_app/user.py index b0aa454f..b692f225 100644 --- a/flask_app/user.py +++ b/flask_app/user.py @@ -8,6 +8,7 @@ user_management = Blueprint('user_management', __name__) +# TODO should only be POST? @user_management.route('/register', methods=['GET', 'POST']) def register(): data = request.get_json() @@ -21,11 +22,13 @@ def register(): token = encode(user.dict()) return jsonify({'message': 'User registered successfully', + 'status': True, 'token': token}) - return jsonify({'message': 'User register failed'}) + return jsonify({'message': 'User register failed', 'status': False}) +# TODO should only be GET? @user_management.route('/login', methods=['GET', 'POST']) def login(): data = request.get_json() @@ -39,6 +42,7 @@ def login(): token = encode(user.dict()) return jsonify({'message': 'Log in successfully', + 'status': True, 'token': token}) else: - return jsonify({'message': 'Wrong Password'}) + return jsonify({'message': 'Wrong Password', 'status': False}) diff --git a/postgres/queries.py b/postgres/queries.py index ee9bdd12..db91cd8b 100644 --- a/postgres/queries.py +++ b/postgres/queries.py @@ -26,11 +26,19 @@ def getOrganisationIDsFromUserId(userID: int): def checkPassword(user: str, password: str) -> bool: - select_query = sql.SQL("SELECT password FROM users WHERE username = %s;") + select_query = sql.SQL("SELECT password as pw FROM users WHERE username = %s;") result = execute_query(select_query, (user,)) + if not result: + print("checkPassword failed because: \n", "user not found") + return False + if len(result) > 1: + print("checkPassword failed because: \n", "more than 1 user with same name") + return False try: if result[0]: - stored_password = bytes(result[0][0]) # sketchy conversion but works + + #stored_password = bytes(result[0][0], encoding='utf-8') # sketchy conversion but works + stored_password = result[0][0].encode('utf-8') # i think thats better return bcrypt.checkpw(password.encode('utf-8'), stored_password) return False diff --git a/postgres/transactions.py b/postgres/transactions.py index da014eb0..8497254c 100644 --- a/postgres/transactions.py +++ b/postgres/transactions.py @@ -5,12 +5,38 @@ from postgres.queries import checkPassword from postgres.util import execute_transaction +# TODO workaround for now. +def createTables(): + createUserTable() + +# WARNING: This is only for development purposes! +def dropTables(): + try: + drop_table_query = sql.SQL("""DROP TABLE IF EXISTS users;""") + execute_transaction(drop_table_query, commit=True) + except Exception as e: + print("dropTables failed because: \n", e) + +# TODO workaround for now. 
Table creation should be done on startup +def createUserTable(): + try: + create_table_query = sql.SQL("""CREATE TABLE IF NOT EXISTS users ( + id SERIAL PRIMARY KEY, + username VARCHAR(100) UNIQUE NOT NULL, + password VARCHAR(1000) NOT NULL + );""") + execute_transaction(create_table_query, commit=True) + except Exception as e: + print("createUserTable failed because: \n", e) def addUser(user: str, password: str): + createTables() try: pwBytes = password.encode('utf-8') salt = bcrypt.gensalt() pwHash = bcrypt.hashpw(pwBytes, salt) + # Needed this for the correct password check dont know why... + pwHash = pwHash.decode('utf-8') insert_data_query = sql.SQL("INSERT INTO users (username, password) VALUES (%s, %s) returning id;") data_to_insert = (user, pwHash) From c9d89ca159dcba8a2f515e872a8bf1c4c034dbc8 Mon Sep 17 00:00:00 2001 From: Phil1436 Date: Fri, 8 Dec 2023 12:04:35 +0100 Subject: [PATCH 025/254] added routes doc --- .gitignore | 3 ++ ROUTES.md | 101 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+) create mode 100644 ROUTES.md diff --git a/.gitignore b/.gitignore index 12efe911..f7ac2a47 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,6 @@ /.pytest_cache/ /models/ + +postgres/__pycache__ +flask_app/__pycache__ diff --git a/ROUTES.md b/ROUTES.md new file mode 100644 index 00000000..d1e09c7a --- /dev/null +++ b/ROUTES.md @@ -0,0 +1,101 @@ +# Routes _wannadbBackend_ + +The flask app is running by default on port 8000. Here we assume that the app is running on localhost. + +--- + +- [HelloWorld](#helloworld) +- [Register](#register) +- [Login](#login) + +--- + +## HelloWorld + +**GET** + +Say hello to the world. + +``` +http://localhost:8000/ +``` + +--- + +## Register + +**POST** + +Register a new user. + +``` +http://localhost:8000/register +``` + +### Body + +```json +{ + "username": "username", + "password": "password" +} +``` + +### Response + +- 200: User register **failed**: + ```json + { + "message": "User register failed", + "status": false + } + ``` +- 200: User register **success**: + ```json + { + "message": "User registered successfully", + "status": true, + "token": "eyJhbGciOiJIUI1NiIsIn5cCI6IkpXVCJ9.ey1c2VyIjocGhpbEiLCJpZCIM30.v_lKLd0X-PABkRFXHZa..." + } + ``` + +--- + +## Login + +**GET,POST** + +Login as user + +``` +http://localhost:8000/login +``` + +### Body + +```json +{ + "username": "username", + "password": "password" +} +``` + +### Response + +- 200: User login **failed**: + ```json + { + "message": "Wrong Password", + "status": false + } + ``` +- 200: User login **success**: + ```json + { + "message": "Log in successfully", + "status": true, + "token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1..." 
+ } + ``` + +--- From 8198ed53ec11ae278aabe71e96f1e1a17ecfe667 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 11 Dec 2023 15:23:21 +0100 Subject: [PATCH 026/254] add pylint --- .pylintrc | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 .pylintrc diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 00000000..62b5bd36 --- /dev/null +++ b/.pylintrc @@ -0,0 +1,22 @@ +[MASTER] + +# Specify the Python version you are using +python = 3.9 + +# Add the directories or files you want to ignore +ignore = venv, virtualenv, __pycache__ + +[MESSAGES CONTROL] + +# Enable or disable messages based on your preferences +disable = C0330, C0114, C0115, C0116 + +[FORMAT] + +# Set the maximum number of characters per line +max-line-length = 120 + +# Specify the regular expressions for files or directories to include or exclude +include-ids = yes +indent-string = "\t" + From 96b7e44deb2b7500b3e38523d87a938ce0540a8d Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 11 Dec 2023 15:23:51 +0100 Subject: [PATCH 027/254] split requirements.txt --- Dockerfile | 6 ++++-- backend-requirements.txt | 13 +++++++++++++ requirements.txt => core-requirements.txt | 9 +-------- 3 files changed, 18 insertions(+), 10 deletions(-) create mode 100644 backend-requirements.txt rename requirements.txt => core-requirements.txt (97%) diff --git a/Dockerfile b/Dockerfile index 8cacfe5b..a67438c1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,8 +8,10 @@ WORKDIR /home/wannadb RUN pip install --use-pep517 torch==1.10.0 # Install dependencies -COPY requirements.txt requirements.txt -RUN pip install --use-pep517 -r requirements.txt +COPY core-requirements.txt core-requirements.txt +RUN pip install --use-pep517 -r core-requirements.txt +COPY backend-requirements.txt backend-requirements.txt +RUN pip install --use-pep517 -r backend-requirements.txt ################################## ## do not change above ## ## changes above cause ## diff --git a/backend-requirements.txt b/backend-requirements.txt new file mode 100644 index 00000000..88421759 --- /dev/null +++ b/backend-requirements.txt @@ -0,0 +1,13 @@ +pip==23.3.1 +flask==3.0.0 +Flask_Cors==4.0.0 +gunicorn==21.2.0 +psycopg2~=2.9.9 +bcrypt~=4.0.1 +PyJWT~=2.8.0 +jupyter~=1.0.0 +wheel~=0.40.0 +tornado~=6.4 +setuptools~=69.0.2 +werkzeug~=3.0.1 +pylint~=3.0.3 \ No newline at end of file diff --git a/requirements.txt b/core-requirements.txt similarity index 97% rename from requirements.txt rename to core-requirements.txt index aee477a1..42da5b8c 100644 --- a/requirements.txt +++ b/core-requirements.txt @@ -230,11 +230,4 @@ wasabi==0.10.1 # thinc # The following packages are considered to be unsafe in a requirements file: -# setuptools - -flask==3.0.0 -Flask_Cors==4.0.0 -gunicorn==21.2.0 -psycopg2~=2.9.9 -bcrypt~=4.0.1 -PyJWT~=2.8.0 \ No newline at end of file +# setuptools \ No newline at end of file From 8b73a192663a608666e299760ab332b408e6c6e9 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 11 Dec 2023 15:24:37 +0100 Subject: [PATCH 028/254] create vev_routes and reformat --- app.py | 13 ++++--------- flask_app/dev.py | 22 ++++++++++++++++++++++ 2 files changed, 26 insertions(+), 9 deletions(-) create mode 100644 flask_app/dev.py diff --git a/app.py b/app.py index 27f13299..83390a15 100644 --- a/app.py +++ b/app.py @@ -1,11 +1,10 @@ # app.py from flask import Flask from flask_cors import CORS + +from flask_app.dev import dev_routes from flask_app.endpoints import main_routes from flask_app.user import user_management -from postgres.transactions import 
createTables, dropTables - - app = Flask(__name__) CORS(app) @@ -13,9 +12,8 @@ # Register the blueprints app.register_blueprint(main_routes) app.register_blueprint(user_management) +app.register_blueprint(dev_routes) -# TODO create table probably not here? -createTables() @app.route('/') def hello_world(): @@ -23,7 +21,4 @@ def hello_world(): if __name__ == '__main__': - app.run(host='0.0.0.0', port=8000, debug=True) - - - + app.run(host='0.0.0.0', port=8000, debug=True) diff --git a/flask_app/dev.py b/flask_app/dev.py new file mode 100644 index 00000000..ce9064ae --- /dev/null +++ b/flask_app/dev.py @@ -0,0 +1,22 @@ +from flask import Blueprint, request +from werkzeug.datastructures import FileStorage +from werkzeug.utils import secure_filename + +from postgres.transactions import createUserTable +from config import decode + +dev_routes = Blueprint('dev_routes', __name__, url_prefix='/dev') + + +@dev_routes.route('/createTables', methods=['POST']) +def createTables(): + try: + createUserTable() + finally: + # Remove the temporary files + for file in files: + filename = secure_filename(file.filename) + if os.path.exists(filename): + os.remove(filename) + + return 'Files uploaded successfully' From 38d8a59b7c9e34e65c41f82ff8752629db74eea9 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 11 Dec 2023 15:25:48 +0100 Subject: [PATCH 029/254] reformat --- flask_app/user.py | 6 ++---- postgres/transactions.py | 21 +++++++++++++++------ 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/flask_app/user.py b/flask_app/user.py index b692f225..60696f62 100644 --- a/flask_app/user.py +++ b/flask_app/user.py @@ -8,8 +8,7 @@ user_management = Blueprint('user_management', __name__) -# TODO should only be POST? -@user_management.route('/register', methods=['GET', 'POST']) +@user_management.route('/register', methods=['POST']) def register(): data = request.get_json() username = data.get('username') @@ -28,8 +27,7 @@ def register(): return jsonify({'message': 'User register failed', 'status': False}) -# TODO should only be GET? -@user_management.route('/login', methods=['GET', 'POST']) +@user_management.route('/login', methods=['POST']) def login(): data = request.get_json() username = data.get('username') diff --git a/postgres/transactions.py b/postgres/transactions.py index 8497254c..b50d7f46 100644 --- a/postgres/transactions.py +++ b/postgres/transactions.py @@ -5,10 +5,6 @@ from postgres.queries import checkPassword from postgres.util import execute_transaction -# TODO workaround for now. -def createTables(): - createUserTable() - # WARNING: This is only for development purposes! def dropTables(): try: @@ -17,7 +13,7 @@ def dropTables(): except Exception as e: print("dropTables failed because: \n", e) -# TODO workaround for now. 
Table creation should be done on startup + def createUserTable(): try: create_table_query = sql.SQL("""CREATE TABLE IF NOT EXISTS users ( @@ -172,8 +168,21 @@ def adjUserAuthorisation(organisationName: str, sessionToken: str, userToAdjust: """) execute_transaction(update_query, (newAuthorisation, organisationName, userToAdjust, - str(Authorisation.Admin.value), str(Authorisation.Member.value), author_userid), + str(Authorisation.Admin.value), str(Authorisation.Member.value), + author_userid), commit=True) except Exception as e: print("adjUserAuthorisation failed because: \n", e) + + +def addDocument(name: str, content: str, organisationid: int, userid: int): + try: + insert_data_query = sql.SQL("INSERT INTO documents (name,content,organisationid,userid) " + "VALUES (%s, %s,%s, %s) returning id;") + data_to_insert = (name, content, organisationid, userid) + response = execute_transaction(insert_data_query, data_to_insert, commit=True) + return int(response[0][0]) + + except Exception as e: + print("addDocument failed because: \n", e) From 06286974862663b8311294f08bac169032444987 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 11 Dec 2023 20:10:26 +0100 Subject: [PATCH 030/254] adjust requirements --- backend-requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/backend-requirements.txt b/backend-requirements.txt index 88421759..9d3071a3 100644 --- a/backend-requirements.txt +++ b/backend-requirements.txt @@ -5,7 +5,6 @@ gunicorn==21.2.0 psycopg2~=2.9.9 bcrypt~=4.0.1 PyJWT~=2.8.0 -jupyter~=1.0.0 wheel~=0.40.0 tornado~=6.4 setuptools~=69.0.2 From 5dd4bbbbe2f149cc507f9a078bf802b4fdebf433 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 11 Dec 2023 20:17:14 +0100 Subject: [PATCH 031/254] fix decode --- config.py | 20 +++++++++++++------- postgres/transactions.py | 12 +++++------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/config.py b/config.py index 83dd934f..bfa6a13e 100644 --- a/config.py +++ b/config.py @@ -12,23 +12,29 @@ class Authorisation(Enum): Member = 10 -jwtkey = "secret" +_jwtkey = "secret" def encode(obj: dict[str, Any]): - return jwt.encode(obj, jwtkey, algorithm="HS256") + return jwt.encode(obj, _jwtkey, algorithm="HS256") def decode(string: str): - token: Token = jwt.decode(string, jwtkey, leeway=datetime.timedelta(minutes=1), algorithm="HS256", verify=True) - return Token(token.user, token.id, token.exp) + decoded_token = jwt.decode(string, _jwtkey, leeway=datetime.timedelta(minutes=1), algorithms="HS256", verify=True) + user = decoded_token.get('user') + _id = decoded_token.get('id') + exp = decoded_token.get('exp') + return Token(user, _id, exp) -@dataclass class Token: user: str id: int - exp = datetime.datetime.now() + datetime.timedelta(hours=1) - def dict(self): + def __init__(self, user: str, _id: int, exp=datetime.datetime.now() + datetime.timedelta(hours=1)): + self.user = user + self.id = _id + self.exp = exp + + def json(self): return {"user": self.user, "id": self.id} diff --git a/postgres/transactions.py b/postgres/transactions.py index b50d7f46..2b94e938 100644 --- a/postgres/transactions.py +++ b/postgres/transactions.py @@ -1,7 +1,6 @@ import bcrypt -import jwt from psycopg2 import sql -from config import jwtkey, Token, Authorisation +from config import Token, Authorisation,encode,decode from postgres.queries import checkPassword from postgres.util import execute_transaction @@ -26,7 +25,6 @@ def createUserTable(): print("createUserTable failed because: \n", e) def addUser(user: str, password: str): - createTables() try: 
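# A minimal, self-contained sketch of the bcrypt round trip that addUser and
# checkPassword rely on (illustrative only; hash_password / verify_password are
# not names from this patch series). The hash is stored as a UTF-8 string so it
# fits the VARCHAR password column and is re-encoded to bytes before checkpw,
# which is the conversion the surrounding commits keep adjusting:
import bcrypt

def hash_password(plain: str) -> str:
    # bcrypt.hashpw returns bytes; decode so the hash can be stored as text
    return bcrypt.hashpw(plain.encode("utf-8"), bcrypt.gensalt()).decode("utf-8")

def verify_password(plain: str, stored: str) -> bool:
    # re-encode the stored text hash before comparing
    return bcrypt.checkpw(plain.encode("utf-8"), stored.encode("utf-8"))

assert verify_password("secret", hash_password("secret"))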
pwBytes = password.encode('utf-8') salt = bcrypt.gensalt() @@ -78,7 +76,7 @@ def deleteUser(user: str, password: str): def addOrganisation(organisationName: str, sessionToken: str): try: - token: Token = jwt.decode(sessionToken, jwtkey, algorithm="HS256") + token: Token = decode(sessionToken) userid = token.id insert_query = sql.SQL("with a as (INSERT INTO organisations (name) VALUES (%s) returning id) " @@ -91,7 +89,7 @@ def addOrganisation(organisationName: str, sessionToken: str): def addUserToOrganisation(organisationName: str, sessionToken: str, newUser: str): try: - token: Token = jwt.decode(sessionToken, jwtkey, algorithm="HS256") + token: Token = decode(sessionToken) userid = token.id insert_query = sql.SQL(""" @@ -123,7 +121,7 @@ def addUserToOrganisation(organisationName: str, sessionToken: str, newUser: str def removeUserFromOrganisation(organisationName: str, sessionToken: str, userToRemove: str): try: - token: Token = jwt.decode(sessionToken, jwtkey, algorithm="HS256") + token: Token = decode(sessionToken) userid = token.id delete_query = sql.SQL(""" @@ -149,7 +147,7 @@ def removeUserFromOrganisation(organisationName: str, sessionToken: str, userToR def adjUserAuthorisation(organisationName: str, sessionToken: str, userToAdjust: str, newAuthorisation: int): try: - token: Token = jwt.decode(sessionToken, jwtkey, algorithm="HS256") + token: Token = decode(sessionToken) author_userid = token.id update_query = sql.SQL(""" From 6094d6195045b8422f5774ae0896ef65a3f878f9 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 11 Dec 2023 20:17:51 +0100 Subject: [PATCH 032/254] rename --- flask_app/user.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flask_app/user.py b/flask_app/user.py index 60696f62..067dc637 100644 --- a/flask_app/user.py +++ b/flask_app/user.py @@ -18,7 +18,7 @@ def register(): if _id: user = Token(username, _id) - token = encode(user.dict()) + token = encode(user.json()) return jsonify({'message': 'User registered successfully', 'status': True, From f080d7c6e157c27c32ce793443d3795d3cf23ad5 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 11 Dec 2023 20:18:33 +0100 Subject: [PATCH 033/254] add file upload --- flask_app/endpoints.py | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/flask_app/endpoints.py b/flask_app/endpoints.py index 394a2e6c..f8d06ddd 100644 --- a/flask_app/endpoints.py +++ b/flask_app/endpoints.py @@ -1,9 +1,33 @@ -# main_routes.py -from flask import Blueprint +from flask import Blueprint, request,jsonify +from werkzeug.datastructures import FileStorage +from werkzeug.utils import secure_filename +from config import decode -main_routes = Blueprint('main_routes', __name__) +from postgres.transactions import addDocument +main_routes = Blueprint('main_routes', __name__, url_prefix='/data') -@main_routes.route('/api') -def hello_world(): - return "Hello a" + +@main_routes.route('/upload', methods=['POST']) +def upload_files(): + try: + files = request.files + form = request.form + + authorization = form.get("authorization") + organisation_id = form.get("organisationid") + + token = decode(authorization) + + dokument_ids: list[int] = [] + + for _filename, storage in files.items(): + filename = secure_filename(_filename) + t = storage.read() + dokument_id = addDocument(filename, t, organisation_id, token.id) + dokument_ids.append(dokument_id) + + return jsonify(dokument_ids) + + except Exception as e: + raise Exception("upload files failed because: \n", e) From 
4df4e24ae313dcc7cfa22638b2d30dc2b439e3f0 Mon Sep 17 00:00:00 2001 From: phil1436 Date: Tue, 12 Dec 2023 14:10:48 +0100 Subject: [PATCH 034/254] bug fixes with user login in createTables --- flask_app/dev.py | 17 +++++++++++------ flask_app/user.py | 10 +++++++++- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/flask_app/dev.py b/flask_app/dev.py index ce9064ae..63f185a8 100644 --- a/flask_app/dev.py +++ b/flask_app/dev.py @@ -12,11 +12,16 @@ def createTables(): try: createUserTable() - finally: + + except Exception as e: + return str(e) + + #finally: + # TODO whats that? # Remove the temporary files - for file in files: - filename = secure_filename(file.filename) - if os.path.exists(filename): - os.remove(filename) + #for file in files: + # filename = secure_filename(file.filename) + # if os.path.exists(filename): + # os.remove(filename) - return 'Files uploaded successfully' + return 'Table created successfully' diff --git a/flask_app/user.py b/flask_app/user.py index 067dc637..b651fa19 100644 --- a/flask_app/user.py +++ b/flask_app/user.py @@ -37,7 +37,15 @@ def login(): if _correct: user = Token(username, _correct) - token = encode(user.dict()) + print(user) + # TODO I get error here : 'Token' object has no attribute 'dict' + #token = encode(user.dict()) + + # workaround for now + # TODO remove this + token = encode(user.json()) + + print(token) return jsonify({'message': 'Log in successfully', 'status': True, From 5b89477cfdf6cae52f9e27d571a6467bd76d7094 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 12 Dec 2023 13:23:24 +0100 Subject: [PATCH 035/254] refactor: renaming of variable --- postgres/transactions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/postgres/transactions.py b/postgres/transactions.py index 2b94e938..92babb90 100644 --- a/postgres/transactions.py +++ b/postgres/transactions.py @@ -174,11 +174,11 @@ def adjUserAuthorisation(organisationName: str, sessionToken: str, userToAdjust: print("adjUserAuthorisation failed because: \n", e) -def addDocument(name: str, content: str, organisationid: int, userid: int): +def addDocument(name: str, content: str, organisationId: int, userid: int): try: insert_data_query = sql.SQL("INSERT INTO documents (name,content,organisationid,userid) " "VALUES (%s, %s,%s, %s) returning id;") - data_to_insert = (name, content, organisationid, userid) + data_to_insert = (name, content, organisationId, userid) response = execute_transaction(insert_data_query, data_to_insert, commit=True) return int(response[0][0]) From 1517a3b501908525143e413e70253044a2448e8e Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 12 Dec 2023 13:26:00 +0100 Subject: [PATCH 036/254] refactor: renaming of variable --- flask_app/endpoints.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/flask_app/endpoints.py b/flask_app/endpoints.py index f8d06ddd..bcdfe320 100644 --- a/flask_app/endpoints.py +++ b/flask_app/endpoints.py @@ -1,8 +1,7 @@ -from flask import Blueprint, request,jsonify -from werkzeug.datastructures import FileStorage +from flask import Blueprint, request, jsonify from werkzeug.utils import secure_filename -from config import decode +from config import decode from postgres.transactions import addDocument main_routes = Blueprint('main_routes', __name__, url_prefix='/data') @@ -15,7 +14,7 @@ def upload_files(): form = request.form authorization = form.get("authorization") - organisation_id = form.get("organisationid") + organisation_id = int(form.get("organisationId")) token = 
decode(authorization) @@ -23,8 +22,8 @@ def upload_files(): for _filename, storage in files.items(): filename = secure_filename(_filename) - t = storage.read() - dokument_id = addDocument(filename, t, organisation_id, token.id) + content = storage.read() + dokument_id = addDocument(filename, content, organisation_id, token.id) dokument_ids.append(dokument_id) return jsonify(dokument_ids) From bcb0c6550315943fbb0e7dce600e1cc75cdc1188 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 12 Dec 2023 13:59:00 +0100 Subject: [PATCH 037/254] refactor: renaming of variables --- config.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/config.py b/config.py index bfa6a13e..38879544 100644 --- a/config.py +++ b/config.py @@ -1,6 +1,5 @@ import datetime from enum import Enum -from dataclasses import dataclass from typing import Any import jwt @@ -15,16 +14,16 @@ class Authorisation(Enum): _jwtkey = "secret" -def encode(obj: dict[str, Any]): +def tokenEncode(obj: dict[str, Any]): return jwt.encode(obj, _jwtkey, algorithm="HS256") -def decode(string: str): - decoded_token = jwt.decode(string, _jwtkey, leeway=datetime.timedelta(minutes=1), algorithms="HS256", verify=True) - user = decoded_token.get('user') - _id = decoded_token.get('id') - exp = decoded_token.get('exp') - return Token(user, _id, exp) +def tokenDecode(string: str): + decoded_token = jwt.decode(string, _jwtkey, leeway=datetime.timedelta(minutes=1), algorithms="HS256", verify=True) + user = decoded_token.get('user') + _id = decoded_token.get('id') + exp = decoded_token.get('exp') + return Token(user, _id, exp) class Token: From 679f33b40d7df764564884f25a17e91270c828a5 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 12 Dec 2023 14:00:40 +0100 Subject: [PATCH 038/254] refactor: renaming of variables --- flask_app/endpoints.py | 4 ++-- postgres/queries.py | 5 ++--- postgres/transactions.py | 12 +++++++----- wannadb/data/data.py | 4 ++-- 4 files changed, 13 insertions(+), 12 deletions(-) diff --git a/flask_app/endpoints.py b/flask_app/endpoints.py index bcdfe320..5f071568 100644 --- a/flask_app/endpoints.py +++ b/flask_app/endpoints.py @@ -1,7 +1,7 @@ from flask import Blueprint, request, jsonify from werkzeug.utils import secure_filename -from config import decode +from config import tokenDecode from postgres.transactions import addDocument main_routes = Blueprint('main_routes', __name__, url_prefix='/data') @@ -16,7 +16,7 @@ def upload_files(): authorization = form.get("authorization") organisation_id = int(form.get("organisationId")) - token = decode(authorization) + token = tokenDecode(authorization) dokument_ids: list[int] = [] diff --git a/postgres/queries.py b/postgres/queries.py index db91cd8b..1ab33701 100644 --- a/postgres/queries.py +++ b/postgres/queries.py @@ -36,9 +36,8 @@ def checkPassword(user: str, password: str) -> bool: return False try: if result[0]: - - #stored_password = bytes(result[0][0], encoding='utf-8') # sketchy conversion but works - stored_password = result[0][0].encode('utf-8') # i think thats better + # stored_password = bytes(result[0][0], encoding='utf-8') # sketchy conversion but works + stored_password = result[0][0].tokenEncode('utf-8') # i think thats better return bcrypt.checkpw(password.encode('utf-8'), stored_password) return False diff --git a/postgres/transactions.py b/postgres/transactions.py index 92babb90..82f0e9f0 100644 --- a/postgres/transactions.py +++ b/postgres/transactions.py @@ -1,9 +1,10 @@ import bcrypt from psycopg2 import sql -from config import 
Token, Authorisation,encode,decode +from config import Token, Authorisation, tokenDecode from postgres.queries import checkPassword from postgres.util import execute_transaction + # WARNING: This is only for development purposes! def dropTables(): try: @@ -24,6 +25,7 @@ def createUserTable(): except Exception as e: print("createUserTable failed because: \n", e) + def addUser(user: str, password: str): try: pwBytes = password.encode('utf-8') @@ -76,7 +78,7 @@ def deleteUser(user: str, password: str): def addOrganisation(organisationName: str, sessionToken: str): try: - token: Token = decode(sessionToken) + token: Token = tokenDecode(sessionToken) userid = token.id insert_query = sql.SQL("with a as (INSERT INTO organisations (name) VALUES (%s) returning id) " @@ -89,7 +91,7 @@ def addOrganisation(organisationName: str, sessionToken: str): def addUserToOrganisation(organisationName: str, sessionToken: str, newUser: str): try: - token: Token = decode(sessionToken) + token: Token = tokenDecode(sessionToken) userid = token.id insert_query = sql.SQL(""" @@ -121,7 +123,7 @@ def addUserToOrganisation(organisationName: str, sessionToken: str, newUser: str def removeUserFromOrganisation(organisationName: str, sessionToken: str, userToRemove: str): try: - token: Token = decode(sessionToken) + token: Token = tokenDecode(sessionToken) userid = token.id delete_query = sql.SQL(""" @@ -147,7 +149,7 @@ def removeUserFromOrganisation(organisationName: str, sessionToken: str, userToR def adjUserAuthorisation(organisationName: str, sessionToken: str, userToAdjust: str, newAuthorisation: int): try: - token: Token = decode(sessionToken) + token: Token = tokenDecode(sessionToken) author_userid = token.id update_query = sql.SQL(""" diff --git a/wannadb/data/data.py b/wannadb/data/data.py index 3a802048..fcb166d4 100644 --- a/wannadb/data/data.py +++ b/wannadb/data/data.py @@ -599,7 +599,7 @@ def to_bson(self) -> bytes: serializable_base["documents"].append(serializable_document) logger.info("Convert to BSON bytes.") - bson_bytes: bytes = bson.encode(serializable_base) + bson_bytes: bytes = bson.tokenEncode(serializable_base) tack: float = time.time() logger.info(f"Serialized document base in {tack - tick} seconds.") @@ -619,7 +619,7 @@ def from_bson(cls, bson_bytes: bytes) -> "DocumentBase": tick: float = time.time() logger.info("Convert from BSON bytes.") - serialized_base: Dict[str, Any] = bson.decode(bson_bytes) + serialized_base: Dict[str, Any] = bson.tokenDecode(bson_bytes) # deserialize the document base document_base: "DocumentBase" = cls([], []) From 216f62f63bd6ec8d14bd63a0e9da95fd0eb91ee8 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 12 Dec 2023 14:02:21 +0100 Subject: [PATCH 039/254] refactor: change file access --- flask_app/endpoints.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/flask_app/endpoints.py b/flask_app/endpoints.py index 5f071568..8a4ab10e 100644 --- a/flask_app/endpoints.py +++ b/flask_app/endpoints.py @@ -10,7 +10,7 @@ @main_routes.route('/upload', methods=['POST']) def upload_files(): try: - files = request.files + files = request.files.getlist('files') form = request.form authorization = form.get("authorization") @@ -20,9 +20,10 @@ def upload_files(): dokument_ids: list[int] = [] - for _filename, storage in files.items(): - filename = secure_filename(_filename) - content = storage.read() + for file in files: + file_content = file.read() + filename = file.filename + content = str(file_content.tokenDecode('utf-8')) dokument_id = addDocument(filename, 
content, organisation_id, token.id) dokument_ids.append(dokument_id) From ab2cfe55d703a23cb618291d294d2a767f3c0ad5 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 12 Dec 2023 14:37:33 +0100 Subject: [PATCH 040/254] refactor: change file access again --- flask_app/endpoints.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/flask_app/endpoints.py b/flask_app/endpoints.py index 8a4ab10e..81ed3043 100644 --- a/flask_app/endpoints.py +++ b/flask_app/endpoints.py @@ -1,5 +1,4 @@ from flask import Blueprint, request, jsonify -from werkzeug.utils import secure_filename from config import tokenDecode from postgres.transactions import addDocument @@ -10,7 +9,7 @@ @main_routes.route('/upload', methods=['POST']) def upload_files(): try: - files = request.files.getlist('files') + files = request.files.getlist('file') form = request.form authorization = form.get("authorization") @@ -18,14 +17,18 @@ def upload_files(): token = tokenDecode(authorization) - dokument_ids: list[int] = [] + dokument_ids: list = [] for file in files: - file_content = file.read() - filename = file.filename - content = str(file_content.tokenDecode('utf-8')) - dokument_id = addDocument(filename, content, organisation_id, token.id) - dokument_ids.append(dokument_id) + content_type = file.content_type + if 'text/plain' in content_type: + filename = file.filename + content = str(file.stream.read().decode('utf-8')) + dokument_id = addDocument(filename, content, organisation_id, token.id) + print(dokument_id) + dokument_ids.append(dokument_id) + else: + dokument_ids.append(f"wrong type {content_type}") return jsonify(dokument_ids) From 2f36ad4e6921598a3301ed2bd951b6db57653de7 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 12 Dec 2023 14:38:25 +0100 Subject: [PATCH 041/254] feat: add _getDocument --- postgres/queries.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/postgres/queries.py b/postgres/queries.py index 1ab33701..e8e55d82 100644 --- a/postgres/queries.py +++ b/postgres/queries.py @@ -1,5 +1,4 @@ import bcrypt -from flask import jsonify from psycopg2 import sql from postgres.util import execute_query @@ -25,7 +24,6 @@ def getOrganisationIDsFromUserId(userID: int): def checkPassword(user: str, password: str) -> bool: - select_query = sql.SQL("SELECT password as pw FROM users WHERE username = %s;") result = execute_query(select_query, (user,)) if not result: @@ -49,9 +47,9 @@ def checkPassword(user: str, password: str) -> bool: def checkOrganisationAuthorisation(organisationName: str, userName: str) -> int: select_query = sql.SQL("SELECT membership from membership " - "where userid == (SELECT id from users where username == (%s)) " + "where userid = (SELECT id from users where username = (%s)) " "and " - "organisationid == (Select id from organisations where name == (%s))") + "organisationid = (Select id from organisations where name = (%s))") result = execute_query(select_query, (organisationName, userName)) try: @@ -62,3 +60,19 @@ def checkOrganisationAuthorisation(organisationName: str, userName: str) -> int: except Exception as e: print("checkOrganisationAuthorisation failed because: \n", e) return 99 + + +def _getDocument(documentId: int): + select_query = sql.SQL("SELECT content " + "from documents " + "where id = (%s)") + + result = execute_query(select_query, (documentId,)) + try: + if result[0]: + content = result[0] + return content + + except Exception as e: + print("checkOrganisationAuthorisation failed because: \n", e) + return 
99 From f9094677649545781e75ab76920621f5df1971ee Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 12 Dec 2023 14:39:30 +0100 Subject: [PATCH 042/254] refactor: tokenEncode --- flask_app/user.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/flask_app/user.py b/flask_app/user.py index b651fa19..eea831e0 100644 --- a/flask_app/user.py +++ b/flask_app/user.py @@ -1,7 +1,7 @@ # main_routes.py from flask import Blueprint, request, jsonify -from config import Token, encode +from config import Token, tokenEncode from postgres.queries import checkPassword from postgres.transactions import addUser @@ -18,7 +18,7 @@ def register(): if _id: user = Token(username, _id) - token = encode(user.json()) + token = tokenEncode(user.json()) return jsonify({'message': 'User registered successfully', 'status': True, @@ -37,15 +37,7 @@ def login(): if _correct: user = Token(username, _correct) - print(user) - # TODO I get error here : 'Token' object has no attribute 'dict' - #token = encode(user.dict()) - - # workaround for now - # TODO remove this - token = encode(user.json()) - - print(token) + token = tokenEncode(user.json()) return jsonify({'message': 'Log in successfully', 'status': True, From 2cce12a9be7bbc38d1be85b053126c1387a099e5 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 12 Dec 2023 14:41:29 +0100 Subject: [PATCH 043/254] refactor: createUserTable --- flask_app/dev.py | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/flask_app/dev.py b/flask_app/dev.py index 63f185a8..eb95f16e 100644 --- a/flask_app/dev.py +++ b/flask_app/dev.py @@ -1,9 +1,6 @@ -from flask import Blueprint, request -from werkzeug.datastructures import FileStorage -from werkzeug.utils import secure_filename +from flask import Blueprint from postgres.transactions import createUserTable -from config import decode dev_routes = Blueprint('dev_routes', __name__, url_prefix='/dev') @@ -12,16 +9,8 @@ def createTables(): try: createUserTable() - + return 'createUserTable successfully' except Exception as e: - return str(e) - - #finally: - # TODO whats that? 
- # Remove the temporary files - #for file in files: - # filename = secure_filename(file.filename) - # if os.path.exists(filename): - # os.remove(filename) + print("createTables failed because: \n", e) return 'Table created successfully' From 0320ecb72dc0311a260b24705fede872377ddd34 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 12 Dec 2023 15:18:19 +0100 Subject: [PATCH 044/254] refactor: status codes --- flask_app/endpoints.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/flask_app/endpoints.py b/flask_app/endpoints.py index 81ed3043..8a321cbc 100644 --- a/flask_app/endpoints.py +++ b/flask_app/endpoints.py @@ -1,4 +1,4 @@ -from flask import Blueprint, request, jsonify +from flask import Blueprint, request, make_response from config import tokenDecode from postgres.transactions import addDocument @@ -30,7 +30,11 @@ def upload_files(): else: dokument_ids.append(f"wrong type {content_type}") - return jsonify(dokument_ids) + if all(isinstance(dokument_ids, str) for _ in dokument_ids): + return make_response(dokument_ids, 400) + if any(isinstance(dokument_ids, str) for _ in dokument_ids): + return make_response(dokument_ids, 207) + return make_response(dokument_ids, 201) except Exception as e: - raise Exception("upload files failed because: \n", e) + return make_response({"message": "Upload failed", "details": str(e)}, 500) From 9cc84c458dbbaf302d911173b877412d2a034b07 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 12 Dec 2023 15:18:39 +0100 Subject: [PATCH 045/254] refactor: status codes --- flask_app/user.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/flask_app/user.py b/flask_app/user.py index eea831e0..4e28cd71 100644 --- a/flask_app/user.py +++ b/flask_app/user.py @@ -1,5 +1,5 @@ # main_routes.py -from flask import Blueprint, request, jsonify +from flask import Blueprint, request, make_response from config import Token, tokenEncode from postgres.queries import checkPassword @@ -20,11 +20,10 @@ def register(): user = Token(username, _id) token = tokenEncode(user.json()) - return jsonify({'message': 'User registered successfully', - 'status': True, - 'token': token}) + return make_response({'message': 'User registered successfully', + 'token': token}, 201) - return jsonify({'message': 'User register failed', 'status': False}) + return make_response({'message': 'User register failed'}, 422) @user_management.route('/login', methods=['POST']) @@ -39,8 +38,10 @@ def login(): user = Token(username, _correct) token = tokenEncode(user.json()) - return jsonify({'message': 'Log in successfully', - 'status': True, - 'token': token}) + return make_response({'message': 'Log in successfully', + 'token': token}, 200) else: - return jsonify({'message': 'Wrong Password', 'status': False}) + return make_response({'message': 'Wrong Password'}, 401) + + + From 42260900b9c907ee32c4a3eb757fd292566ea834 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 12 Dec 2023 15:19:22 +0100 Subject: [PATCH 046/254] refactor: add upload and create as well as adj the status codes --- ROUTES.md | 62 ++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 50 insertions(+), 12 deletions(-) diff --git a/ROUTES.md b/ROUTES.md index d1e09c7a..54a3ce78 100644 --- a/ROUTES.md +++ b/ROUTES.md @@ -1,12 +1,14 @@ # Routes _wannadbBackend_ -The flask app is running by default on port 8000. Here we assume that the app is running on localhost. +The Flask app is running by default on port 8000. 
Here we assume that the app is running on localhost. --- - [HelloWorld](#helloworld) - [Register](#register) - [Login](#login) +- [Upload Files](#upload-files) +- [Create Tables (Development)](#create-tables) --- @@ -14,7 +16,6 @@ The flask app is running by default on port 8000. Here we assume that the app is **GET** -Say hello to the world. ``` http://localhost:8000/ @@ -43,18 +44,16 @@ http://localhost:8000/register ### Response -- 200: User register **failed**: +- 422 : User register **failed**: ```json { - "message": "User register failed", - "status": false + "message": "User register failed" } ``` -- 200: User register **success**: +- 201: User register **success**: ```json { "message": "User registered successfully", - "status": true, "token": "eyJhbGciOiJIUI1NiIsIn5cCI6IkpXVCJ9.ey1c2VyIjocGhpbEiLCJpZCIM30.v_lKLd0X-PABkRFXHZa..." } ``` @@ -63,7 +62,7 @@ http://localhost:8000/register ## Login -**GET,POST** +**POST** Login as user @@ -82,20 +81,59 @@ http://localhost:8000/login ### Response -- 200: User login **failed**: +- 401: User login **failed**: ```json { - "message": "Wrong Password", - "status": false + "message": "Wrong Password" } ``` - 200: User login **success**: ```json { "message": "Log in successfully", - "status": true, "token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1..." } ``` --- + +## Upload Files + +**POST** + +Upload files. +``` +http://localhost:8000/data/upload +``` + +### Body + +- `file` (form-data): Files to upload +- `authorization` (form-data): User authorization token +- `organisationId` (form-data): Organization ID + +### Response + +- 400: Upload **failed**: + ``` + Returns a list of document file types. + ``` +- 207: Upload **partial success**: + ``` + Returns a list of document file types and documentIds. + ``` +- 201: Upload **success**: + ``` + Returns a list of documentIds. + ``` + +--- + +## create-tables + +**POST** + +Create tables (Development). 
+``` +http://localhost:8000/create-tables +``` \ No newline at end of file From b5ebb8cd6ddb2a05376396e4b8596431982a47f8 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 12 Dec 2023 15:37:23 +0100 Subject: [PATCH 047/254] fix(checkPassword): encoding was not necessary --- postgres/queries.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/postgres/queries.py b/postgres/queries.py index e8e55d82..59c131b9 100644 --- a/postgres/queries.py +++ b/postgres/queries.py @@ -34,8 +34,7 @@ def checkPassword(user: str, password: str) -> bool: return False try: if result[0]: - # stored_password = bytes(result[0][0], encoding='utf-8') # sketchy conversion but works - stored_password = result[0][0].tokenEncode('utf-8') # i think thats better + stored_password = bytes(result[0][0]) # sketchy conversion but works return bcrypt.checkpw(password.encode('utf-8'), stored_password) return False From df44fdcd966831a99b97a41c2a7b09c7e533af3f Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 12 Dec 2023 15:38:43 +0100 Subject: [PATCH 048/254] refactor(checkPassword): multiple users with the same name is not intended therefor check is not needed --- postgres/queries.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/postgres/queries.py b/postgres/queries.py index 59c131b9..bd35f357 100644 --- a/postgres/queries.py +++ b/postgres/queries.py @@ -26,12 +26,6 @@ def getOrganisationIDsFromUserId(userID: int): def checkPassword(user: str, password: str) -> bool: select_query = sql.SQL("SELECT password as pw FROM users WHERE username = %s;") result = execute_query(select_query, (user,)) - if not result: - print("checkPassword failed because: \n", "user not found") - return False - if len(result) > 1: - print("checkPassword failed because: \n", "more than 1 user with same name") - return False try: if result[0]: stored_password = bytes(result[0][0]) # sketchy conversion but works From a530e81c63c6edb6159b2b6cb2d978f771ed2e83 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 12 Dec 2023 15:57:19 +0100 Subject: [PATCH 049/254] feat: add all tables for dev --- postgres/transactions.py | 103 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 97 insertions(+), 6 deletions(-) diff --git a/postgres/transactions.py b/postgres/transactions.py index 82f0e9f0..65fdc2d1 100644 --- a/postgres/transactions.py +++ b/postgres/transactions.py @@ -8,7 +8,10 @@ # WARNING: This is only for development purposes! 
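# A dependency note on the table helpers in this commit (a sketch, not part of
# the original patch; create_all_tables is an illustrative wrapper name): the
# documents and membership tables declare foreign keys into users and
# organisations, so on an empty database the referenced tables have to exist
# before the referencing ones can be created. A creation order that satisfies
# those constraints, assuming the helpers defined below in this module:
def create_all_tables():
    createUserTable()
    createOrganisationTable()
    createMembershipTable()   # references users and organisations
    createDocumentsTable()    # references users and organisations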
def dropTables(): try: - drop_table_query = sql.SQL("""DROP TABLE IF EXISTS users;""") + drop_table_query = sql.SQL("DROP TABLE IF EXISTS public.users;\n" + "DROP TABLE IF EXISTS public.documents;\n" + "DROP TABLE IF EXISTS public.membership;\n" + "DROP TABLE IF EXISTS public.organisations;") execute_transaction(drop_table_query, commit=True) except Exception as e: print("dropTables failed because: \n", e) @@ -16,11 +19,99 @@ def dropTables(): def createUserTable(): try: - create_table_query = sql.SQL("""CREATE TABLE IF NOT EXISTS users ( - id SERIAL PRIMARY KEY, - username VARCHAR(100) UNIQUE NOT NULL, - password VARCHAR(1000) NOT NULL - );""") + create_table_query = sql.SQL("CREATE TABLE IF NOT EXISTS users (\n" + " id SERIAL PRIMARY KEY,\n" + " username VARCHAR(100) UNIQUE NOT NULL,\n" + " password VARCHAR(1000) NOT NULL\n" + " );") + execute_transaction(create_table_query, commit=True) + except Exception as e: + print("createUserTable failed because: \n", e) + + +def createDocumentsTable(): + try: + create_table_query = sql.SQL("CREATE TABLE IF NOT EXISTS public.documents\n" + "(\n" + "id bigint NOT NULL GENERATED ALWAYS AS IDENTITY ( INCREMENT 1 START 1 MINVALUE 1 " + "MAXVALUE 9223372036854775807 CACHE 1 ),\n" + " name text COLLATE pg_catalog.\"default\" NOT NULL,\n" + " content text COLLATE pg_catalog.\"default\" NOT NULL,\n" + " organisationid bigint NOT NULL,\n" + " userid bigint NOT NULL,\n" + " CONSTRAINT dokumentid PRIMARY KEY (id),\n" + " CONSTRAINT documents_organisationid_fkey FOREIGN KEY (organisationid)\n" + " REFERENCES public.organisations (id) MATCH SIMPLE\n" + " ON UPDATE NO ACTION\n" + " ON DELETE NO ACTION\n" + " NOT VALID,\n" + " CONSTRAINT documents_userid_fkey FOREIGN KEY (userid)\n" + " REFERENCES public.users (id) MATCH SIMPLE\n" + " ON UPDATE NO ACTION\n" + " ON DELETE NO ACTION\n" + " NOT VALID\n" + ")\n" + "\n" + "TABLESPACE pg_default;\n" + "\n" + "ALTER TABLE IF EXISTS public.documents\n" + " OWNER to postgres;") + execute_transaction(create_table_query, commit=True) + except Exception as e: + print("createUserTable failed because: \n", e) + + +def createMembershipTable(): + try: + create_table_query = sql.SQL("" + "\n" + "CREATE TABLE IF NOT EXISTS public.membership\n" + "(\n" + " userid bigint NOT NULL,\n" + " organisationid bigint NOT NULL,\n" + " authorisation bigint NOT NULL DEFAULT 0,\n" + " CONSTRAINT membership_pkey PRIMARY KEY (userid, organisationid),\n" + " CONSTRAINT membership_organisationid_fkey FOREIGN KEY (organisationid)\n" + " REFERENCES public.organisations (id) MATCH SIMPLE\n" + " ON UPDATE NO ACTION\n" + " ON DELETE NO ACTION\n" + " NOT VALID,\n" + " CONSTRAINT membership_userid_fkey FOREIGN KEY (userid)\n" + " REFERENCES public.users (id) MATCH SIMPLE\n" + " ON UPDATE NO ACTION\n" + " ON DELETE NO ACTION\n" + " NOT VALID\n" + ")\n" + "\n" + "TABLESPACE pg_default;\n" + "\n" + "ALTER TABLE IF EXISTS public.membership\n" + " OWNER to postgres;\n" + "CREATE INDEX IF NOT EXISTS fki_organisationid\n" + " ON public.membership USING btree\n" + " (organisationid ASC NULLS LAST)\n" + " TABLESPACE pg_default;") + execute_transaction(create_table_query, commit=True) + except Exception as e: + print("createUserTable failed because: \n", e) + + +def createOrganisationTable(): + try: + create_table_query = sql.SQL("\n" + "CREATE TABLE IF NOT EXISTS public.organisations\n" + "(\n" + "id bigint NOT NULL GENERATED ALWAYS AS IDENTITY ( INCREMENT 1 START 1 MINVALUE 1 " + "MAXVALUE 9223372036854775807 CACHE 1 ),\n" + " name text COLLATE 
pg_catalog.\"default\" NOT NULL,\n" + " CONSTRAINT organisationid PRIMARY KEY (id),\n" + " CONSTRAINT organisations_name_key UNIQUE (name)\n" + ")\n" + "\n" + "TABLESPACE pg_default;\n" + "\n" + "ALTER TABLE IF EXISTS public.organisations\n" + " OWNER to postgres;") execute_transaction(create_table_query, commit=True) except Exception as e: print("createUserTable failed because: \n", e) From 6d6372a9a5f51137dddb6c269d792abd88b6a3a6 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 12 Dec 2023 16:00:59 +0100 Subject: [PATCH 050/254] feat: add all tables for dev --- flask_app/dev.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/flask_app/dev.py b/flask_app/dev.py index eb95f16e..a02417a2 100644 --- a/flask_app/dev.py +++ b/flask_app/dev.py @@ -1,6 +1,6 @@ from flask import Blueprint -from postgres.transactions import createUserTable +from postgres.transactions import createUserTable, createDocumentsTable, createOrganisationTable, createMembershipTable dev_routes = Blueprint('dev_routes', __name__, url_prefix='/dev') @@ -9,8 +9,9 @@ def createTables(): try: createUserTable() - return 'createUserTable successfully' + createDocumentsTable() + createOrganisationTable() + createMembershipTable() + return 'create Tables successfully' except Exception as e: - print("createTables failed because: \n", e) - - return 'Table created successfully' + print("create Tables failed because: \n", e) From b8459601358b0f0f18159b674fd1f2b71ab8e908 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 12 Dec 2023 16:06:54 +0100 Subject: [PATCH 051/254] spelling --- flask_app/endpoints.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/flask_app/endpoints.py b/flask_app/endpoints.py index 8a321cbc..88fb47e0 100644 --- a/flask_app/endpoints.py +++ b/flask_app/endpoints.py @@ -17,7 +17,7 @@ def upload_files(): token = tokenDecode(authorization) - dokument_ids: list = [] + document_ids: list = [] for file in files: content_type = file.content_type @@ -26,15 +26,15 @@ def upload_files(): content = str(file.stream.read().decode('utf-8')) dokument_id = addDocument(filename, content, organisation_id, token.id) print(dokument_id) - dokument_ids.append(dokument_id) + document_ids.append(dokument_id) else: - dokument_ids.append(f"wrong type {content_type}") + document_ids.append(f"wrong type {content_type}") - if all(isinstance(dokument_ids, str) for _ in dokument_ids): - return make_response(dokument_ids, 400) - if any(isinstance(dokument_ids, str) for _ in dokument_ids): - return make_response(dokument_ids, 207) - return make_response(dokument_ids, 201) + if all(isinstance(document_ids, str) for _ in document_ids): + return make_response(document_ids, 400) + if any(isinstance(document_ids, str) for _ in document_ids): + return make_response(document_ids, 207) + return make_response(document_ids, 201) except Exception as e: return make_response({"message": "Upload failed", "details": str(e)}, 500) From 935e738277f22808ca148bc956a270dd9c65d2b7 Mon Sep 17 00:00:00 2001 From: phil1436 Date: Tue, 12 Dec 2023 16:42:34 +0100 Subject: [PATCH 052/254] login bug fix --- backend-requirements.txt | 2 +- postgres/queries.py | 2 +- postgres/transactions.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/backend-requirements.txt b/backend-requirements.txt index 9d3071a3..1f3ca00f 100644 --- a/backend-requirements.txt +++ b/backend-requirements.txt @@ -3,7 +3,7 @@ flask==3.0.0 Flask_Cors==4.0.0 gunicorn==21.2.0 psycopg2~=2.9.9 -bcrypt~=4.0.1 
+bcrypt==4.1.1 PyJWT~=2.8.0 wheel~=0.40.0 tornado~=6.4 diff --git a/postgres/queries.py b/postgres/queries.py index bd35f357..a4da8946 100644 --- a/postgres/queries.py +++ b/postgres/queries.py @@ -28,7 +28,7 @@ def checkPassword(user: str, password: str) -> bool: result = execute_query(select_query, (user,)) try: if result[0]: - stored_password = bytes(result[0][0]) # sketchy conversion but works + stored_password = bytes(result[0][0].encode('utf-8')) # sketchy conversion but works return bcrypt.checkpw(password.encode('utf-8'), stored_password) return False diff --git a/postgres/transactions.py b/postgres/transactions.py index 65fdc2d1..8f4cf934 100644 --- a/postgres/transactions.py +++ b/postgres/transactions.py @@ -123,7 +123,7 @@ def addUser(user: str, password: str): salt = bcrypt.gensalt() pwHash = bcrypt.hashpw(pwBytes, salt) # Needed this for the correct password check dont know why... - pwHash = pwHash.decode('utf-8') + #pwHash = pwHash.decode('utf-8') insert_data_query = sql.SQL("INSERT INTO users (username, password) VALUES (%s, %s) returning id;") data_to_insert = (user, pwHash) From b118d623df74f1e017f94108fd23e0209c193605 Mon Sep 17 00:00:00 2001 From: phil1436 Date: Tue, 12 Dec 2023 16:43:23 +0100 Subject: [PATCH 053/254] formatting --- ROUTES.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ROUTES.md b/ROUTES.md index 54a3ce78..0954bf26 100644 --- a/ROUTES.md +++ b/ROUTES.md @@ -16,7 +16,6 @@ The Flask app is running by default on port 8000. Here we assume that the app is **GET** - ``` http://localhost:8000/ ``` @@ -102,6 +101,7 @@ http://localhost:8000/login **POST** Upload files. + ``` http://localhost:8000/data/upload ``` @@ -134,6 +134,7 @@ http://localhost:8000/data/upload **POST** Create tables (Development). 
+ ``` http://localhost:8000/create-tables -``` \ No newline at end of file +``` From fe1546c8514acb8a1a5a37d00864a7499474fc9e Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 13 Dec 2023 13:07:34 +0100 Subject: [PATCH 054/254] fix(tokenDecode): cast id to int so id 1 will not be interpreted into true --- config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.py b/config.py index 38879544..0efd6dba 100644 --- a/config.py +++ b/config.py @@ -21,7 +21,7 @@ def tokenEncode(obj: dict[str, Any]): def tokenDecode(string: str): decoded_token = jwt.decode(string, _jwtkey, leeway=datetime.timedelta(minutes=1), algorithms="HS256", verify=True) user = decoded_token.get('user') - _id = decoded_token.get('id') + _id = int(decoded_token.get('id')) exp = decoded_token.get('exp') return Token(user, _id, exp) From a9a7ba2c74470287a9cbf7cff33ce09f27c0e94b Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 13 Dec 2023 13:10:07 +0100 Subject: [PATCH 055/254] refactor: remove print --- flask_app/endpoints.py | 1 - 1 file changed, 1 deletion(-) diff --git a/flask_app/endpoints.py b/flask_app/endpoints.py index 88fb47e0..85929591 100644 --- a/flask_app/endpoints.py +++ b/flask_app/endpoints.py @@ -25,7 +25,6 @@ def upload_files(): filename = file.filename content = str(file.stream.read().decode('utf-8')) dokument_id = addDocument(filename, content, organisation_id, token.id) - print(dokument_id) document_ids.append(dokument_id) else: document_ids.append(f"wrong type {content_type}") From b7ce6a00b029acb368fcb8ed667363e55439f0bd Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 13 Dec 2023 13:12:29 +0100 Subject: [PATCH 056/254] feat(dev): add get document --- flask_app/dev.py | 12 +++++++++++- postgres/queries.py | 3 +-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/flask_app/dev.py b/flask_app/dev.py index a02417a2..92d921d8 100644 --- a/flask_app/dev.py +++ b/flask_app/dev.py @@ -1,5 +1,6 @@ -from flask import Blueprint +from flask import Blueprint, make_response +from postgres.queries import _getDocument from postgres.transactions import createUserTable, createDocumentsTable, createOrganisationTable, createMembershipTable dev_routes = Blueprint('dev_routes', __name__, url_prefix='/dev') @@ -15,3 +16,12 @@ def createTables(): return 'create Tables successfully' except Exception as e: print("create Tables failed because: \n", e) + + +@dev_routes.route('/getDocument/<_id>', methods=['GET']) +def get_document(_id): + try: + response = _getDocument(_id) + return make_response(response, 200) + except Exception as e: + return make_response({"message": f"getFile with {_id} ", "details": str(e)}, 400) diff --git a/postgres/queries.py b/postgres/queries.py index a4da8946..e4dd52c5 100644 --- a/postgres/queries.py +++ b/postgres/queries.py @@ -64,8 +64,7 @@ def _getDocument(documentId: int): try: if result[0]: content = result[0] - return content + return str(content) except Exception as e: print("checkOrganisationAuthorisation failed because: \n", e) - return 99 From 7aa5c3c2b342e665defe16b772538d13b4257a4a Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 13 Dec 2023 13:18:17 +0100 Subject: [PATCH 057/254] feat(dev): add get document --- ROUTES.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/ROUTES.md b/ROUTES.md index 0954bf26..9ff95812 100644 --- a/ROUTES.md +++ b/ROUTES.md @@ -127,6 +127,25 @@ http://localhost:8000/data/upload Returns a list of documentIds. ``` +## Get Dokument + +**POST** + +get file. 
+ +``` +http://localhost:8000/dev/getDocument/<_id> +``` + +### Body + +- None + +### Response + +- String of File Content + + --- ## create-tables From f332ace70902277d202028235f7c4a02b7f9dd6b Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 13 Dec 2023 13:42:17 +0100 Subject: [PATCH 058/254] feat(addOrganisation): add Organisation id as an return --- postgres/transactions.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/postgres/transactions.py b/postgres/transactions.py index 8f4cf934..071922ab 100644 --- a/postgres/transactions.py +++ b/postgres/transactions.py @@ -173,8 +173,11 @@ def addOrganisation(organisationName: str, sessionToken: str): userid = token.id insert_query = sql.SQL("with a as (INSERT INTO organisations (name) VALUES (%s) returning id) " - "INSERT INTO membership (userid,organisationid) select (%s),id from a") - execute_transaction(insert_query, (organisationName, userid), commit=True) + "INSERT INTO membership (userid,organisationid) select (%s),id from a returning organisationid") + organisation_id = execute_transaction(insert_query, (organisationName, userid), commit=True) + + organisation_id = organisation_id if isinstance(organisation_id, int) else None + return organisation_id except Exception as e: print("addOrganisation failed because: \n", e) From bb715d5ad4e42948a842f9f50d866c532cdadcf1 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 13 Dec 2023 13:44:44 +0100 Subject: [PATCH 059/254] feat(addOrganisation): add addOrganisation as an endpoint --- flask_app/user.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/flask_app/user.py b/flask_app/user.py index 4e28cd71..ac087422 100644 --- a/flask_app/user.py +++ b/flask_app/user.py @@ -44,4 +44,18 @@ def login(): return make_response({'message': 'Wrong Password'}, 401) +@user_management.route('/creatOrganisation', methods=['POST']) +def creat_organisation(): + form = request.form + authorization = form.get("authorization") + token = tokenDecode(authorization) + if token is None: + return make_response({}, 401) + organisation_name = form.get("organisationName") + + organisation_id = addOrganisation(organisation_name, authorization) + + if organisation_id is None: + return make_response({'organisation_id': ""}, 422) + return make_response({'organisation_id': organisation_id}, 200) From 53307aaaeafcefcd07d63f53ce23effce2da2b4f Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 13 Dec 2023 13:45:13 +0100 Subject: [PATCH 060/254] Revert "login bug fix" This reverts commit 935e738277f22808ca148bc956a270dd9c65d2b7. 
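addOrganisation in the commits just above hands back whatever execute_transaction returns, and execute_transaction yields fetchall() rows rather than a bare integer (compare addDocument, which unwraps the result with int(response[0][0])). A minimal sketch of the same unwrapping, where first_id is an illustrative helper name that is not part of this patch series:

```python
from typing import Optional

def first_id(rows) -> Optional[int]:
    """Unwrap a single-id result from execute_transaction, i.e. [(42,)] -> 42."""
    return int(rows[0][0]) if rows else None

# e.g. inside addOrganisation (sketch):
#   organisation_id = first_id(execute_transaction(insert_query, data, commit=True))
```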
--- backend-requirements.txt | 2 +- postgres/queries.py | 2 +- postgres/transactions.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/backend-requirements.txt b/backend-requirements.txt index 1f3ca00f..9d3071a3 100644 --- a/backend-requirements.txt +++ b/backend-requirements.txt @@ -3,7 +3,7 @@ flask==3.0.0 Flask_Cors==4.0.0 gunicorn==21.2.0 psycopg2~=2.9.9 -bcrypt==4.1.1 +bcrypt~=4.0.1 PyJWT~=2.8.0 wheel~=0.40.0 tornado~=6.4 diff --git a/postgres/queries.py b/postgres/queries.py index e4dd52c5..a5221d37 100644 --- a/postgres/queries.py +++ b/postgres/queries.py @@ -28,7 +28,7 @@ def checkPassword(user: str, password: str) -> bool: result = execute_query(select_query, (user,)) try: if result[0]: - stored_password = bytes(result[0][0].encode('utf-8')) # sketchy conversion but works + stored_password = bytes(result[0][0]) # sketchy conversion but works return bcrypt.checkpw(password.encode('utf-8'), stored_password) return False diff --git a/postgres/transactions.py b/postgres/transactions.py index 071922ab..352dbc2a 100644 --- a/postgres/transactions.py +++ b/postgres/transactions.py @@ -123,7 +123,7 @@ def addUser(user: str, password: str): salt = bcrypt.gensalt() pwHash = bcrypt.hashpw(pwBytes, salt) # Needed this for the correct password check dont know why... - #pwHash = pwHash.decode('utf-8') + pwHash = pwHash.decode('utf-8') insert_data_query = sql.SQL("INSERT INTO users (username, password) VALUES (%s, %s) returning id;") data_to_insert = (user, pwHash) From ff0874f448108d648fa019cb00799d785269d0ed Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 13 Dec 2023 13:46:21 +0100 Subject: [PATCH 061/254] feat(addOrganisation): add addOrganisation as an endpoint --- flask_app/user.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flask_app/user.py b/flask_app/user.py index ac087422..863a6328 100644 --- a/flask_app/user.py +++ b/flask_app/user.py @@ -1,9 +1,9 @@ # main_routes.py from flask import Blueprint, request, make_response -from config import Token, tokenEncode +from config import Token, tokenEncode, tokenDecode from postgres.queries import checkPassword -from postgres.transactions import addUser +from postgres.transactions import addUser, addOrganisation user_management = Blueprint('user_management', __name__) From 80520733576ed2b7e532356ae10e9343b9c6254c Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 13 Dec 2023 14:03:13 +0100 Subject: [PATCH 062/254] feat(login): add userid to token --- postgres/queries.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/postgres/queries.py b/postgres/queries.py index a5221d37..08600b07 100644 --- a/postgres/queries.py +++ b/postgres/queries.py @@ -1,3 +1,5 @@ +from typing import Union + import bcrypt from psycopg2 import sql from postgres.util import execute_query @@ -23,13 +25,15 @@ def getOrganisationIDsFromUserId(userID: int): return execute_query(select_query, (userID,)) -def checkPassword(user: str, password: str) -> bool: - select_query = sql.SQL("SELECT password as pw FROM users WHERE username = %s;") +def checkPassword(user: str, password: str) -> Union[tuple[bool, int], bool]: + select_query = sql.SQL("SELECT password,id as pw FROM users WHERE username = %s ") result = execute_query(select_query, (user,)) try: - if result[0]: + if result[0][0]: stored_password = bytes(result[0][0]) # sketchy conversion but works - return bcrypt.checkpw(password.encode('utf-8'), stored_password) + check = bcrypt.checkpw(password.encode('utf-8'), stored_password) + if check: + 
return bcrypt.checkpw(password.encode('utf-8'), stored_password), int(result[0][1]) return False From ff97bc05cbf4479e6ed352ecee994b76cefb077a Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 13 Dec 2023 14:03:28 +0100 Subject: [PATCH 063/254] feat(login): add userid to token --- flask_app/user.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flask_app/user.py b/flask_app/user.py index 863a6328..ba41ddf6 100644 --- a/flask_app/user.py +++ b/flask_app/user.py @@ -32,10 +32,10 @@ def login(): username = data.get('username') password = data.get('password') - _correct = checkPassword(username, password) + _correct, _id = checkPassword(username, password) if _correct: - user = Token(username, _correct) + user = Token(username, _id) token = tokenEncode(user.json()) return make_response({'message': 'Log in successfully', From 03eea15a3c8888ad7c1db4a9aaeb497c4331dab1 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 13 Dec 2023 15:21:21 +0100 Subject: [PATCH 064/254] feat(execute_transaction): add IntegrityError as an error --- postgres/util.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/postgres/util.py b/postgres/util.py index e0c34af7..37467a7e 100644 --- a/postgres/util.py +++ b/postgres/util.py @@ -1,5 +1,5 @@ import psycopg2 -from psycopg2 import extensions +from psycopg2 import extensions, IntegrityError DB_NAME = "userManagement" DB_USER = "postgres" @@ -37,6 +37,9 @@ def execute_transaction(query, params=None, commit=False): result = cur.fetchall() return result if result else None + except IntegrityError as e: + raise IntegrityError(f"Query execution failed for transaction: {query} \nParams: {params} \nError: {e}") + except Exception as e: raise Exception(f"Query execution failed for transaction: {query} \nParams: {params} \nError: {e}") From ee0657eafd88bac8f3a500d681924ff81fc74467 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 13 Dec 2023 15:21:53 +0100 Subject: [PATCH 065/254] feat(creat_organisation): add error handling --- flask_app/user.py | 10 ++++++---- postgres/transactions.py | 9 ++++++--- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/flask_app/user.py b/flask_app/user.py index ba41ddf6..1d18ef0c 100644 --- a/flask_app/user.py +++ b/flask_app/user.py @@ -46,16 +46,18 @@ def login(): @user_management.route('/creatOrganisation', methods=['POST']) def creat_organisation(): - form = request.form - authorization = form.get("authorization") + data = request.get_json() + authorization = data.get("authorization") token = tokenDecode(authorization) if token is None: return make_response({}, 401) - organisation_name = form.get("organisationName") + organisation_name = data.get("organisationName") - organisation_id = addOrganisation(organisation_name, authorization) + organisation_id, error = addOrganisation(organisation_name, authorization) + if error: + return make_response({"error": error}, 422) if organisation_id is None: return make_response({'organisation_id': ""}, 422) return make_response({'organisation_id': organisation_id}, 200) diff --git a/postgres/transactions.py b/postgres/transactions.py index 352dbc2a..083b5a09 100644 --- a/postgres/transactions.py +++ b/postgres/transactions.py @@ -1,5 +1,5 @@ import bcrypt -from psycopg2 import sql +from psycopg2 import sql, IntegrityError from config import Token, Authorisation, tokenDecode from postgres.queries import checkPassword from postgres.util import execute_transaction @@ -176,8 +176,11 @@ def addOrganisation(organisationName: str, sessionToken: str): 
"INSERT INTO membership (userid,organisationid) select (%s),id from a returning organisationid") organisation_id = execute_transaction(insert_query, (organisationName, userid), commit=True) - organisation_id = organisation_id if isinstance(organisation_id, int) else None - return organisation_id + organisation_id = int(organisation_id) + return organisation_id, None + + except IntegrityError: + return None, "name already exists." except Exception as e: print("addOrganisation failed because: \n", e) From 7243dc144da9bc4a95e13e32c91e73cbbb9b98f4 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 13 Dec 2023 15:26:44 +0100 Subject: [PATCH 066/254] feat(creat_organisation): add error handling --- ROUTES.md | 36 ++++++++++++++++++++++++++++++++++++ flask_app/user.py | 4 +--- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/ROUTES.md b/ROUTES.md index 9ff95812..d1843688 100644 --- a/ROUTES.md +++ b/ROUTES.md @@ -93,6 +93,42 @@ http://localhost:8000/login "token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1..." } ``` + +--- + +## Organisation + +**POST** + +creatOrganisation + +``` +http://localhost:8000/creatOrganisation +``` + +### Body + +```json +{ + "authorization": "---", + "organisationName": "---" +} +``` + +### Response + +- 409: duplication **Conflict**: + ```json + { + "error": "name already exists." + } + ``` +- 200: **success**: + ```json + { + "organisation_id": "---" + } + ``` --- diff --git a/flask_app/user.py b/flask_app/user.py index 1d18ef0c..78473603 100644 --- a/flask_app/user.py +++ b/flask_app/user.py @@ -57,7 +57,5 @@ def creat_organisation(): organisation_id, error = addOrganisation(organisation_name, authorization) if error: - return make_response({"error": error}, 422) - if organisation_id is None: - return make_response({'organisation_id': ""}, 422) + return make_response({"error": error}, 409) return make_response({'organisation_id': organisation_id}, 200) From aed7bd3497860c4cbdf144f863b4c494779f8b90 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 13 Dec 2023 16:15:08 +0100 Subject: [PATCH 067/254] feat(addUserToOrganisation): add endpoint as well as adjusting the underlying query --- flask_app/user.py | 20 ++++++++++++++++- postgres/transactions.py | 47 ++++++++++++++++++++++------------------ 2 files changed, 45 insertions(+), 22 deletions(-) diff --git a/flask_app/user.py b/flask_app/user.py index 78473603..f1ec3217 100644 --- a/flask_app/user.py +++ b/flask_app/user.py @@ -3,7 +3,7 @@ from config import Token, tokenEncode, tokenDecode from postgres.queries import checkPassword -from postgres.transactions import addUser, addOrganisation +from postgres.transactions import addUser, addOrganisation, addUserToOrganisation user_management = Blueprint('user_management', __name__) @@ -59,3 +59,21 @@ def creat_organisation(): if error: return make_response({"error": error}, 409) return make_response({'organisation_id': organisation_id}, 200) + + +@user_management.route('/addUserToOrganisation', methods=['POST']) +def add_user_to_organisation(): + data = request.get_json() + authorization = data.get("authorization") + token = tokenDecode(authorization) + if token is None: + return make_response({}, 401) + + organisation_name = data.get("organisationName") + new_user = data.get("newUser") + + organisation_id, error = addUserToOrganisation(organisation_name, authorization, new_user) + + if error: + return make_response({"error": error}, 409) + return make_response({'organisation_id': organisation_id}, 200) diff --git a/postgres/transactions.py 
b/postgres/transactions.py index 083b5a09..603a2925 100644 --- a/postgres/transactions.py +++ b/postgres/transactions.py @@ -191,28 +191,33 @@ def addUserToOrganisation(organisationName: str, sessionToken: str, newUser: str token: Token = tokenDecode(sessionToken) userid = token.id - insert_query = sql.SQL(""" - WITH org AS ( - SELECT userid, organisationid - FROM membership - WHERE organisationid = (SELECT id FROM organisations WHERE name = %s) - ), user_info AS ( - SELECT id - FROM users - WHERE username = %s - ) - INSERT INTO membership (userid, organisationid) - SELECT %s, org.organisationid - FROM org, user_info, membership AS m - WHERE org.organisationid = m.organisationid - AND user_info.id = m.userid - AND m.authorisation >= %s - AND %s >= %s - """) + insert_query = sql.SQL("""WITH addUser AS ( + SELECT id + FROM users + WHERE username = (%s) -- new User string + ), + ismemberandadmin as ( + SELECT organisationid + from membership + WHERE organisationid = (SELECT id FROM organisations WHERE name = (%s)) -- org name string + and userid = (%s) -- user id int + and authorisation < (%s) -- is minimum permission + ) +INSERT INTO membership (userid, organisationid) + SELECT a.id, m.organisationid + FROM addUser a, ismemberandadmin m + returning organisationid""") + + organisation_id = execute_transaction(insert_query, + (newUser, organisationName, userid, + str(Authorisation.Admin.value)), commit=True) + if organisation_id is None: + return None, "you have no privileges in this organisation" + + return int(organisation_id), None - execute_transaction(insert_query, (organisationName, newUser, userid, userid, - str(Authorisation.Admin.value), userid), - commit=True) + except IntegrityError: + return None, "name already exists." except Exception as e: print("addUserToOrganisation failed because: \n", e) From 22709d25c2a2bdd2156e8d30fe7d77639632d17c Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 13 Dec 2023 16:17:38 +0100 Subject: [PATCH 068/254] feat(addUserToOrganisation): add addUserToOrganisation to routing --- ROUTES.md | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/ROUTES.md b/ROUTES.md index d1843688..7063dc7e 100644 --- a/ROUTES.md +++ b/ROUTES.md @@ -129,6 +129,39 @@ http://localhost:8000/creatOrganisation "organisation_id": "---" } ``` + +**POST** + +creatOrganisation + +``` +http://localhost:8000/addUserToOrganisation +``` + +### Body + +```json +{ + "authorization": "---", + "organisationName": "---", + "newUser": "---" +} +``` + +### Response + +- 409: duplication **Conflict**: (temp) + ```json + { + "error": "error message" + } + ``` +- 200: **success**: + ```json + { + "organisation_id": "---" + } + ``` --- From 8d788171d10282abc438055ae81c589e7d7cf716 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 13 Dec 2023 17:24:56 +0100 Subject: [PATCH 069/254] build: update dependencies --- backend-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend-requirements.txt b/backend-requirements.txt index 9d3071a3..4cf9254f 100644 --- a/backend-requirements.txt +++ b/backend-requirements.txt @@ -3,7 +3,7 @@ flask==3.0.0 Flask_Cors==4.0.0 gunicorn==21.2.0 psycopg2~=2.9.9 -bcrypt~=4.0.1 +bcrypt~=4.1.1 PyJWT~=2.8.0 wheel~=0.40.0 tornado~=6.4 From 8ae39e3b551c698366925a02922c3a05b10ea39e Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 13 Dec 2023 17:30:35 +0100 Subject: [PATCH 070/254] build: update dependencies --- backend-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/backend-requirements.txt b/backend-requirements.txt index 4cf9254f..9beb4274 100644 --- a/backend-requirements.txt +++ b/backend-requirements.txt @@ -5,7 +5,7 @@ gunicorn==21.2.0 psycopg2~=2.9.9 bcrypt~=4.1.1 PyJWT~=2.8.0 -wheel~=0.40.0 +wheel==0.42.0 tornado~=6.4 setuptools~=69.0.2 werkzeug~=3.0.1 From a81c7e18c59bd4666625de054efddc4891fc865b Mon Sep 17 00:00:00 2001 From: phil1436 Date: Wed, 13 Dec 2023 17:35:49 +0100 Subject: [PATCH 071/254] encoding bug fix --- postgres/queries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/postgres/queries.py b/postgres/queries.py index 08600b07..21abe1b6 100644 --- a/postgres/queries.py +++ b/postgres/queries.py @@ -30,7 +30,7 @@ def checkPassword(user: str, password: str) -> Union[tuple[bool, int], bool]: result = execute_query(select_query, (user,)) try: if result[0][0]: - stored_password = bytes(result[0][0]) # sketchy conversion but works + stored_password = bytes(result[0][0].encode('utf-8')) # sketchy conversion but works check = bcrypt.checkpw(password.encode('utf-8'), stored_password) if check: return bcrypt.checkpw(password.encode('utf-8'), stored_password), int(result[0][1]) From 7d96193850169c702cdcf9c2dffc4bc293d2c782 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 13 Dec 2023 18:00:06 +0100 Subject: [PATCH 072/254] fix(checkPassword): anticipate different types --- postgres/queries.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/postgres/queries.py b/postgres/queries.py index 21abe1b6..2e8de684 100644 --- a/postgres/queries.py +++ b/postgres/queries.py @@ -1,4 +1,4 @@ -from typing import Union +from typing import Union, Tuple import bcrypt from psycopg2 import sql @@ -25,21 +25,24 @@ def getOrganisationIDsFromUserId(userID: int): return execute_query(select_query, (userID,)) -def checkPassword(user: str, password: str) -> Union[tuple[bool, int], bool]: +def checkPassword(user: str, password: str) -> Union[tuple[bool, int], tuple[bool, str]]: select_query = sql.SQL("SELECT password,id as pw FROM users WHERE username = %s ") - result = execute_query(select_query, (user,)) + _password, _id = execute_query(select_query, (user,))[0] try: - if result[0][0]: - stored_password = bytes(result[0][0].encode('utf-8')) # sketchy conversion but works + if _password: + if isinstance(_password, str): + stored_password = bytes(_password.encode('utf-8')) + else: + stored_password = bytes(_password) check = bcrypt.checkpw(password.encode('utf-8'), stored_password) if check: - return bcrypt.checkpw(password.encode('utf-8'), stored_password), int(result[0][1]) + return bcrypt.checkpw(password.encode('utf-8'), stored_password), int(_id) - return False + return False, "" except Exception as e: print("checkPassword failed because: \n", e) - return False + return False, str(e) def checkOrganisationAuthorisation(organisationName: str, userName: str) -> int: From 6a079e049bb2737b6d3b48fc5610e1d95453937d Mon Sep 17 00:00:00 2001 From: phil1436 Date: Wed, 13 Dec 2023 18:19:45 +0100 Subject: [PATCH 073/254] createorg bug fix --- flask_app/user.py | 1 - postgres/transactions.py | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/flask_app/user.py b/flask_app/user.py index f1ec3217..71ca313a 100644 --- a/flask_app/user.py +++ b/flask_app/user.py @@ -55,7 +55,6 @@ def creat_organisation(): organisation_name = data.get("organisationName") organisation_id, error = addOrganisation(organisation_name, authorization) - if error: return make_response({"error": error}, 409) return 
make_response({'organisation_id': organisation_id}, 200) diff --git a/postgres/transactions.py b/postgres/transactions.py index 603a2925..83e46999 100644 --- a/postgres/transactions.py +++ b/postgres/transactions.py @@ -175,8 +175,7 @@ def addOrganisation(organisationName: str, sessionToken: str): insert_query = sql.SQL("with a as (INSERT INTO organisations (name) VALUES (%s) returning id) " "INSERT INTO membership (userid,organisationid) select (%s),id from a returning organisationid") organisation_id = execute_transaction(insert_query, (organisationName, userid), commit=True) - - organisation_id = int(organisation_id) + organisation_id = int(organisation_id[0][0]) return organisation_id, None except IntegrityError: From a5dbde9acff97d1dea7f337c2095dccbcc674472 Mon Sep 17 00:00:00 2001 From: phil1436 Date: Wed, 13 Dec 2023 18:33:53 +0100 Subject: [PATCH 074/254] undo createorg bug fix --- postgres/transactions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/postgres/transactions.py b/postgres/transactions.py index 83e46999..31086447 100644 --- a/postgres/transactions.py +++ b/postgres/transactions.py @@ -175,7 +175,7 @@ def addOrganisation(organisationName: str, sessionToken: str): insert_query = sql.SQL("with a as (INSERT INTO organisations (name) VALUES (%s) returning id) " "INSERT INTO membership (userid,organisationid) select (%s),id from a returning organisationid") organisation_id = execute_transaction(insert_query, (organisationName, userid), commit=True) - organisation_id = int(organisation_id[0][0]) + organisation_id = int(organisation_id) return organisation_id, None except IntegrityError: From a86e6c526e0917cd0758e922e5742cdc59d355de Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 13 Dec 2023 18:34:46 +0100 Subject: [PATCH 075/254] feat(dev): alter table creation --- flask_app/dev.py | 6 +- postgres/transactions.py | 155 ++++++++++++++++++++------------------- 2 files changed, 82 insertions(+), 79 deletions(-) diff --git a/flask_app/dev.py b/flask_app/dev.py index 92d921d8..fe90e82a 100644 --- a/flask_app/dev.py +++ b/flask_app/dev.py @@ -1,7 +1,8 @@ from flask import Blueprint, make_response from postgres.queries import _getDocument -from postgres.transactions import createUserTable, createDocumentsTable, createOrganisationTable, createMembershipTable +from postgres.transactions import createUserTable, createDocumentsTable, createOrganisationTable, createMembershipTable, \ + dropTables dev_routes = Blueprint('dev_routes', __name__, url_prefix='/dev') @@ -9,10 +10,11 @@ @dev_routes.route('/createTables', methods=['POST']) def createTables(): try: + dropTables() createUserTable() - createDocumentsTable() createOrganisationTable() createMembershipTable() + createDocumentsTable() return 'create Tables successfully' except Exception as e: print("create Tables failed because: \n", e) diff --git a/postgres/transactions.py b/postgres/transactions.py index 603a2925..5c5f554f 100644 --- a/postgres/transactions.py +++ b/postgres/transactions.py @@ -8,10 +8,10 @@ # WARNING: This is only for development purposes! 
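For context on the `int(organisation_id)` back-and-forth in patches 073/074 (and 076 further down): `execute_transaction` returns `cur.fetchall()`, so a single `RETURNING id` row comes back as a one-element list containing a one-element tuple. A tiny illustration with placeholder values:

```python
# Sketch: the shape execute_transaction hands back for "... RETURNING id".
rows = [(42,)]                      # what cur.fetchall() yields for one inserted row

organisation_id = int(rows[0][0])   # 42 -- the working variant
# int(rows) raises TypeError, which is the bug these patches are chasing.
```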
def dropTables(): try: - drop_table_query = sql.SQL("DROP TABLE IF EXISTS public.users;\n" - "DROP TABLE IF EXISTS public.documents;\n" - "DROP TABLE IF EXISTS public.membership;\n" - "DROP TABLE IF EXISTS public.organisations;") + drop_table_query = sql.SQL("DROP TABLE IF EXISTS public.users CASCADE;\n" + "DROP TABLE IF EXISTS public.documents CASCADE;\n" + "DROP TABLE IF EXISTS public.membership CASCADE;\n" + "DROP TABLE IF EXISTS public.organisations CASCADE;") execute_transaction(drop_table_query, commit=True) except Exception as e: print("dropTables failed because: \n", e) @@ -19,11 +19,17 @@ def dropTables(): def createUserTable(): try: - create_table_query = sql.SQL("CREATE TABLE IF NOT EXISTS users (\n" - " id SERIAL PRIMARY KEY,\n" - " username VARCHAR(100) UNIQUE NOT NULL,\n" - " password VARCHAR(1000) NOT NULL\n" - " );") + create_table_query = sql.SQL("""CREATE TABLE public.users +( + id bigint NOT NULL GENERATED ALWAYS AS IDENTITY ( INCREMENT 1 START 1 MINVALUE 1 MAXVALUE 9223372036854775807 CACHE 1 ), + username text COLLATE pg_catalog."default" NOT NULL, + password bytea NOT NULL, + CONSTRAINT userid PRIMARY KEY (id), + CONSTRAINT unique_username UNIQUE (username) +) + +TABLESPACE pg_default; +""") execute_transaction(create_table_query, commit=True) except Exception as e: print("createUserTable failed because: \n", e) @@ -31,31 +37,27 @@ def createUserTable(): def createDocumentsTable(): try: - create_table_query = sql.SQL("CREATE TABLE IF NOT EXISTS public.documents\n" - "(\n" - "id bigint NOT NULL GENERATED ALWAYS AS IDENTITY ( INCREMENT 1 START 1 MINVALUE 1 " - "MAXVALUE 9223372036854775807 CACHE 1 ),\n" - " name text COLLATE pg_catalog.\"default\" NOT NULL,\n" - " content text COLLATE pg_catalog.\"default\" NOT NULL,\n" - " organisationid bigint NOT NULL,\n" - " userid bigint NOT NULL,\n" - " CONSTRAINT dokumentid PRIMARY KEY (id),\n" - " CONSTRAINT documents_organisationid_fkey FOREIGN KEY (organisationid)\n" - " REFERENCES public.organisations (id) MATCH SIMPLE\n" - " ON UPDATE NO ACTION\n" - " ON DELETE NO ACTION\n" - " NOT VALID,\n" - " CONSTRAINT documents_userid_fkey FOREIGN KEY (userid)\n" - " REFERENCES public.users (id) MATCH SIMPLE\n" - " ON UPDATE NO ACTION\n" - " ON DELETE NO ACTION\n" - " NOT VALID\n" - ")\n" - "\n" - "TABLESPACE pg_default;\n" - "\n" - "ALTER TABLE IF EXISTS public.documents\n" - " OWNER to postgres;") + create_table_query = sql.SQL("""CREATE TABLE public.documents +( + id bigint NOT NULL GENERATED ALWAYS AS IDENTITY ( INCREMENT 1 START 1 MINVALUE 1 MAXVALUE 9223372036854775807 CACHE 1 ), + name text COLLATE pg_catalog."default" NOT NULL, + content text COLLATE pg_catalog."default" NOT NULL, + organisationid bigint NOT NULL, + userid bigint NOT NULL, + CONSTRAINT dokumentid PRIMARY KEY (id), + CONSTRAINT documents_organisationid_fkey FOREIGN KEY (organisationid) + REFERENCES public.organisations (id) MATCH SIMPLE + ON UPDATE NO ACTION + ON DELETE NO ACTION + NOT VALID, + CONSTRAINT documents_userid_fkey FOREIGN KEY (userid) + REFERENCES public.users (id) MATCH SIMPLE + ON UPDATE NO ACTION + ON DELETE NO ACTION + NOT VALID +) + +TABLESPACE pg_default;""") execute_transaction(create_table_query, commit=True) except Exception as e: print("createUserTable failed because: \n", e) @@ -63,34 +65,36 @@ def createDocumentsTable(): def createMembershipTable(): try: - create_table_query = sql.SQL("" - "\n" - "CREATE TABLE IF NOT EXISTS public.membership\n" - "(\n" - " userid bigint NOT NULL,\n" - " organisationid bigint NOT NULL,\n" - " authorisation 
bigint NOT NULL DEFAULT 0,\n" - " CONSTRAINT membership_pkey PRIMARY KEY (userid, organisationid),\n" - " CONSTRAINT membership_organisationid_fkey FOREIGN KEY (organisationid)\n" - " REFERENCES public.organisations (id) MATCH SIMPLE\n" - " ON UPDATE NO ACTION\n" - " ON DELETE NO ACTION\n" - " NOT VALID,\n" - " CONSTRAINT membership_userid_fkey FOREIGN KEY (userid)\n" - " REFERENCES public.users (id) MATCH SIMPLE\n" - " ON UPDATE NO ACTION\n" - " ON DELETE NO ACTION\n" - " NOT VALID\n" - ")\n" - "\n" - "TABLESPACE pg_default;\n" - "\n" - "ALTER TABLE IF EXISTS public.membership\n" - " OWNER to postgres;\n" - "CREATE INDEX IF NOT EXISTS fki_organisationid\n" - " ON public.membership USING btree\n" - " (organisationid ASC NULLS LAST)\n" - " TABLESPACE pg_default;") + create_table_query = sql.SQL("""CREATE TABLE IF NOT EXISTS public.membership +( + userid bigint NOT NULL, + organisationid bigint NOT NULL, + authorisation bigint NOT NULL DEFAULT 0, + CONSTRAINT membership_pkey PRIMARY KEY (userid, organisationid), + CONSTRAINT membership_organisationid_fkey FOREIGN KEY (organisationid) + REFERENCES public.organisations (id) MATCH SIMPLE + ON UPDATE NO ACTION + ON DELETE NO ACTION + NOT VALID, + CONSTRAINT membership_userid_fkey FOREIGN KEY (userid) + REFERENCES public.users (id) MATCH SIMPLE + ON UPDATE NO ACTION + ON DELETE NO ACTION + NOT VALID +) + +TABLESPACE pg_default; + +ALTER TABLE IF EXISTS public.membership + OWNER to postgres; +-- Index: fki_organisationid + +-- DROP INDEX IF EXISTS public.fki_organisationid; + +CREATE INDEX IF NOT EXISTS fki_organisationid + ON public.membership USING btree + (organisationid ASC NULLS LAST) + TABLESPACE pg_default;""") execute_transaction(create_table_query, commit=True) except Exception as e: print("createUserTable failed because: \n", e) @@ -98,20 +102,17 @@ def createMembershipTable(): def createOrganisationTable(): try: - create_table_query = sql.SQL("\n" - "CREATE TABLE IF NOT EXISTS public.organisations\n" - "(\n" - "id bigint NOT NULL GENERATED ALWAYS AS IDENTITY ( INCREMENT 1 START 1 MINVALUE 1 " - "MAXVALUE 9223372036854775807 CACHE 1 ),\n" - " name text COLLATE pg_catalog.\"default\" NOT NULL,\n" - " CONSTRAINT organisationid PRIMARY KEY (id),\n" - " CONSTRAINT organisations_name_key UNIQUE (name)\n" - ")\n" - "\n" - "TABLESPACE pg_default;\n" - "\n" - "ALTER TABLE IF EXISTS public.organisations\n" - " OWNER to postgres;") + create_table_query = sql.SQL("""CREATE TABLE IF NOT EXISTS public.organisations +( + id bigint NOT NULL GENERATED ALWAYS AS IDENTITY ( INCREMENT 1 START 1 MINVALUE 1 MAXVALUE 9223372036854775807 CACHE 1 ), + name text COLLATE pg_catalog."default" NOT NULL, + CONSTRAINT organisationid PRIMARY KEY (id), + CONSTRAINT organisations_name_key UNIQUE (name) +) + +TABLESPACE pg_default; + +""") execute_transaction(create_table_query, commit=True) except Exception as e: print("createUserTable failed because: \n", e) @@ -210,7 +211,7 @@ def addUserToOrganisation(organisationName: str, sessionToken: str, newUser: str organisation_id = execute_transaction(insert_query, (newUser, organisationName, userid, - str(Authorisation.Admin.value)), commit=True) + str(Authorisation.Admin.value)), commit=True) if organisation_id is None: return None, "you have no privileges in this organisation" From 7d417260432eb2fb7e11d3d123f8bd10d35b347b Mon Sep 17 00:00:00 2001 From: phil1436 Date: Wed, 13 Dec 2023 19:05:59 +0100 Subject: [PATCH 076/254] fix(createorg): cannot int(list) --- postgres/transactions.py | 2 +- 1 file changed, 1 insertion(+), 
1 deletion(-) diff --git a/postgres/transactions.py b/postgres/transactions.py index 1687d96f..a034e06a 100644 --- a/postgres/transactions.py +++ b/postgres/transactions.py @@ -176,7 +176,7 @@ def addOrganisation(organisationName: str, sessionToken: str): insert_query = sql.SQL("with a as (INSERT INTO organisations (name) VALUES (%s) returning id) " "INSERT INTO membership (userid,organisationid) select (%s),id from a returning organisationid") organisation_id = execute_transaction(insert_query, (organisationName, userid), commit=True) - organisation_id = int(organisation_id) + organisation_id = int(organisation_id[0][0]) return organisation_id, None except IntegrityError: From d0172fc2f51412ecfbe3629b237536b12a250f19 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Thu, 14 Dec 2023 14:58:16 +0100 Subject: [PATCH 077/254] feat(dev): add dropTables and added adjustable schemata --- flask_app/dev.py | 25 ++++++++++++++++--------- postgres/transactions.py | 40 ++++++++++++++++++++-------------------- 2 files changed, 36 insertions(+), 29 deletions(-) diff --git a/flask_app/dev.py b/flask_app/dev.py index fe90e82a..1efab8ce 100644 --- a/flask_app/dev.py +++ b/flask_app/dev.py @@ -7,17 +7,24 @@ dev_routes = Blueprint('dev_routes', __name__, url_prefix='/dev') -@dev_routes.route('/createTables', methods=['POST']) -def createTables(): +@dev_routes.route('/createTables/', methods=['POST']) +def create_tables(schema): try: - dropTables() - createUserTable() - createOrganisationTable() - createMembershipTable() - createDocumentsTable() - return 'create Tables successfully' + createUserTable(schema) + createOrganisationTable(schema) + createMembershipTable(schema) + createDocumentsTable(schema) + return f'create Tables in {schema} successfully' except Exception as e: - print("create Tables failed because: \n", e) + print("create Tables in {schema} failed because: \n", e) + +@dev_routes.route('/dropTables/', methods=['POST']) +def drop_tables(schema): + try: + dropTables(schema) + return f'drop Tables in {schema} successfully' + except Exception as e: + print("drop Tables in {schema} failed because: \n", e) @dev_routes.route('/getDocument/<_id>', methods=['GET']) diff --git a/postgres/transactions.py b/postgres/transactions.py index a034e06a..a47a6205 100644 --- a/postgres/transactions.py +++ b/postgres/transactions.py @@ -6,20 +6,20 @@ # WARNING: This is only for development purposes! 
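The schema-aware DDL here splices the schema name into the SQL with f-strings; psycopg2 can also compose identifiers explicitly, which patch 078 further down briefly uses via `sql.Identifier`. A small sketch of that alternative (the schema and column names are illustrative only):

```python
# Sketch: composing a schema-qualified table name instead of f-string interpolation.
from psycopg2 import sql

schema = "dev"                                   # placeholder schema name
create_users = sql.SQL(
    "CREATE TABLE IF NOT EXISTS {}.{} (id bigserial PRIMARY KEY, username text UNIQUE NOT NULL)"
).format(sql.Identifier(schema), sql.Identifier("users"))
# The composed query can then be handed to execute_transaction(create_users, commit=True).
```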
-def dropTables(): +def dropTables(schema): try: - drop_table_query = sql.SQL("DROP TABLE IF EXISTS public.users CASCADE;\n" - "DROP TABLE IF EXISTS public.documents CASCADE;\n" - "DROP TABLE IF EXISTS public.membership CASCADE;\n" - "DROP TABLE IF EXISTS public.organisations CASCADE;") + drop_table_query = sql.SQL(f"DROP TABLE IF EXISTS {schema}.users CASCADE;\n" + f"DROP TABLE IF EXISTS {schema}.documents CASCADE;\n" + f"DROP TABLE IF EXISTS {schema}.membership CASCADE;\n" + f"DROP TABLE IF EXISTS {schema}.organisations CASCADE;") execute_transaction(drop_table_query, commit=True) except Exception as e: print("dropTables failed because: \n", e) -def createUserTable(): +def createUserTable(schema): try: - create_table_query = sql.SQL("""CREATE TABLE public.users + create_table_query = sql.SQL(f"""CREATE TABLE {schema}.users ( id bigint NOT NULL GENERATED ALWAYS AS IDENTITY ( INCREMENT 1 START 1 MINVALUE 1 MAXVALUE 9223372036854775807 CACHE 1 ), username text COLLATE pg_catalog."default" NOT NULL, @@ -35,9 +35,9 @@ def createUserTable(): print("createUserTable failed because: \n", e) -def createDocumentsTable(): +def createDocumentsTable(schema): try: - create_table_query = sql.SQL("""CREATE TABLE public.documents + create_table_query = sql.SQL(f"""CREATE TABLE {schema}.documents ( id bigint NOT NULL GENERATED ALWAYS AS IDENTITY ( INCREMENT 1 START 1 MINVALUE 1 MAXVALUE 9223372036854775807 CACHE 1 ), name text COLLATE pg_catalog."default" NOT NULL, @@ -46,12 +46,12 @@ def createDocumentsTable(): userid bigint NOT NULL, CONSTRAINT dokumentid PRIMARY KEY (id), CONSTRAINT documents_organisationid_fkey FOREIGN KEY (organisationid) - REFERENCES public.organisations (id) MATCH SIMPLE + REFERENCES {schema}.organisations (id) MATCH SIMPLE ON UPDATE NO ACTION ON DELETE NO ACTION NOT VALID, CONSTRAINT documents_userid_fkey FOREIGN KEY (userid) - REFERENCES public.users (id) MATCH SIMPLE + REFERENCES {schema}.users (id) MATCH SIMPLE ON UPDATE NO ACTION ON DELETE NO ACTION NOT VALID @@ -63,21 +63,21 @@ def createDocumentsTable(): print("createUserTable failed because: \n", e) -def createMembershipTable(): +def createMembershipTable(schema): try: - create_table_query = sql.SQL("""CREATE TABLE IF NOT EXISTS public.membership + create_table_query = sql.SQL(f"""CREATE TABLE IF NOT EXISTS {schema}.membership ( userid bigint NOT NULL, organisationid bigint NOT NULL, authorisation bigint NOT NULL DEFAULT 0, CONSTRAINT membership_pkey PRIMARY KEY (userid, organisationid), CONSTRAINT membership_organisationid_fkey FOREIGN KEY (organisationid) - REFERENCES public.organisations (id) MATCH SIMPLE + REFERENCES {schema}.organisations (id) MATCH SIMPLE ON UPDATE NO ACTION ON DELETE NO ACTION NOT VALID, CONSTRAINT membership_userid_fkey FOREIGN KEY (userid) - REFERENCES public.users (id) MATCH SIMPLE + REFERENCES {schema}.users (id) MATCH SIMPLE ON UPDATE NO ACTION ON DELETE NO ACTION NOT VALID @@ -85,14 +85,14 @@ def createMembershipTable(): TABLESPACE pg_default; -ALTER TABLE IF EXISTS public.membership +ALTER TABLE IF EXISTS {schema}.membership OWNER to postgres; -- Index: fki_organisationid --- DROP INDEX IF EXISTS public.fki_organisationid; +-- DROP INDEX IF EXISTS {schema}.fki_organisationid; CREATE INDEX IF NOT EXISTS fki_organisationid - ON public.membership USING btree + ON {schema}.membership USING btree (organisationid ASC NULLS LAST) TABLESPACE pg_default;""") execute_transaction(create_table_query, commit=True) @@ -100,9 +100,9 @@ def createMembershipTable(): print("createUserTable failed because: \n", 
e) -def createOrganisationTable(): +def createOrganisationTable(schema): try: - create_table_query = sql.SQL("""CREATE TABLE IF NOT EXISTS public.organisations + create_table_query = sql.SQL(f"""CREATE TABLE IF NOT EXISTS {schema}.organisations ( id bigint NOT NULL GENERATED ALWAYS AS IDENTITY ( INCREMENT 1 START 1 MINVALUE 1 MAXVALUE 9223372036854775807 CACHE 1 ), name text COLLATE pg_catalog."default" NOT NULL, From afb5e1157575f8c41a78436e5dcc9d21897bbf9d Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Thu, 14 Dec 2023 15:49:57 +0100 Subject: [PATCH 078/254] feat(dev): add createSchema and dropSchema --- postgres/transactions.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/postgres/transactions.py b/postgres/transactions.py index a47a6205..41727be5 100644 --- a/postgres/transactions.py +++ b/postgres/transactions.py @@ -6,6 +6,25 @@ # WARNING: This is only for development purposes! + +def createSchema(schema): + try: + create_schema_query = sql.SQL("CREATE SCHEMA IF NOT EXISTS {};").format(sql.Identifier(schema)) + execute_transaction(create_schema_query, ()) + print(f"Schema {schema} created successfully.") + except Exception as e: + print(f"Error creating schema {schema}: {e}") + + +def dropSchema(schema): + try: + drop_schema_query = sql.SQL("DROP SCHEMA IF EXISTS {} CASCADE;").format(sql.Identifier(schema)) + execute_transaction(drop_schema_query, ()) + print(f"Schema {schema} dropped successfully.") + except Exception as e: + print(f"Error dropping schema {schema}: {e}") + + def dropTables(schema): try: drop_table_query = sql.SQL(f"DROP TABLE IF EXISTS {schema}.users CASCADE;\n" From 0c4851861ad0095d8412aa711ef64a0be55b24bf Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Thu, 14 Dec 2023 16:00:56 +0100 Subject: [PATCH 079/254] feat(dev): add createSchema and dropSchema --- flask_app/dev.py | 6 ++++-- postgres/transactions.py | 8 ++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/flask_app/dev.py b/flask_app/dev.py index 1efab8ce..1253eeab 100644 --- a/flask_app/dev.py +++ b/flask_app/dev.py @@ -2,7 +2,7 @@ from postgres.queries import _getDocument from postgres.transactions import createUserTable, createDocumentsTable, createOrganisationTable, createMembershipTable, \ - dropTables + dropTables, dropSchema, createSchema dev_routes = Blueprint('dev_routes', __name__, url_prefix='/dev') @@ -10,18 +10,20 @@ @dev_routes.route('/createTables/', methods=['POST']) def create_tables(schema): try: + createSchema(schema) createUserTable(schema) createOrganisationTable(schema) createMembershipTable(schema) createDocumentsTable(schema) return f'create Tables in {schema} successfully' except Exception as e: - print("create Tables in {schema} failed because: \n", e) + print(f"create Tables in {schema} failed because: \n", e) @dev_routes.route('/dropTables/', methods=['POST']) def drop_tables(schema): try: dropTables(schema) + dropSchema(schema) return f'drop Tables in {schema} successfully' except Exception as e: print("drop Tables in {schema} failed because: \n", e) diff --git a/postgres/transactions.py b/postgres/transactions.py index 41727be5..2b5b3d93 100644 --- a/postgres/transactions.py +++ b/postgres/transactions.py @@ -9,8 +9,8 @@ def createSchema(schema): try: - create_schema_query = sql.SQL("CREATE SCHEMA IF NOT EXISTS {};").format(sql.Identifier(schema)) - execute_transaction(create_schema_query, ()) + create_schema_query = sql.SQL(f"CREATE SCHEMA {schema};") + execute_transaction(create_schema_query, (), fetch=False) print(f"Schema {schema} 
created successfully.") except Exception as e: print(f"Error creating schema {schema}: {e}") @@ -18,8 +18,8 @@ def createSchema(schema): def dropSchema(schema): try: - drop_schema_query = sql.SQL("DROP SCHEMA IF EXISTS {} CASCADE;").format(sql.Identifier(schema)) - execute_transaction(drop_schema_query, ()) + drop_schema_query = sql.SQL(f"DROP SCHEMA IF EXISTS {schema} CASCADE;") + execute_transaction(drop_schema_query, (), fetch=False) print(f"Schema {schema} dropped successfully.") except Exception as e: print(f"Error dropping schema {schema}: {e}") From 7ebcffcec385f0f307dca503c5681ab22a19efd1 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Thu, 14 Dec 2023 16:01:59 +0100 Subject: [PATCH 080/254] fix: typo --- flask_app/user.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flask_app/user.py b/flask_app/user.py index 71ca313a..247c41b2 100644 --- a/flask_app/user.py +++ b/flask_app/user.py @@ -44,8 +44,8 @@ def login(): return make_response({'message': 'Wrong Password'}, 401) -@user_management.route('/creatOrganisation', methods=['POST']) -def creat_organisation(): +@user_management.route('/createOrganisation', methods=['POST']) +def create_organisation(): data = request.get_json() authorization = data.get("authorization") token = tokenDecode(authorization) From 3df2e3810140e218f520ac2c1ac8cabcfe6c1dfa Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Thu, 14 Dec 2023 16:08:27 +0100 Subject: [PATCH 081/254] fix: change return behavior and add optional fetch option --- postgres/util.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/postgres/util.py b/postgres/util.py index 37467a7e..cebf2fcb 100644 --- a/postgres/util.py +++ b/postgres/util.py @@ -1,5 +1,6 @@ import psycopg2 -from psycopg2 import extensions, IntegrityError +from psycopg2 import extensions, IntegrityError, sql +from psycopg2.sql import SQL DB_NAME = "userManagement" DB_USER = "postgres" @@ -22,7 +23,7 @@ def connectPG(): raise Exception("Connection failed because: \n", e) -def execute_transaction(query, params=None, commit=False): +def execute_transaction(query, params=None, commit=False ,fetch=True): conn = None cur = None try: @@ -34,8 +35,10 @@ def execute_transaction(query, params=None, commit=False): if commit: conn.commit() - result = cur.fetchall() - return result if result else None + if fetch: + result = cur.fetchall() + return result if result else None + return True except IntegrityError as e: raise IntegrityError(f"Query execution failed for transaction: {query} \nParams: {params} \nError: {e}") @@ -48,9 +51,10 @@ def execute_transaction(query, params=None, commit=False): conn.close() if cur: cur.close() + return False -def execute_query(query, params=None): +def execute_query(query: SQL, params=None): conn = None cur = None try: @@ -65,9 +69,9 @@ def execute_query(query, params=None): except Exception as e: raise Exception(f"Query execution failed for query:\n" - f"{query} \n" - f"Params: {params} \n" - f"Error: {e}") + f"{query} \n" + f"Params: {params} \n" + f"Error: {e}") finally: if conn: conn.close() From e668b69b6a3ec739fcf459a533908ce3ed0a7658 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Thu, 14 Dec 2023 16:31:11 +0100 Subject: [PATCH 082/254] feat(delete_user): add --- flask_app/user.py | 21 ++++++++++++++++++++- postgres/transactions.py | 5 ++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/flask_app/user.py b/flask_app/user.py index 247c41b2..262dcec5 100644 --- a/flask_app/user.py +++ b/flask_app/user.py @@ -3,7 +3,7 @@ 
from config import Token, tokenEncode, tokenDecode from postgres.queries import checkPassword -from postgres.transactions import addUser, addOrganisation, addUserToOrganisation +from postgres.transactions import addUser, addOrganisation, addUserToOrganisation, deleteUser user_management = Blueprint('user_management', __name__) @@ -44,6 +44,25 @@ def login(): return make_response({'message': 'Wrong Password'}, 401) +@user_management.route('/deleteUser/', methods=['POST']) +def delete_user(): + data = request.get_json() + username = data.get('username') + password = data.get('password') + authorization = data.get("authorization") + + check, _id = checkPassword(username, password) + token = tokenDecode(authorization) + if not (token and check and token.id == _id): + return make_response({'message': 'User not authorised '}, 401) + + response = deleteUser(username, password) + + if response: + return make_response({'message': 'User deleted'}, 204) + return make_response({'message': 'User deleted failed'}, 409) + + @user_management.route('/createOrganisation', methods=['POST']) def create_organisation(): data = request.get_json() diff --git a/postgres/transactions.py b/postgres/transactions.py index 2b5b3d93..1fbd8c83 100644 --- a/postgres/transactions.py +++ b/postgres/transactions.py @@ -181,8 +181,11 @@ def deleteUser(user: str, password: str): raise Exception("wrong password") delete_query = sql.SQL("DELETE FROM users WHERE username = %s;") - execute_transaction(delete_query, (user,), commit=True) + response = execute_transaction(delete_query, (user,), commit=True, fetch=False) + if isinstance(response, bool): + return response + raise TypeError("response :", response) except Exception as e: print("deleteUser failed because: \n", e) From a0de5efa31cec89670a7cce2d3e39e171e782606 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Thu, 14 Dec 2023 16:31:57 +0100 Subject: [PATCH 083/254] feat(tokenDecode): adjust return types if expired --- config.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/config.py b/config.py index 0efd6dba..743b6825 100644 --- a/config.py +++ b/config.py @@ -19,7 +19,11 @@ def tokenEncode(obj: dict[str, Any]): def tokenDecode(string: str): - decoded_token = jwt.decode(string, _jwtkey, leeway=datetime.timedelta(minutes=1), algorithms="HS256", verify=True) + try: + decoded_token = jwt.decode(string, _jwtkey, leeway=datetime.timedelta(minutes=1), algorithms="HS256", + verify=True) + except jwt.ExpiredSignatureError: + return False user = decoded_token.get('user') _id = int(decoded_token.get('id')) exp = decoded_token.get('exp') From 265a9f02858be69f98012a8db7bbe9f647c822df Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Thu, 14 Dec 2023 16:53:39 +0100 Subject: [PATCH 084/254] style: reformat --- postgres/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/postgres/util.py b/postgres/util.py index cebf2fcb..df95fa17 100644 --- a/postgres/util.py +++ b/postgres/util.py @@ -23,7 +23,7 @@ def connectPG(): raise Exception("Connection failed because: \n", e) -def execute_transaction(query, params=None, commit=False ,fetch=True): +def execute_transaction(query, params=None, commit=False, fetch=True): conn = None cur = None try: From 9847f22a6fa66c96a5f2083c2bf4f0c7ded4ab36 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Thu, 14 Dec 2023 16:56:29 +0100 Subject: [PATCH 085/254] fix(execute_transaction): return bug --- postgres/util.py | 1 - 1 file changed, 1 deletion(-) diff --git a/postgres/util.py b/postgres/util.py index df95fa17..dd24f41c 
100644 --- a/postgres/util.py +++ b/postgres/util.py @@ -51,7 +51,6 @@ def execute_transaction(query, params=None, commit=False, fetch=True): conn.close() if cur: cur.close() - return False def execute_query(query: SQL, params=None): From c4116e0407e1e6c84a6e0975cbebb453b919c69c Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 18 Dec 2023 20:50:06 +0100 Subject: [PATCH 086/254] feat(tokenDecode):add prevent decode bug --- config.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/config.py b/config.py index 743b6825..919a70ca 100644 --- a/config.py +++ b/config.py @@ -19,6 +19,8 @@ def tokenEncode(obj: dict[str, Any]): def tokenDecode(string: str): + if string is None or len(string) < 2: + raise ValueError("string value is: ", string) try: decoded_token = jwt.decode(string, _jwtkey, leeway=datetime.timedelta(minutes=1), algorithms="HS256", verify=True) From 2a031d1a3c951fb718ca00aff3771503027fd215 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 18 Dec 2023 20:50:45 +0100 Subject: [PATCH 087/254] feat(upload_files): adjust endpoints --- flask_app/endpoints.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/flask_app/endpoints.py b/flask_app/endpoints.py index 85929591..f4793068 100644 --- a/flask_app/endpoints.py +++ b/flask_app/endpoints.py @@ -8,11 +8,10 @@ @main_routes.route('/upload', methods=['POST']) def upload_files(): - try: files = request.files.getlist('file') form = request.form - authorization = form.get("authorization") + authorization = request.headers.get("authorization") organisation_id = int(form.get("organisationId")) token = tokenDecode(authorization) @@ -35,5 +34,4 @@ def upload_files(): return make_response(document_ids, 207) return make_response(document_ids, 201) - except Exception as e: - return make_response({"message": "Upload failed", "details": str(e)}, 500) + From 865a3c7a71a076fef68003d29ba00f025e3c48c2 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 18 Dec 2023 20:51:43 +0100 Subject: [PATCH 088/254] fix: db error handling for some queries --- postgres/queries.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/postgres/queries.py b/postgres/queries.py index 2e8de684..66d712ec 100644 --- a/postgres/queries.py +++ b/postgres/queries.py @@ -2,6 +2,8 @@ import bcrypt from psycopg2 import sql + +from config import tokenDecode, Token from postgres.util import execute_query @@ -21,19 +23,27 @@ def getMemberIDsFromOrganisationID(organisationID: int): def getOrganisationIDsFromUserId(userID: int): - select_query = sql.SQL("SELECT organisationid FROM membership WHERE userid = %s;") - return execute_query(select_query, (userID,)) + try: + select_query = sql.SQL("SELECT organisationid FROM membership WHERE userid = %s;") + response = execute_query(select_query, (userID,)) + if isinstance(response, list): + return response[0], None + elif response is None: + return [-1], None + else: + return None, "Unexpected response format" + + except Exception as e: + return None, e def checkPassword(user: str, password: str) -> Union[tuple[bool, int], tuple[bool, str]]: select_query = sql.SQL("SELECT password,id as pw FROM users WHERE username = %s ") _password, _id = execute_query(select_query, (user,))[0] + try: if _password: - if isinstance(_password, str): - stored_password = bytes(_password.encode('utf-8')) - else: - stored_password = bytes(_password) + stored_password = bytes(_password) check = bcrypt.checkpw(password.encode('utf-8'), stored_password) if check: return 
bcrypt.checkpw(password.encode('utf-8'), stored_password), int(_id) From 63dae93f10792e7c7c7c55190caa4c16819f70f1 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 18 Dec 2023 20:51:59 +0100 Subject: [PATCH 089/254] fix: db error handling for some queries --- postgres/transactions.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/postgres/transactions.py b/postgres/transactions.py index 1fbd8c83..6fa83e40 100644 --- a/postgres/transactions.py +++ b/postgres/transactions.py @@ -149,6 +149,8 @@ def addUser(user: str, password: str): data_to_insert = (user, pwHash) response = execute_transaction(insert_data_query, data_to_insert, commit=True) return int(response[0][0]) + except IntegrityError: + return -1 except Exception as e: print("addUser failed because: \n", e) @@ -180,7 +182,9 @@ def deleteUser(user: str, password: str): if not pwcheck: raise Exception("wrong password") - delete_query = sql.SQL("DELETE FROM users WHERE username = %s;") + delete_query = sql.SQL(""" + DELETE FROM users WHERE username = %s +""") response = execute_transaction(delete_query, (user,), commit=True, fetch=False) if isinstance(response, bool): From 4b154b4464877945bc41d2b6b776d108a36aaf81 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 18 Dec 2023 20:52:49 +0100 Subject: [PATCH 090/254] feat(register): give better endpoints --- flask_app/user.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/flask_app/user.py b/flask_app/user.py index 262dcec5..1ebf1f4d 100644 --- a/flask_app/user.py +++ b/flask_app/user.py @@ -16,14 +16,16 @@ def register(): _id = addUser(username, password) - if _id: + if _id > 0: user = Token(username, _id) token = tokenEncode(user.json()) return make_response({'message': 'User registered successfully', 'token': token}, 201) - - return make_response({'message': 'User register failed'}, 422) + elif _id < 0: + return make_response({'message': 'Conflicting username'}, 409) + else: + return make_response({'message': 'User register failed'}, 422) @user_management.route('/login', methods=['POST']) From 671a4f4517ced00738f266da7a6201dd8cefb640 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 18 Dec 2023 20:53:11 +0100 Subject: [PATCH 091/254] feat(delete_user): give better endpoints --- flask_app/user.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/flask_app/user.py b/flask_app/user.py index 1ebf1f4d..ff671487 100644 --- a/flask_app/user.py +++ b/flask_app/user.py @@ -51,11 +51,17 @@ def delete_user(): data = request.get_json() username = data.get('username') password = data.get('password') - authorization = data.get("authorization") + authorization = request.headers.get("Authorization") + + if authorization is None: + return make_response({'message': 'no authorization '}, 401) check, _id = checkPassword(username, password) token = tokenDecode(authorization) - if not (token and check and token.id == _id): + if token is None: + return make_response({'message': 'no authorization '}, 400) + + if check is False or token.id != _id: return make_response({'message': 'User not authorised '}, 401) response = deleteUser(username, password) @@ -65,6 +71,7 @@ def delete_user(): return make_response({'message': 'User deleted failed'}, 409) + @user_management.route('/createOrganisation', methods=['POST']) def create_organisation(): data = request.get_json() From 5fd0c785727f1fd112123cf0cc814abf7b8d01e3 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 18 Dec 2023 20:53:33 +0100 Subject: [PATCH 092/254] 
feat(create_organisation): give better endpoints --- flask_app/user.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/flask_app/user.py b/flask_app/user.py index ff671487..63fe320f 100644 --- a/flask_app/user.py +++ b/flask_app/user.py @@ -71,21 +71,18 @@ def delete_user(): return make_response({'message': 'User deleted failed'}, 409) - @user_management.route('/createOrganisation', methods=['POST']) def create_organisation(): data = request.get_json() - authorization = data.get("authorization") - token = tokenDecode(authorization) - if token is None: - return make_response({}, 401) + authorization = request.headers.get("Authorization") organisation_name = data.get("organisationName") organisation_id, error = addOrganisation(organisation_name, authorization) if error: return make_response({"error": error}, 409) - return make_response({'organisation_id': organisation_id}, 200) + else: + return make_response({'organisation_id': organisation_id}, 200) @user_management.route('/addUserToOrganisation', methods=['POST']) From 1a0ed64672e993c655c8401b61d86a0683a8cc3e Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 18 Dec 2023 20:53:59 +0100 Subject: [PATCH 093/254] feat(get_organisations): add new endpoint get_organisations --- flask_app/user.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/flask_app/user.py b/flask_app/user.py index 63fe320f..3910c686 100644 --- a/flask_app/user.py +++ b/flask_app/user.py @@ -85,6 +85,23 @@ def create_organisation(): return make_response({'organisation_id': organisation_id}, 200) +@user_management.route('/getOrganisations', methods=['GET']) +def get_organisations(): + authorization = request.headers.get("authorization") + token = tokenDecode(authorization) + if token is None: + return make_response({}, 401) + + organisation_ids, error = getOrganisationIDsFromUserId(token.id) + print(organisation_ids) + if error: + return make_response({"error": error}, 409) + elif organisation_ids[0] < 0: + return make_response({'user is in no organisation'}, 204) + else: + return make_response({'organisation_ids': organisation_ids}, 200) + + @user_management.route('/addUserToOrganisation', methods=['POST']) def add_user_to_organisation(): data = request.get_json() From 8403785ec4d151d4ffd5b99de96b7e74dd09e3b8 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 18 Dec 2023 20:54:11 +0100 Subject: [PATCH 094/254] feat(get_organisations): add new endpoint get_organisations --- flask_app/user.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flask_app/user.py b/flask_app/user.py index 3910c686..7d470f82 100644 --- a/flask_app/user.py +++ b/flask_app/user.py @@ -2,7 +2,7 @@ from flask import Blueprint, request, make_response from config import Token, tokenEncode, tokenDecode -from postgres.queries import checkPassword +from postgres.queries import checkPassword, getOrganisationIDsFromUserId from postgres.transactions import addUser, addOrganisation, addUserToOrganisation, deleteUser user_management = Blueprint('user_management', __name__) From 7dc549c055c1cc51a55afa01946337e7c7908c00 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 19 Dec 2023 18:16:15 +0100 Subject: [PATCH 095/254] fix(users): adjusted retrun order --- .pylintrc | 120 +++++++++++++++++++++++++++++++++++++++++----- flask_app/user.py | 15 +++--- 2 files changed, 115 insertions(+), 20 deletions(-) diff --git a/.pylintrc b/.pylintrc index 62b5bd36..c90edfe3 100644 --- a/.pylintrc +++ b/.pylintrc @@ -1,22 +1,118 @@ [MASTER] -# Specify the Python 
version you are using -python = 3.9 - -# Add the directories or files you want to ignore -ignore = venv, virtualenv, __pycache__ +init-hook='import os, sys; sys.path.append(os.path.abspath(os.path.curdir))' +ignore=tests +load-plugins= +jobs=4 +unsafe-load-any-extension=no +extension-pkg-whitelist= [MESSAGES CONTROL] -# Enable or disable messages based on your preferences -disable = C0330, C0114, C0115, C0116 +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED +confidence= + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +#enable= + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once).You can also use "--disable=all" to +# disable everything first and then reenable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use"--disable=all --enable=classes +# --disable=W" +disable=locally-disabled,too-few-public-methods,too-many-ancestors,useless-object-inheritance,useless-return,unnecessary-pass + + +[REPORTS] +output-format=text +reports=yes +evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) +#msg-template= + +[LOGGING] +logging-modules=logging + +[SIMILARITIES] +min-similarity-lines=8 +ignore-comments=yes +ignore-docstrings=yes +ignore-imports=no [FORMAT] +max-line-length=100 +ignore-long-lines=^\s*(# )??$ +single-line-if-stmt=no + +max-module-lines=1000 +indent-string='\t' + +[SPELLING] +spelling-dict= +spelling-ignore-words= +spelling-private-dict-file= +spelling-store-unknown-words=no + + +[VARIABLES] +init-import=no +dummy-variables-rgx=(_[a-zA-Z0-9_]*?$) +additional-builtins= +callbacks=cb_,_cb,handle_,get,post,put,patch,delete,options +redefining-builtins-modules=six.moves,future.builtins + + +[TYPECHECK] +ignore-mixin-members=yes +ignored-modules=flask_sqlalchemy,app.extensions.flask_sqlalchemy +ignored-classes=optparse.Values,thread._local,_thread._local +generated-members=fget,query,begin,add,merge,delete,commit,rollback +contextmanager-decorators=contextlib.contextmanager + + +[MISCELLANEOUS] +notes=FIXME,XXX,TODO +disable=missing-function-docstring + + +[BASIC] +good-names=i,j,k,ex,Run,_,log,api +bad-names=foo,bar,baz,toto,tutu,tata + + +[ELIF] +max-nested-blocks=5 + + +[DESIGN] +max-args=5 +ignored-argument-names=_.* +max-bool-expr=5 + + +[IMPORTS] +deprecated-modules=optparse +import-graph= +ext-import-graph= +int-import-graph= +known-standard-library= +known-third-party=flask_restplus_patched +analyse-fallback-blocks=no + -# Set the maximum number of characters per line -max-line-length = 120 +[CLASSES] +defining-attr-methods=__init__,__new__,setUp +valid-classmethod-first-arg=cls +valid-metaclass-classmethod-first-arg=mcs +exclude-protected=_asdict,_fields,_replace,_source,_make -# Specify the regular expressions for files or directories to include or exclude -include-ids = yes -indent-string 
= "\t" +[EXCEPTIONS] +overgeneral-exceptions=builtins.Exception \ No newline at end of file diff --git a/flask_app/user.py b/flask_app/user.py index 7d470f82..2385070d 100644 --- a/flask_app/user.py +++ b/flask_app/user.py @@ -2,7 +2,7 @@ from flask import Blueprint, request, make_response from config import Token, tokenEncode, tokenDecode -from postgres.queries import checkPassword, getOrganisationIDsFromUserId +from postgres.queries import checkPassword, getOrganisationIDsFromUserId, getOrganisationName from postgres.transactions import addUser, addOrganisation, addUserToOrganisation, deleteUser user_management = Blueprint('user_management', __name__) @@ -22,10 +22,9 @@ def register(): return make_response({'message': 'User registered successfully', 'token': token}, 201) - elif _id < 0: + if _id < 0: return make_response({'message': 'Conflicting username'}, 409) - else: - return make_response({'message': 'User register failed'}, 422) + return make_response({'message': 'User register failed'}, 422) @user_management.route('/login', methods=['POST']) @@ -42,8 +41,9 @@ def login(): return make_response({'message': 'Log in successfully', 'token': token}, 200) - else: + if not _correct: return make_response({'message': 'Wrong Password'}, 401) + return make_response({'message': 'User login failed'}, 422) @user_management.route('/deleteUser/', methods=['POST']) @@ -79,10 +79,9 @@ def create_organisation(): organisation_name = data.get("organisationName") organisation_id, error = addOrganisation(organisation_name, authorization) - if error: - return make_response({"error": error}, 409) - else: + if error is None: return make_response({'organisation_id': organisation_id}, 200) + return make_response({"error": error}, 409) @user_management.route('/getOrganisations', methods=['GET']) From 735205e0bad92a26eaea2caa4348a645d2fce4a4 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 19 Dec 2023 18:16:57 +0100 Subject: [PATCH 096/254] feat(getOrganisationName): add getOrganisationName --- flask_app/user.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/flask_app/user.py b/flask_app/user.py index 2385070d..d954a31e 100644 --- a/flask_app/user.py +++ b/flask_app/user.py @@ -92,13 +92,29 @@ def get_organisations(): return make_response({}, 401) organisation_ids, error = getOrganisationIDsFromUserId(token.id) - print(organisation_ids) - if error: - return make_response({"error": error}, 409) - elif organisation_ids[0] < 0: - return make_response({'user is in no organisation'}, 204) - else: + if error is None: return make_response({'organisation_ids': organisation_ids}, 200) + if organisation_ids[0] < 0: + return make_response({'user is in no organisation'}, 204) + return make_response({"error": error}, 409) + + + + +@user_management.route('/getOrganisationName/<_id>', methods=['GET']) +def get_organisation_name(_id): + authorization = request.headers.get("authorization") + token = tokenDecode(authorization) + if token is None: + return make_response({}, 401) + + organisation_name = getOrganisationName(_id) + + if organisation_name == -1: + return make_response({'organisation not found': organisation_name}, 404) + if isinstance(organisation_name, str): + return make_response({"organisation_name": organisation_name}, 200) + return make_response({"error": "error"}, 409) @user_management.route('/addUserToOrganisation', methods=['POST']) From e83bdd494ff01001350c8a20400bdba3c03abf42 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 19 Dec 2023 18:20:36 +0100 
Subject: [PATCH 097/254] feat(getOrganisationName): add getOrganisationName --- flask_app/user.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/flask_app/user.py b/flask_app/user.py index d954a31e..b657bd04 100644 --- a/flask_app/user.py +++ b/flask_app/user.py @@ -96,9 +96,7 @@ def get_organisations(): return make_response({'organisation_ids': organisation_ids}, 200) if organisation_ids[0] < 0: return make_response({'user is in no organisation'}, 204) - return make_response({"error": error}, 409) - - + return make_response({"error": error}, 409) @user_management.route('/getOrganisationName/<_id>', methods=['GET']) From 21f783d3b20b79ceee45b3001d427155a127cae5 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 19 Dec 2023 19:07:18 +0100 Subject: [PATCH 098/254] feat(getOrganisationName): add getOrganisationName --- postgres/queries.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/postgres/queries.py b/postgres/queries.py index 66d712ec..8824336a 100644 --- a/postgres/queries.py +++ b/postgres/queries.py @@ -1,9 +1,6 @@ -from typing import Union, Tuple - +from typing import Union import bcrypt from psycopg2 import sql - -from config import tokenDecode, Token from postgres.util import execute_query @@ -14,7 +11,18 @@ def getUserID(user: str): def getOrganisationID(organisation_name: str): select_query = sql.SQL("SELECT id FROM organisations WHERE name = %s;") - return execute_query(select_query, (organisation_name,)) + response = execute_query(select_query, (organisation_name,)) + if response is None: + return -1 + return int(response[0]) + + +def getOrganisationName(organisation_id: int): + select_query = sql.SQL("SELECT name FROM organisations WHERE id = %s;") + response = execute_query(select_query, (organisation_id,)) + if response is None: + return -1 + return str(response[0]) def getMemberIDsFromOrganisationID(organisationID: int): From 5e65fc1e51688db236a3133f2c87830bb64ed7d5 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 19 Dec 2023 20:11:13 +0100 Subject: [PATCH 099/254] feat: add getOrganisationNames --- flask_app/user.py | 20 ++++++++++++++++++-- postgres/queries.py | 21 ++++++++++++++++++++- 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/flask_app/user.py b/flask_app/user.py index b657bd04..945d01e7 100644 --- a/flask_app/user.py +++ b/flask_app/user.py @@ -2,7 +2,8 @@ from flask import Blueprint, request, make_response from config import Token, tokenEncode, tokenDecode -from postgres.queries import checkPassword, getOrganisationIDsFromUserId, getOrganisationName +from postgres.queries import (checkPassword, + getOrganisationIDsFromUserId, getOrganisationName, getOrganisationFromUserId) from postgres.transactions import addUser, addOrganisation, addUserToOrganisation, deleteUser user_management = Blueprint('user_management', __name__) @@ -95,7 +96,7 @@ def get_organisations(): if error is None: return make_response({'organisation_ids': organisation_ids}, 200) if organisation_ids[0] < 0: - return make_response({'user is in no organisation'}, 204) + return make_response({'user is in no organisation'}, 404) return make_response({"error": error}, 409) @@ -115,6 +116,21 @@ def get_organisation_name(_id): return make_response({"error": "error"}, 409) +@user_management.route('/getOrganisationNames', methods=['GET']) +def get_organisation_names(): + authorization = request.headers.get("authorization") + token = tokenDecode(authorization) + if token is None: + return make_response({}, 401) + + 
organisations, error = getOrganisationFromUserId(token.id) + if error is None: + return make_response({'organisations': organisations}, 200) + if organisations < 0: + return make_response({'user is in no organisation'}, 404) + return make_response({"error": error}, 409) + + @user_management.route('/addUserToOrganisation', methods=['POST']) def add_user_to_organisation(): data = request.get_json() diff --git a/postgres/queries.py b/postgres/queries.py index 8824336a..92c41ec7 100644 --- a/postgres/queries.py +++ b/postgres/queries.py @@ -37,7 +37,7 @@ def getOrganisationIDsFromUserId(userID: int): if isinstance(response, list): return response[0], None elif response is None: - return [-1], None + return -1, None else: return None, "Unexpected response format" @@ -45,6 +45,25 @@ def getOrganisationIDsFromUserId(userID: int): return None, e +def getOrganisationFromUserId(user_id: int): + try: + select_query = sql.SQL(""" SELECT organisationid, o.name + FROM membership + JOIN organisations o ON membership.organisationid = o.id + WHERE userid = %s;""") + response = execute_query(select_query, (user_id,)) + if isinstance(response, list): + organisations: list[dict[str, Union[str, int]]] = [] + for org in response: + organisations.append({"id": int(org[0]), "name": str(org[1])}) + return organisations, None + if response is None: + return [-1], None + return None, "Unexpected response format" + except Exception as e: + return None, e + + def checkPassword(user: str, password: str) -> Union[tuple[bool, int], tuple[bool, str]]: select_query = sql.SQL("SELECT password,id as pw FROM users WHERE username = %s ") _password, _id = execute_query(select_query, (user,))[0] From 97c68aeb3c6087d9b449a31ef0432cf9bd2d5825 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 20 Dec 2023 15:40:30 +0100 Subject: [PATCH 100/254] feat(dev): adjust tables --- postgres/transactions.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/postgres/transactions.py b/postgres/transactions.py index 6fa83e40..b0a646af 100644 --- a/postgres/transactions.py +++ b/postgres/transactions.py @@ -10,7 +10,7 @@ def createSchema(schema): try: create_schema_query = sql.SQL(f"CREATE SCHEMA {schema};") - execute_transaction(create_schema_query, (), fetch=False) + execute_transaction(create_schema_query, commit=True, fetch=False) print(f"Schema {schema} created successfully.") except Exception as e: print(f"Error creating schema {schema}: {e}") @@ -19,7 +19,7 @@ def createSchema(schema): def dropSchema(schema): try: drop_schema_query = sql.SQL(f"DROP SCHEMA IF EXISTS {schema} CASCADE;") - execute_transaction(drop_schema_query, (), fetch=False) + execute_transaction(drop_schema_query, commit=True, fetch=False) print(f"Schema {schema} dropped successfully.") except Exception as e: print(f"Error dropping schema {schema}: {e}") @@ -49,7 +49,7 @@ def createUserTable(schema): TABLESPACE pg_default; """) - execute_transaction(create_table_query, commit=True) + execute_transaction(create_table_query, commit=True, fetch=False) except Exception as e: print("createUserTable failed because: \n", e) @@ -66,18 +66,18 @@ def createDocumentsTable(schema): CONSTRAINT dokumentid PRIMARY KEY (id), CONSTRAINT documents_organisationid_fkey FOREIGN KEY (organisationid) REFERENCES {schema}.organisations (id) MATCH SIMPLE - ON UPDATE NO ACTION - ON DELETE NO ACTION + ON UPDATE CASCADE + ON DELETE CASCADE NOT VALID, CONSTRAINT documents_userid_fkey FOREIGN KEY (userid) REFERENCES {schema}.users (id) MATCH SIMPLE - ON 
UPDATE NO ACTION - ON DELETE NO ACTION + ON UPDATE CASCADE + ON DELETE CASCADE NOT VALID ) TABLESPACE pg_default;""") - execute_transaction(create_table_query, commit=True) + execute_transaction(create_table_query, commit=True,fetch=False) except Exception as e: print("createUserTable failed because: \n", e) @@ -92,13 +92,13 @@ def createMembershipTable(schema): CONSTRAINT membership_pkey PRIMARY KEY (userid, organisationid), CONSTRAINT membership_organisationid_fkey FOREIGN KEY (organisationid) REFERENCES {schema}.organisations (id) MATCH SIMPLE - ON UPDATE NO ACTION - ON DELETE NO ACTION + ON UPDATE CASCADE + ON DELETE CASCADE NOT VALID, CONSTRAINT membership_userid_fkey FOREIGN KEY (userid) REFERENCES {schema}.users (id) MATCH SIMPLE - ON UPDATE NO ACTION - ON DELETE NO ACTION + ON UPDATE CASCADE + ON DELETE CASCADE NOT VALID ) @@ -114,7 +114,7 @@ def createMembershipTable(schema): ON {schema}.membership USING btree (organisationid ASC NULLS LAST) TABLESPACE pg_default;""") - execute_transaction(create_table_query, commit=True) + execute_transaction(create_table_query, commit=True, fetch=False) except Exception as e: print("createUserTable failed because: \n", e) @@ -132,7 +132,7 @@ def createOrganisationTable(schema): TABLESPACE pg_default; """) - execute_transaction(create_table_query, commit=True) + execute_transaction(create_table_query, commit=True, fetch=False) except Exception as e: print("createUserTable failed because: \n", e) From e8c314b4709815a5d8b9906f5e515def7b212369 Mon Sep 17 00:00:00 2001 From: phil1436 Date: Thu, 21 Dec 2023 20:49:30 +0100 Subject: [PATCH 101/254] added leave org --- ROUTES.md | 50 +++++++++++++++++++++++++++++++++------- flask_app/user.py | 14 ++++++++++- postgres/transactions.py | 22 ++++++++++++++++++ 3 files changed, 77 insertions(+), 9 deletions(-) diff --git a/ROUTES.md b/ROUTES.md index 7063dc7e..f245eacc 100644 --- a/ROUTES.md +++ b/ROUTES.md @@ -93,7 +93,7 @@ http://localhost:8000/login "token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1..." } ``` - + --- ## Organisation @@ -120,19 +120,54 @@ http://localhost:8000/creatOrganisation - 409: duplication **Conflict**: ```json { - "error": "name already exists." + "error": "name already exists." 
} ``` - 200: **success**: ```json { - "organisation_id": "---" + "organisation_id": "---" } ``` - + **POST** -creatOrganisation +leaveOrganisation + +_Leave a organisation and delete the organisation if the user is the last member._ + +``` +http://localhost:8000/leaveOrganisation +``` + +### Body + +```json +{ + "authorization": "---", + "organisationId": "---" +} +``` + +### Response + +- 500: **error**: + ```json + { + "status": false, + "msg": "error message" + } + ``` +- 200: **success**: + ```json + { + "status": true + } + ``` + +**POST** + +addUserToOrganisation ``` http://localhost:8000/addUserToOrganisation @@ -153,13 +188,13 @@ http://localhost:8000/addUserToOrganisation - 409: duplication **Conflict**: (temp) ```json { - "error": "error message" + "error": "error message" } ``` - 200: **success**: ```json { - "organisation_id": "---" + "organisation_id": "---" } ``` @@ -214,7 +249,6 @@ http://localhost:8000/dev/getDocument/<_id> - String of File Content - --- ## create-tables diff --git a/flask_app/user.py b/flask_app/user.py index 945d01e7..26511a1a 100644 --- a/flask_app/user.py +++ b/flask_app/user.py @@ -4,7 +4,7 @@ from config import Token, tokenEncode, tokenDecode from postgres.queries import (checkPassword, getOrganisationIDsFromUserId, getOrganisationName, getOrganisationFromUserId) -from postgres.transactions import addUser, addOrganisation, addUserToOrganisation, deleteUser +from postgres.transactions import addUser, addOrganisation, addUserToOrganisation, deleteUser, leaveOrganisation user_management = Blueprint('user_management', __name__) @@ -84,6 +84,18 @@ def create_organisation(): return make_response({'organisation_id': organisation_id}, 200) return make_response({"error": error}, 409) +@user_management.route('/leaveOrganisation', methods=['POST']) +def leave_organisation(): + data = request.get_json() + authorization = request.headers.get("Authorization") + + organisationId = data.get("organisationId") + + success, error = leaveOrganisation(organisationId, authorization) + if success: + return make_response({'status': True}, 200) + return make_response({"status": False, "msg": str(error)}, 500) + @user_management.route('/getOrganisations', methods=['GET']) def get_organisations(): diff --git a/postgres/transactions.py b/postgres/transactions.py index b0a646af..c5b0121f 100644 --- a/postgres/transactions.py +++ b/postgres/transactions.py @@ -210,6 +210,28 @@ def addOrganisation(organisationName: str, sessionToken: str): except Exception as e: print("addOrganisation failed because: \n", e) + +def leaveOrganisation(organisationId: int, sessionToken: str): + try: + token: Token = tokenDecode(sessionToken) + userid = token.id + + delete_query = sql.SQL("DELETE FROM membership WHERE userid = (%s) AND organisationid = (%s) returning organisationid") + execute_transaction(delete_query, (userid,organisationId, ), commit=True) + + count_query = sql.SQL("SELECT COUNT(*) FROM membership WHERE organisationid = (%s)") + count = execute_transaction(count_query, [organisationId], commit=True) + count = int(count[0][0]) + if count > 0: + return True, None + + delete_query = sql.SQL("DELETE FROM organisations WHERE id = (%s)") + execute_transaction(delete_query, [organisationId], commit=True, fetch=False) + return True, None + except Exception as e: + print("leaveOrganisation failed because: \n", e) + return False, e + def addUserToOrganisation(organisationName: str, sessionToken: str, newUser: str): From bedbd4840b0bfafc9f27987dcb66c0b44f0a5db9 Mon Sep 17 00:00:00 2001 From: 
phil1436 Date: Fri, 22 Dec 2023 10:44:52 +0100 Subject: [PATCH 102/254] added get memebers for org --- ROUTES.md | 37 +++++++++++++++++++++++++++++++++++++ flask_app/user.py | 19 ++++++++++++++++++- postgres/queries.py | 3 +++ 3 files changed, 58 insertions(+), 1 deletion(-) diff --git a/ROUTES.md b/ROUTES.md index f245eacc..59bcba64 100644 --- a/ROUTES.md +++ b/ROUTES.md @@ -130,6 +130,43 @@ http://localhost:8000/creatOrganisation } ``` +**GET** + +getOrganisationMembers + +``` +http://localhost:8000/getOrganisationMembers/ +``` + +### Header + +```json +{ + "authorization": "---" +} +``` + +### Response + +- 404: Organisation **not found**: + ```json + { + "error": "Organisation not found." + } + ``` +- 401: User **not authorized**: + ```json + { + "error": "User not authorized." + } + ``` +- 200: **success**: + ```json + { + "members": ["username1", "username2", "username3"] + } + ``` + **POST** leaveOrganisation diff --git a/flask_app/user.py b/flask_app/user.py index 26511a1a..0e0fb9eb 100644 --- a/flask_app/user.py +++ b/flask_app/user.py @@ -2,7 +2,7 @@ from flask import Blueprint, request, make_response from config import Token, tokenEncode, tokenDecode -from postgres.queries import (checkPassword, +from postgres.queries import (checkPassword, getMembersOfOrganisation, getOrganisationIDsFromUserId, getOrganisationName, getOrganisationFromUserId) from postgres.transactions import addUser, addOrganisation, addUserToOrganisation, deleteUser, leaveOrganisation @@ -159,3 +159,20 @@ def add_user_to_organisation(): if error: return make_response({"error": error}, 409) return make_response({'organisation_id': organisation_id}, 200) + +@user_management.route('/getOrganisationMembers/<_id>', methods=['GET']) +def get_organisation_members(_id): + authorization = request.headers.get("authorization") + token = tokenDecode(authorization) + if token is None: + return make_response({'error': 'no authorization'}, 401) + + members_raw = getMembersOfOrganisation(_id) + if members_raw is None: + return make_response({'error':'organisation '+_id+' not found'}, 404) + + members = [] + for member in members_raw: + members.append(member[0]) + + return make_response({"members": members}, 200) \ No newline at end of file diff --git a/postgres/queries.py b/postgres/queries.py index 92c41ec7..ec389325 100644 --- a/postgres/queries.py +++ b/postgres/queries.py @@ -24,6 +24,9 @@ def getOrganisationName(organisation_id: int): return -1 return str(response[0]) +def getMembersOfOrganisation(organisation_id: int): + select_query = sql.SQL("SELECT username FROM users WHERE id IN (SELECT userid FROM membership WHERE organisationid = %s);") + return execute_query(select_query, (organisation_id,)) def getMemberIDsFromOrganisationID(organisationID: int): select_query = sql.SQL("SELECT userid FROM membership WHERE organisationid = %s;") From db05b91f3a85b947eebebbb3b0532b888fa71cae Mon Sep 17 00:00:00 2001 From: phil1436 Date: Fri, 22 Dec 2023 11:18:28 +0100 Subject: [PATCH 103/254] added addMember --- flask_app/user.py | 12 ++++++------ postgres/transactions.py | 18 ++++++++++++++++++ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/flask_app/user.py b/flask_app/user.py index 0e0fb9eb..1ecf1edf 100644 --- a/flask_app/user.py +++ b/flask_app/user.py @@ -4,7 +4,7 @@ from config import Token, tokenEncode, tokenDecode from postgres.queries import (checkPassword, getMembersOfOrganisation, getOrganisationIDsFromUserId, getOrganisationName, getOrganisationFromUserId) -from postgres.transactions import addUser, 
addOrganisation, addUserToOrganisation, deleteUser, leaveOrganisation +from postgres.transactions import addUser, addOrganisation, addUserToOrganisation, addUserToOrganisation2, deleteUser, leaveOrganisation user_management = Blueprint('user_management', __name__) @@ -145,16 +145,16 @@ def get_organisation_names(): @user_management.route('/addUserToOrganisation', methods=['POST']) def add_user_to_organisation(): - data = request.get_json() - authorization = data.get("authorization") + authorization = request.headers.get("authorization") token = tokenDecode(authorization) if token is None: - return make_response({}, 401) + return make_response({'error': 'no authorization'}, 401) - organisation_name = data.get("organisationName") + data = request.get_json() + organisation_name = data.get("organisationId") new_user = data.get("newUser") - organisation_id, error = addUserToOrganisation(organisation_name, authorization, new_user) + organisation_id, error = addUserToOrganisation2(organisation_name, new_user) if error: return make_response({"error": error}, 409) diff --git a/postgres/transactions.py b/postgres/transactions.py index c5b0121f..9ea81562 100644 --- a/postgres/transactions.py +++ b/postgres/transactions.py @@ -270,6 +270,24 @@ def addUserToOrganisation(organisationName: str, sessionToken: str, newUser: str except Exception as e: print("addUserToOrganisation failed because: \n", e) +def addUserToOrganisation2(organisationId: int, newUser: str): + try: + select_id_query = sql.SQL("SELECT id FROM users WHERE username = (%s)") + userid = execute_transaction(select_id_query, (newUser,), commit=True) + if userid is None: + return None, "User does not exist" + + insert_query = sql.SQL("INSERT INTO membership (userid, organisationid) VALUES (%s, %s) returning organisationid") + organisation_id = execute_transaction(insert_query, (userid[0][0], organisationId), commit=True) + if organisation_id is None: + return None, "you have no privileges in this organisation" + return int(organisation_id[0][0]), None + except IntegrityError: + return None, "User already in organisation" + except Exception as e: + print("addUserToOrganisation2w failed because: \n", e) + return None, 'Unknown error' + def removeUserFromOrganisation(organisationName: str, sessionToken: str, userToRemove: str): try: From c8b063e484f825a90e8fe56730990c8ffc1c66c2 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Fri, 12 Jan 2024 15:25:11 +0100 Subject: [PATCH 104/254] add env --- wannadb_web/.env/.dev | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 wannadb_web/.env/.dev diff --git a/wannadb_web/.env/.dev b/wannadb_web/.env/.dev new file mode 100644 index 00000000..d257e7ce --- /dev/null +++ b/wannadb_web/.env/.dev @@ -0,0 +1,19 @@ +FLASK_DEBUG=1 +FLASK_CONFIG=development +DATABASE_HOST=postgres +DATABASE_PORT=5432 +DATABASE_NAME=userManagement +DATABASE_USER=postgres +DATABASE_PASSWORD=0 + +CACHE_HOST=redis +CACHE_PORT=6379 +CACHE_DB=0 +CACHE_PASSWORD=0 + +DATABASE_URL=postgresql://${DATABASE_USER}:${DATABASE_PASSWORD}@localhost:5432/postgres +SECRET_KEY=my_precious +CELERY_BROKER_URL=redis://redis-container:6379/0 +CELERY_RESULT_BACKEND=redis://redis-container:6379/0 + + From be12c129e8ff54825c6351a5cec023622989ef6c Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Fri, 12 Jan 2024 15:26:00 +0100 Subject: [PATCH 105/254] add celery and debug --- app.py | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 70 insertions(+), 10 deletions(-) diff --git a/app.py b/app.py index 
83390a15..9efc196e 100644 --- a/app.py +++ b/app.py @@ -1,24 +1,84 @@ -# app.py -from flask import Flask +import logging +import os + +from celery import Celery, Task +from flask import Flask, make_response, render_template_string from flask_cors import CORS +from flask_debugtoolbar import DebugToolbarExtension + +from wannadb_web.Redis.util import RedisConnection +from wannadb_web.routing.core import core_routes +from wannadb_web.routing.dev import dev_routes +from wannadb_web.routing.user import user_management +from wannadb_web.routing.files import main_routes -from flask_app.dev import dev_routes -from flask_app.endpoints import main_routes -from flask_app.user import user_management +logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") app = Flask(__name__) + + +def celery_init_app(_app: Flask) -> Celery: + _app.app_context() + RedisConnection() + + class FlaskTask(Task): + + def __call__(self, *args: object, **kwargs: object) -> object: + return self.run(*args, **kwargs) + + celery_app = Celery(_app.name, task_cls=FlaskTask) + celery_app.config_from_object(_app.config) # Use the app's entire configuration + celery_app.set_default() + _app.extensions["celery"] = celery_app + return celery_app + + +# Combine Flask and Celery configs +app.config.from_mapping( + SECRET_KEY='secret!', + DEBUG=True, + DEBUG_TB_ENABLED=True, + DEBUG_TB_PROFILER_ENABLED=True, + broker_url=os.environ.get("CELERY_BROKER_URL"), + task_ignore_result=True, + PREFERRED_URL_SCHEME='https', + #PROPAGATE_EXCEPTIONS=True +) +app.config['DEBUG'] = True +# Register the Extensions CORS(app) +toolbar = DebugToolbarExtension(app) + + +celery = celery_init_app(app) # Register the blueprints app.register_blueprint(main_routes) app.register_blueprint(user_management) app.register_blueprint(dev_routes) +app.register_blueprint(core_routes) -@app.route('/') -def hello_world(): - return 'Hello' +@app.errorhandler(404) +def not_found_error(error): + return make_response({'error': f'Not Found \n {error}'}, 404) -if __name__ == '__main__': - app.run(host='0.0.0.0', port=8000, debug=True) +@app.errorhandler(Exception) +def generic_error(error): + return make_response({'error': f'Internal Server Error \n {error}'}, 500) + + +@app.route('/') +@app.route('/DEBUG') +def index(): + html_code = """ + + +
+
+
+    hello
+
+
+ + + """ + return render_template_string(html_code) From 2a615ff833eabf73156249247a66541295ec4ae7 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Fri, 12 Jan 2024 15:26:25 +0100 Subject: [PATCH 106/254] adj dep --- backend-requirements.txt | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/backend-requirements.txt b/backend-requirements.txt index 9beb4274..87a02da3 100644 --- a/backend-requirements.txt +++ b/backend-requirements.txt @@ -1,12 +1,18 @@ -pip==23.3.1 +pip==23.3.2 flask==3.0.0 Flask_Cors==4.0.0 gunicorn==21.2.0 psycopg2~=2.9.9 -bcrypt~=4.1.1 +bcrypt==4.1.2 PyJWT~=2.8.0 wheel==0.42.0 tornado~=6.4 setuptools~=69.0.2 werkzeug~=3.0.1 -pylint~=3.0.3 \ No newline at end of file +pylint~=3.0.3 +flask_profiler~=1.8.1 +flask-debugtoolbar~=0.14.1 +celery~=5.3.6 +flower~=2.0.1 +redis~=5.0.1 +pickle5~=0.0.11 From f2c709f432454b0522ef9e308fc38780d5546731 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Fri, 12 Jan 2024 15:26:55 +0100 Subject: [PATCH 107/254] add Cache_DB.py --- wannadb_web/SQLite/Cache_DB.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 wannadb_web/SQLite/Cache_DB.py diff --git a/wannadb_web/SQLite/Cache_DB.py b/wannadb_web/SQLite/Cache_DB.py new file mode 100644 index 00000000..75f17121 --- /dev/null +++ b/wannadb_web/SQLite/Cache_DB.py @@ -0,0 +1,32 @@ +import logging + +from wannadb_parsql.cache_db import SQLiteCacheDB + +logger = logging.getLogger(__name__) + + +class SQLiteCacheDBWrapper: + def __init__(self, user_id: int, db_file="wannadb_cache.db"): + """Initialize the RedisCache instance for a specific user.""" + self.db_identifier = f"{user_id}_{db_file}" + self.cache_db = SQLiteCacheDB(db_file=self.db_identifier) + + def delete(self): + self.cache_db.conn.close() + self.cache_db = None + self.db_identifier = None + + def reset_cache_db(self): + logger.debug("Reset cache db") + if self.cache_db is not None: + self.cache_db.conn.close() + self.cache_db = None + self.cache_db = SQLiteCacheDB(db_file=self.db_identifier) + + def disconnect(self): + if self.cache_db is None: + logger.error(f"Cache db {self.db_identifier} already deleted") + return False + logger.debug(f"Disconnect {self.db_identifier} from cache db") + self.cache_db.conn.close() + return True From e2283d02d982b16458f2ac9ca1b5f2f63895e4c4 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Fri, 12 Jan 2024 15:27:17 +0100 Subject: [PATCH 108/254] add core.py --- wannadb_web/routing/core.py | 90 +++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 wannadb_web/routing/core.py diff --git a/wannadb_web/routing/core.py b/wannadb_web/routing/core.py new file mode 100644 index 00000000..3aa6ced2 --- /dev/null +++ b/wannadb_web/routing/core.py @@ -0,0 +1,90 @@ +""" +core_routes Module + +This module defines Flask routes for the 'core' functionality of the Wannadb UI. + +It includes a Blueprint named 'core_routes' with routes related to creating document bases. + +Routes: + - /core/create_document_base (POST): Endpoint for creating a document base. + + +Dependencies: + - Flask: Web framework for handling HTTP requests and responses. + - config.tokenDecode: Function for decoding authorization tokens. + - wannadb_ui.wannadb_api.WannaDBAPI: API for interacting with Wannadb. 
+ +Example: + To create a Flask app and register the 'core_routes' Blueprint: + + ```python + from flask import Flask + from core_routes import core_routes + + app = Flask(__name__) + app.register_blueprint(core_routes) + ``` + +Author: Leon Wenderoth +""" +import logging.config +import pickle + +from celery.result import AsyncResult +from flask import Blueprint, make_response, jsonify, url_for + +from wannadb.data.data import Attribute +from wannadb.statistics import Statistics +from wannadb_web.util import tokenDecode +from wannadb_web.worker.tasks import create_document_base_task, long_task +from wannadb_web.worker.util import TaskObject + +core_routes = Blueprint('core_routes', __name__, url_prefix='/core') + +logger = logging.getLogger(__name__) + + +@core_routes.route('/document_base', methods=['POST']) +def create_document(): + # form = request.form + # authorization = request.headers.get("authorization") + # _organisation_id = int(form.get("organisationId")) + # + authorization = ("eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VyIjoibGVvbiIsImlkIjoxfQ.YM9gwcXeFSku" + "-bz4RUKkymYvA6Af13sxH-BRlnjCCEA") + + _token = tokenDecode(authorization) + _base_name = "base_name" + document_ids = [2, 3] + attribute = Attribute("a") + statistics = Statistics(False) + user_id = 1 + + attributesDump = pickle.dumps([attribute]) + statisticsDump = pickle.dumps(statistics) + + task = create_document_base_task.apply_async(args=(user_id, document_ids, attributesDump, statisticsDump)) + + return make_response({'task_id': task.id}, 202) + + +@core_routes.route('/longtask', methods=['POST']) +def longtask(): + task = long_task.apply_async() + return jsonify(str(task.id)), 202, {'Location': url_for('core_routes.task_status', + task_id=task.id)} + + +@core_routes.route('/status/') +def task_status(task_id): + task: AsyncResult = long_task.AsyncResult(task_id) + print(task.status) + meta = task.info + if meta is None: + return make_response({"error": "task not found"}, 404) + if not isinstance(meta, bytes): + return make_response({"error": "task not correct"}, 404) + + taskObject = TaskObject.from_dump(meta) + return make_response({"state": taskObject.state.value, "meta": taskObject.signals.to_json(), "msg": taskObject.msg}, + 200) From d852af6a6cd9a27bb12eefa87b635f7ef09641d3 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Fri, 12 Jan 2024 15:28:10 +0100 Subject: [PATCH 109/254] style: reformat --- {flask_app => wannadb_web/routing}/dev.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) rename {flask_app => wannadb_web/routing}/dev.py (84%) diff --git a/flask_app/dev.py b/wannadb_web/routing/dev.py similarity index 84% rename from flask_app/dev.py rename to wannadb_web/routing/dev.py index 1253eeab..7f2056c3 100644 --- a/flask_app/dev.py +++ b/wannadb_web/routing/dev.py @@ -1,7 +1,8 @@ from flask import Blueprint, make_response -from postgres.queries import _getDocument -from postgres.transactions import createUserTable, createDocumentsTable, createOrganisationTable, createMembershipTable, \ +from wannadb_web.postgres.queries import _getDocument +from wannadb_web.postgres.transactions import createUserTable, createDocumentsTable, createOrganisationTable, \ + createMembershipTable, \ dropTables, dropSchema, createSchema dev_routes = Blueprint('dev_routes', __name__, url_prefix='/dev') @@ -19,6 +20,7 @@ def create_tables(schema): except Exception as e: print(f"create Tables in {schema} failed because: \n", e) + @dev_routes.route('/dropTables/', methods=['POST']) def drop_tables(schema): try: From 
7ce16d24823651cf6a4917eef9cf61004ceea57b Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Fri, 12 Jan 2024 15:28:36 +0100 Subject: [PATCH 110/254] add worker and db to project --- Dockerfile | 27 ++++++++++++++++++++++---- docker-compose.yaml | 47 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 69 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index a67438c1..51c18910 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,20 @@ -FROM python:3.9 as build +FROM python:3.9-slim-buster as build + +ENV PYTHONUNBUFFERED 1 +ENV PYTHONDONTWRITEBYTECODE 1 + +RUN apt-get update \ + # dependencies for building Python packages + && apt-get install -y build-essential \ + # psycopg2 dependencies + && apt-get install -y libpq-dev \ + # Additional dependencies + && apt-get install -y telnet netcat \ + # cleaning up unused files + && apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false \ + && rm -rf /var/lib/apt/lists/* + -USER root RUN mkdir /home/wannadb WORKDIR /home/wannadb @@ -22,17 +36,22 @@ RUN pip install --use-pep517 -r backend-requirements.txt RUN pip install --use-pep517 pytest #RUN pytest -#copy the rest -COPY . . +FROM build as worker + + FROM build as dev #CMD [ "python", "app.py" ] + CMD ["flask", "--app", "app", "--debug", "run","--host","0.0.0.0", "--port", "8000" ] FROM build as prod +#copy the rest +COPY . . + RUN chmod +x entrypoint.sh # Define the entrypoint.sh diff --git a/docker-compose.yaml b/docker-compose.yaml index 09b60e73..7ed9759e 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -2,20 +2,57 @@ version: "3.6" services: wannadb: build: - context: . dockerfile: Dockerfile target: dev restart: always tty: true ports: - "8000:8000" + env_file: + - wannadb_web/.env/.dev depends_on: - postgres + - redis volumes: - ./:/home/wannadb networks: - mynetwork + worker: + build: + dockerfile: Dockerfile + target: worker + tty: true + command: [ 'celery', '-A', 'app.celery','worker', '-l', 'info'] + env_file: + - wannadb_web/.env/.dev + volumes: + - ./:/home/wannadb + networks: + - mynetwork + depends_on: + - wannadb + - redis + + + flower: + build: + dockerfile: Dockerfile + target: worker + tty: true + command: [ 'celery', '-A', 'app.celery', 'flower' ] + env_file: + - wannadb_web/.env/.dev + volumes: + - ./:/home/wannadb + networks: + - mynetwork + ports: + - "5555:5555" + depends_on: + - wannadb + - redis + postgres: image: postgres container_name: postgres-container @@ -29,6 +66,14 @@ services: volumes: - pgdata:/var/lib/postgresql/data + redis: + image: redis:alpine + container_name: redis-container + ports: + - "6379:6379" + networks: + - mynetwork + networks: mynetwork: driver: bridge From 23d9ed4a89c0080fd85cdc68cac01555a06d35f4 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Fri, 12 Jan 2024 15:29:26 +0100 Subject: [PATCH 111/254] add files.py --- wannadb_web/routing/files.py | 56 ++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 wannadb_web/routing/files.py diff --git a/wannadb_web/routing/files.py b/wannadb_web/routing/files.py new file mode 100644 index 00000000..93b38d8f --- /dev/null +++ b/wannadb_web/routing/files.py @@ -0,0 +1,56 @@ +from flask import Blueprint, request, make_response + +from wannadb_web.postgres.queries import getDocument +from wannadb_web.util import tokenDecode +from wannadb_web.postgres.transactions import addDocument + +main_routes = Blueprint('main_routes', __name__, url_prefix='/data') + + +@main_routes.route('/file', methods=['POST']) +def 
upload_files(): + files = request.files.getlist('file') + form = request.form + + authorization = request.headers.get("authorization") + organisation_id = int(form.get("organisationId")) + + token = tokenDecode(authorization) + + document_ids: list = [] + + for file in files: + content_type = file.content_type + if 'text/plain' in content_type: + filename = file.filename + content = str(file.stream.read().decode('utf-8')) + dokument_id = addDocument(filename, content, organisation_id, token.id) + document_ids.append(dokument_id) + else: + document_ids.append(f"wrong type {content_type}") + + if all(isinstance(document_ids, str) for _ in document_ids): + return make_response(document_ids, 400) + if any(isinstance(document_ids, str) for _ in document_ids): + return make_response(document_ids, 207) + return make_response(document_ids, 201) + + +@main_routes.route('/file/<_id>', methods=['GET']) +def get_file(_id): + print(request.json) + authorization = request.json.get("authorization") + document_id = int(_id) + + token = tokenDecode(authorization) + + document_ids: list = [] + + document = getDocument(document_id, token.id) + + if document is None: + return make_response(document_ids, 404) + if isinstance(document, str): + return make_response(document, 200) + if isinstance(document, bytes): + return make_response(document, 206) From 1812c6e2015bf91e27f4e094aea6e3e9cb53fd46 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Fri, 12 Jan 2024 15:30:33 +0100 Subject: [PATCH 112/254] mov --- postgres/queries.py | 117 ---------------------- wannadb_web/postgres/queries.py | 168 ++++++++++++++++++++++++++++++++ 2 files changed, 168 insertions(+), 117 deletions(-) delete mode 100644 postgres/queries.py create mode 100644 wannadb_web/postgres/queries.py diff --git a/postgres/queries.py b/postgres/queries.py deleted file mode 100644 index ec389325..00000000 --- a/postgres/queries.py +++ /dev/null @@ -1,117 +0,0 @@ -from typing import Union -import bcrypt -from psycopg2 import sql -from postgres.util import execute_query - - -def getUserID(user: str): - select_query = sql.SQL("SELECT id FROM users WHERE username = %s;") - return execute_query(select_query, (user,)) - - -def getOrganisationID(organisation_name: str): - select_query = sql.SQL("SELECT id FROM organisations WHERE name = %s;") - response = execute_query(select_query, (organisation_name,)) - if response is None: - return -1 - return int(response[0]) - - -def getOrganisationName(organisation_id: int): - select_query = sql.SQL("SELECT name FROM organisations WHERE id = %s;") - response = execute_query(select_query, (organisation_id,)) - if response is None: - return -1 - return str(response[0]) - -def getMembersOfOrganisation(organisation_id: int): - select_query = sql.SQL("SELECT username FROM users WHERE id IN (SELECT userid FROM membership WHERE organisationid = %s);") - return execute_query(select_query, (organisation_id,)) - -def getMemberIDsFromOrganisationID(organisationID: int): - select_query = sql.SQL("SELECT userid FROM membership WHERE organisationid = %s;") - return execute_query(select_query, (organisationID,)) - - -def getOrganisationIDsFromUserId(userID: int): - try: - select_query = sql.SQL("SELECT organisationid FROM membership WHERE userid = %s;") - response = execute_query(select_query, (userID,)) - if isinstance(response, list): - return response[0], None - elif response is None: - return -1, None - else: - return None, "Unexpected response format" - - except Exception as e: - return None, e - - -def 
getOrganisationFromUserId(user_id: int): - try: - select_query = sql.SQL(""" SELECT organisationid, o.name - FROM membership - JOIN organisations o ON membership.organisationid = o.id - WHERE userid = %s;""") - response = execute_query(select_query, (user_id,)) - if isinstance(response, list): - organisations: list[dict[str, Union[str, int]]] = [] - for org in response: - organisations.append({"id": int(org[0]), "name": str(org[1])}) - return organisations, None - if response is None: - return [-1], None - return None, "Unexpected response format" - except Exception as e: - return None, e - - -def checkPassword(user: str, password: str) -> Union[tuple[bool, int], tuple[bool, str]]: - select_query = sql.SQL("SELECT password,id as pw FROM users WHERE username = %s ") - _password, _id = execute_query(select_query, (user,))[0] - - try: - if _password: - stored_password = bytes(_password) - check = bcrypt.checkpw(password.encode('utf-8'), stored_password) - if check: - return bcrypt.checkpw(password.encode('utf-8'), stored_password), int(_id) - - return False, "" - - except Exception as e: - print("checkPassword failed because: \n", e) - return False, str(e) - - -def checkOrganisationAuthorisation(organisationName: str, userName: str) -> int: - select_query = sql.SQL("SELECT membership from membership " - "where userid = (SELECT id from users where username = (%s)) " - "and " - "organisationid = (Select id from organisations where name = (%s))") - - result = execute_query(select_query, (organisationName, userName)) - try: - if result[0]: - authorisation = result[0] - return int(authorisation) # sketchy conversion but works - - except Exception as e: - print("checkOrganisationAuthorisation failed because: \n", e) - return 99 - - -def _getDocument(documentId: int): - select_query = sql.SQL("SELECT content " - "from documents " - "where id = (%s)") - - result = execute_query(select_query, (documentId,)) - try: - if result[0]: - content = result[0] - return str(content) - - except Exception as e: - print("checkOrganisationAuthorisation failed because: \n", e) diff --git a/wannadb_web/postgres/queries.py b/wannadb_web/postgres/queries.py new file mode 100644 index 00000000..389e7684 --- /dev/null +++ b/wannadb_web/postgres/queries.py @@ -0,0 +1,168 @@ +from typing import Union + +import bcrypt +from psycopg2 import sql + +from wannadb_web.postgres.util import execute_query + + +def getUserID(user: str): + select_query = sql.SQL("SELECT id FROM users WHERE username = %s;") + return execute_query(select_query, (user,)) + + +def getOrganisationID(organisation_name: str): + select_query = sql.SQL("SELECT id FROM organisations WHERE name = %s;") + return execute_query(select_query, (organisation_name,)) + + +def getMemberIDsFromOrganisationID(organisationID: int): + select_query = sql.SQL("SELECT userid FROM membership WHERE organisationid = %s;") + return execute_query(select_query, (organisationID,)) + + +def getOrganisationIDsFromUserId(userID: int): + try: + select_query = sql.SQL("SELECT organisationid FROM membership WHERE userid = %s;") + response = execute_query(select_query, (userID,)) + if isinstance(response, list): + return response[0], None + elif response is None: + return [-1], None + else: + return None, "Unexpected response format" + + except Exception as e: + return None, e + + +def checkPassword(user: str, password: str) -> Union[tuple[bool, int], tuple[bool, str]]: + select_query = sql.SQL("SELECT password,id as pw FROM users WHERE username = %s ") + _password, _id = 
execute_query(select_query, (user,))[0] + + try: + if _password: + stored_password = bytes(_password) + check = bcrypt.checkpw(password.encode('utf-8'), stored_password) + if check: + return bcrypt.checkpw(password.encode('utf-8'), stored_password), int(_id) + + return False, "" + + except Exception as e: + print("checkPassword failed because: \n", e) + return False, str(e) + + +def checkOrganisationAuthorisation(organisationName: str, userName: str) -> int: + select_query = sql.SQL("SELECT membership from membership " + "where userid = (SELECT id from users where username = (%s)) " + "and " + "organisationid = (Select id from organisations where name = (%s))") + + result = execute_query(select_query, (organisationName, userName)) + try: + if result[0]: + authorisation = result[0] + return int(authorisation) # sketchy conversion but works + + except Exception as e: + print("checkOrganisationAuthorisation failed because: \n", e) + return 99 + + +def _getDocument(documentId: int): + select_query = sql.SQL("""SELECT content,content_byte + from documents + where id = (%s)""") + + result = execute_query(select_query, (documentId,)) + try: + if result[0]: + if result[0][0]: + content = result[0][0] + return str(content) + else: + content = result[0][1] + return bytes(content) + else: + return None + + except Exception as e: + print("_getDocument failed because: \n", e) + + +def getDocument(document_id: int, user_id: int): + select_query = sql.SQL("""SELECT name,content,content_byte + FROM documents + JOIN membership m ON documents.organisationid = m.organisationid + WHERE id = (%s) AND m.userid = (%s) + """) + + result = execute_query(select_query, (document_id, user_id,)) + try: + if len(result) > 0: + for document in result: + name = document[0] + if document[1]: + content = document[1] + return str(name), str(content) + elif document[2]: + content = document[2] + return str(name), bytes(content) + else: + return None + except Exception as e: + print("getDocument failed because:\n", e) + + +def getDocuments(document_ids: list[int], user_id: int): + select_query = sql.SQL(f"""SELECT name,content,content_byte + FROM documents + JOIN membership m ON documents.organisationid = m.organisationid + WHERE m.userid = (%s) and documents.id in + ({",".join(str(_id) for _id in document_ids)}) + """) + result = execute_query(select_query, (user_id,)) + try: + if len(result) > 0: + documents = [] + for document in result: + name = document[0] + if document[1]: + content = document[1] + documents.append((str(name), str(content))) + elif document[2]: + content = document[2] + documents.append((str(name), bytes(content))) + return documents + else: + return None + except Exception as e: + print("getDocuments failed because:\n", e) + + +def getDocument_ids(organisation_id: int, user_id: int): + select_query = sql.SQL("""SELECT name,content,content_byte + from documents + join membership m on documents.organisationid = m.organisationid + where m.organisationid = (%s) and m.userid = (%s) + """) + + result = execute_query(select_query, (organisation_id, user_id,)) + print(result) + documents = [] + try: + if len(result) > 0: + for document in result: + if document[1]: + name = document[0] + content = document[1] + documents.append((str(name), str(content))) + elif document[2]: + name = document[0] + content = document[2] + documents.append((str(name), bytes(content))) + return documents + except Exception as e: + print("getDocument_ids failed because: \n", e) From 3508a23f3445af258eea5dc9fae8fc825372b4d1 Mon Sep 
17 00:00:00 2001 From: lw86ruwo Date: Fri, 12 Jan 2024 15:30:54 +0100 Subject: [PATCH 113/254] add Signal.py --- wannadb_web/worker/Signal.py | 62 ++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 wannadb_web/worker/Signal.py diff --git a/wannadb_web/worker/Signal.py b/wannadb_web/worker/Signal.py new file mode 100644 index 00000000..410155d3 --- /dev/null +++ b/wannadb_web/worker/Signal.py @@ -0,0 +1,62 @@ +from dataclasses import dataclass +from typing import Any + + +class Signals: + def __init__(self): + self.feedback = Signal("feedback") + self.status = Signal("status") + self.finished = Signal("finished") + self.error = Signal("error") + self.document_base_to_ui = Signal("document_base_to_ui") + self.statistics_to_ui = Signal("statistics_to_ui") + self.feedback_request_to_ui = Signal("feedback_request_to_ui") + self.cache_db_to_ui = Signal("cache_db_to_ui") + + def print(self): + print(self.feedback) + print(self.status) + print(self.finished) + print(self.error) + print(self.document_base_to_ui) + print(self.statistics_to_ui) + print(self.feedback_request_to_ui) + print(self.cache_db_to_ui) + + def to_json(self): + try: + return {self.feedback.type: self.feedback.to_json(), + self.error.type: self.error.to_json(), + self.status.type: self.status.to_json(), + self.finished.type: self.finished.to_json(), + self.document_base_to_ui.type: self.document_base_to_ui.to_json(), + self.statistics_to_ui.type: self.statistics_to_ui.to_json(), + self.feedback_request_to_ui.type: self.feedback_request_to_ui.to_json(), + self.cache_db_to_ui.type: self.cache_db_to_ui.to_json()} + except Exception as e: + print(e) + return {} + + +@dataclass +class Signal: + type: str + __msg: list[Any] + + def __init__(self, signal_type: str): + self.type = signal_type + self.__msg = [] + + @property + def msg(self): + return self.__msg + + def to_json(self): + return { + 'type': self.type, + 'msg': str(self.msg) + } + + def emit(self, *args: Any): + for arg in args: + self.msg.append(arg) From 417c63a354cb8de887dd0f5e126a03ecc27cdad1 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Fri, 12 Jan 2024 15:31:11 +0100 Subject: [PATCH 114/254] add RedisCache.py --- wannadb_web/Redis/RedisCache.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 wannadb_web/Redis/RedisCache.py diff --git a/wannadb_web/Redis/RedisCache.py b/wannadb_web/Redis/RedisCache.py new file mode 100644 index 00000000..5a8170d3 --- /dev/null +++ b/wannadb_web/Redis/RedisCache.py @@ -0,0 +1,28 @@ +from typing import Optional +import logging + +from wannadb_web.Redis import util + +logger = logging.getLogger(__name__) + + +class RedisCache: + def __init__(self, user_id: int) -> None: + """Initialize the RedisCache instance for a specific user.""" + self.redis_client = util.Redis_Connection.redis_client + self.user_space_key = f"user:{str(user_id)}" + + def set(self, key: str, value: str) -> None: + """Set a key-value pair in the user-specific space.""" + user_key = f"{self.user_space_key}:{key}" + self.redis_client.set(user_key, value) + + def get(self, key: str) -> Optional[str]: + """Get the value associated with a key in the user-specific space.""" + user_key = f"{self.user_space_key}:{key}" + return self.redis_client.get(user_key) + + def close(self) -> None: + """Close the Redis connection for the user-specific space.""" + self + pass From c7e9ea60da8a8bd0ff025e925321624e7fdba756 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Fri, 12 Jan 2024 15:32:05 +0100 Subject: [PATCH 
115/254] mov --- tmp/Web_API_Thread.py | 54 +++ tmp/Web_Thread_Manager.py | 48 +++ entrypoint.sh => wannadb_web/entrypoint.sh | 0 .../postgres}/transactions.py | 30 +- {postgres => wannadb_web/postgres}/util.py | 12 +- {flask_app => wannadb_web/routing}/user.py | 15 +- config.py => wannadb_web/util.py | 4 + wannadb_web/worker/Web_API.py | 320 ++++++++++++++++++ 8 files changed, 461 insertions(+), 22 deletions(-) create mode 100644 tmp/Web_API_Thread.py create mode 100644 tmp/Web_Thread_Manager.py rename entrypoint.sh => wannadb_web/entrypoint.sh (100%) rename {postgres => wannadb_web/postgres}/transactions.py (92%) rename {postgres => wannadb_web/postgres}/util.py (86%) rename {flask_app => wannadb_web/routing}/user.py (92%) rename config.py => wannadb_web/util.py (92%) create mode 100644 wannadb_web/worker/Web_API.py diff --git a/tmp/Web_API_Thread.py b/tmp/Web_API_Thread.py new file mode 100644 index 00000000..070f1675 --- /dev/null +++ b/tmp/Web_API_Thread.py @@ -0,0 +1,54 @@ +import logging +import threading +from datetime import datetime +from enum import Enum +from wannadb.data.data import Attribute, Document +from wannadb.statistics import Statistics +from wannadb_web.worker.Web_API import Web_API +from wannadb.resources import ResourceManager + +logger = logging.getLogger(__name__) + + +class Status(Enum): + """Gives the status of the application.""" + IDLE = 1 + RUNNING = 2 + CREATED = 3 + DEAD = 98 + ERROR = 99 + + +class Web_API_Thread(threading.Thread): + def __init__(self, thread_id): + super().__init__() + self.function = None + self.thread_id = thread_id + self.wannadb_web_api = Web_API() + self.event = threading.Event() + self.status = Status.IDLE + self.last_call = datetime.now() + self.exit_flag = False + + def run(self): + ResourceManager() + self.status = Status.RUNNING + while True: + if self.exit_flag: + self.status = Status.DEAD + logger.info(f"Thread {self.thread_id} exited") + return + self.event.wait() + self.event.clear() + if self.function is not None: + self.function() + self.last_call = datetime.now() + else: + raise Exception("No function set") + self.function = None + + def create_document_base(self, documents: [Document], attributes: [Attribute], statistics: Statistics): + if self.function is not None: + raise Exception("Function running") + self.function = lambda: self.wannadb_web_api.create_document_base_task(documents, attributes, statistics) + self.event.set() diff --git a/tmp/Web_Thread_Manager.py b/tmp/Web_Thread_Manager.py new file mode 100644 index 00000000..916382ce --- /dev/null +++ b/tmp/Web_Thread_Manager.py @@ -0,0 +1,48 @@ +import threading +import logging.config +import time +from datetime import datetime +from wannadb_web.worker.Web_API_Thread import Web_API_Thread + +logger = logging.getLogger(__name__) + + +class Web_Thread_Manager(threading.Thread): + def __init__(self, idle_time=60): + super().__init__() + logger.info("Web_Thread_Manager initialized") + self.idle_time = idle_time + self.threads: dict[int, Web_API_Thread] = {} + self.thread_limit = 2 + global web_Thread_Manager + web_Thread_Manager = self + + def run(self): + logger.info("Web_Thread_Manager running") + while True: + time.sleep(self.idle_time) + for thread_id, thread in self.threads.items(): + if not thread.is_alive(): + logger.info(f"Thread {thread_id} cleaned") + del self.threads[thread_id] + elif (datetime.now() - thread.last_call).total_seconds() > self.idle_time: + thread.exit_flag = True + + def access_thread(self, thread_id): + if thread_id not in self.threads: + 
logger.error("Thread not found") + raise threading.ThreadError("Thread not found") + logger.debug(f"Thread {thread_id} accessed") + return self.threads[thread_id] + + def new_thread(self, thread_id): + if thread_id in self.threads: + logger.debug(f"Thread {thread_id} already exists") + return self.threads[thread_id] + if len(self.threads) >= self.thread_limit: + logger.error("Thread limit reached") + raise threading.ThreadError("Thread limit reached") + thread = Web_API_Thread(thread_id) + thread.start() + logger.debug(f"Thread {thread_id} created and started") + return thread diff --git a/entrypoint.sh b/wannadb_web/entrypoint.sh similarity index 100% rename from entrypoint.sh rename to wannadb_web/entrypoint.sh diff --git a/postgres/transactions.py b/wannadb_web/postgres/transactions.py similarity index 92% rename from postgres/transactions.py rename to wannadb_web/postgres/transactions.py index 9ea81562..29de6e3f 100644 --- a/postgres/transactions.py +++ b/wannadb_web/postgres/transactions.py @@ -1,8 +1,10 @@ +from typing import Union + import bcrypt from psycopg2 import sql, IntegrityError -from config import Token, Authorisation, tokenDecode -from postgres.queries import checkPassword -from postgres.util import execute_transaction +from wannadb_web.util import Token, Authorisation, tokenDecode +from wannadb_web.postgres.queries import checkPassword +from wannadb_web.postgres.util import execute_transaction # WARNING: This is only for development purposes! @@ -59,8 +61,9 @@ def createDocumentsTable(schema): create_table_query = sql.SQL(f"""CREATE TABLE {schema}.documents ( id bigint NOT NULL GENERATED ALWAYS AS IDENTITY ( INCREMENT 1 START 1 MINVALUE 1 MAXVALUE 9223372036854775807 CACHE 1 ), - name text COLLATE pg_catalog."default" NOT NULL, - content text COLLATE pg_catalog."default" NOT NULL, + name text NOT NULL, + content text , + content_byte bytea, organisationid bigint NOT NULL, userid bigint NOT NULL, CONSTRAINT dokumentid PRIMARY KEY (id), @@ -74,6 +77,8 @@ def createDocumentsTable(schema): ON UPDATE CASCADE ON DELETE CASCADE NOT VALID + CONSTRAINT check_only_one_filled + check (((content IS NOT NULL) AND (content_byte IS NULL)) OR ((content IS NOT NULL) AND (content_byte IS NULL))) ) TABLESPACE pg_default;""") @@ -142,7 +147,7 @@ def addUser(user: str, password: str): pwBytes = password.encode('utf-8') salt = bcrypt.gensalt() pwHash = bcrypt.hashpw(pwBytes, salt) - # Needed this for the correct password check dont know why... + # Needed this for the correct password check don't know why... 
pwHash = pwHash.decode('utf-8') insert_data_query = sql.SQL("INSERT INTO users (username, password) VALUES (%s, %s) returning id;") @@ -198,7 +203,6 @@ def addOrganisation(organisationName: str, sessionToken: str): try: token: Token = tokenDecode(sessionToken) userid = token.id - insert_query = sql.SQL("with a as (INSERT INTO organisations (name) VALUES (%s) returning id) " "INSERT INTO membership (userid,organisationid) select (%s),id from a returning organisationid") organisation_id = execute_transaction(insert_query, (organisationName, userid), commit=True) @@ -344,13 +348,19 @@ def adjUserAuthorisation(organisationName: str, sessionToken: str, userToAdjust: print("adjUserAuthorisation failed because: \n", e) -def addDocument(name: str, content: str, organisationId: int, userid: int): +def addDocument(name: str, content: Union[str, bytes], organisationId: int, userid: int): try: - insert_data_query = sql.SQL("INSERT INTO documents (name,content,organisationid,userid) " - "VALUES (%s, %s,%s, %s) returning id;") + if isinstance(content, str): + insert_data_query = sql.SQL("INSERT INTO documents (name,content,organisationid,userid) " + "VALUES (%s, %s,%s, %s) returning id;") + else: + insert_data_query = sql.SQL("INSERT INTO documents (name,content_byte,organisationid,userid) " + "VALUES (%s, %s,%s, %s) returning id;") data_to_insert = (name, content, organisationId, userid) response = execute_transaction(insert_data_query, data_to_insert, commit=True) return int(response[0][0]) + except IntegrityError: + return -1 except Exception as e: print("addDocument failed because: \n", e) diff --git a/postgres/util.py b/wannadb_web/postgres/util.py similarity index 86% rename from postgres/util.py rename to wannadb_web/postgres/util.py index dd24f41c..59a907cb 100644 --- a/postgres/util.py +++ b/wannadb_web/postgres/util.py @@ -1,13 +1,15 @@ +import os + import psycopg2 from psycopg2 import extensions, IntegrityError, sql from psycopg2.sql import SQL -DB_NAME = "userManagement" -DB_USER = "postgres" -DB_PASSWORD = "0" -DB_HOST = "postgres" +DB_NAME = os.environ.get("DATABASE_NAME") +DB_USER = os.environ.get("DATABASE_USER") +DB_PASSWORD = os.environ.get("DATABASE_PASSWORD") +DB_HOST = os.environ.get("DATABASE_HOST") #DB_HOST = "127.0.0.1" -DB_PORT = "5432" +DB_PORT = os.environ.get("DATABASE_PORT") def connectPG(): diff --git a/flask_app/user.py b/wannadb_web/routing/user.py similarity index 92% rename from flask_app/user.py rename to wannadb_web/routing/user.py index 1ecf1edf..bbfdc13f 100644 --- a/flask_app/user.py +++ b/wannadb_web/routing/user.py @@ -1,10 +1,10 @@ # main_routes.py from flask import Blueprint, request, make_response -from config import Token, tokenEncode, tokenDecode -from postgres.queries import (checkPassword, getMembersOfOrganisation, - getOrganisationIDsFromUserId, getOrganisationName, getOrganisationFromUserId) -from postgres.transactions import addUser, addOrganisation, addUserToOrganisation, addUserToOrganisation2, deleteUser, leaveOrganisation +from wannadb_web.util import Token, tokenEncode, tokenDecode +from wannadb_web.postgres.queries import checkPassword, getOrganisationIDsFromUserId +from wannadb_web.postgres.transactions import (addUser, addOrganisation, addUserToOrganisation2, deleteUser, + leaveOrganisation) user_management = Blueprint('user_management', __name__) @@ -23,9 +23,10 @@ def register(): return make_response({'message': 'User registered successfully', 'token': token}, 201) - if _id < 0: + elif _id < 0: return make_response({'message': 'Conflicting 
username'}, 409) - return make_response({'message': 'User register failed'}, 422) + else: + return make_response({'message': 'User register failed'}, 422) @user_management.route('/login', methods=['POST']) @@ -174,5 +175,5 @@ def get_organisation_members(_id): members = [] for member in members_raw: members.append(member[0]) - + return make_response({"members": members}, 200) \ No newline at end of file diff --git a/config.py b/wannadb_web/util.py similarity index 92% rename from config.py rename to wannadb_web/util.py index 919a70ca..92b857de 100644 --- a/config.py +++ b/wannadb_web/util.py @@ -1,9 +1,13 @@ import datetime +import json +import logging from enum import Enum from typing import Any import jwt +logger: logging.Logger = logging.getLogger(__name__) + class Authorisation(Enum): Owner = 0 diff --git a/wannadb_web/worker/Web_API.py b/wannadb_web/worker/Web_API.py new file mode 100644 index 00000000..738fa7e9 --- /dev/null +++ b/wannadb_web/worker/Web_API.py @@ -0,0 +1,320 @@ +import csv +import io +import json +import logging +from typing import Optional + +from wannadb.configuration import Pipeline +from wannadb.data.data import Attribute, Document, DocumentBase +from wannadb.interaction import EmptyInteractionCallback, InteractionCallback +from wannadb.matching.distance import SignalsMeanDistance +from wannadb.matching.matching import RankingBasedMatcher +from wannadb.preprocessing.embedding import BERTContextSentenceEmbedder, RelativePositionEmbedder, \ + SBERTTextEmbedder, SBERTLabelEmbedder +from wannadb.preprocessing.extraction import StanzaNERExtractor, SpacyNERExtractor +from wannadb.preprocessing.label_paraphrasing import OntoNotesLabelParaphraser, \ + SplitAttributeNameLabelParaphraser +from wannadb.preprocessing.normalization import CopyNormalizer +from wannadb.preprocessing.other_processing import ContextSentenceCacher +from wannadb.statistics import Statistics +from wannadb.status import StatusCallback +from wannadb_web.Redis.RedisCache import RedisCache +from wannadb_web.SQLite import Cache_DB +from wannadb_web.SQLite.Cache_DB import SQLiteCacheDBWrapper +from wannadb_web.postgres.queries import getDocument +from wannadb_web.postgres.transactions import addDocument +from wannadb_web.worker.util import TaskObject + +logger = logging.getLogger(__name__) + + +class WannaDB_WebAPI: + + def __init__(self, user_id: int, task_object:TaskObject): + logger.info("WannaDB_WebAPI initialized") + self.user_id = user_id + self.sqLiteCacheDBWrapper = SQLiteCacheDBWrapper(user_id, db_file=":memory:") + self.redisCache = RedisCache(user_id) + self.status_callback = task_object.status_callback + self.interaction_callback = task_object.interaction_callback + self.signals = task_object.signals + + def create_document_base(self, documents: list[Document], attributes: list[Attribute], statistics: Statistics): + logger.debug("Called slot 'create_document_base'.") + self.signals.status.emit("Creating document base...", -1) + try: + self.sqLiteCacheDBWrapper.reset_cache_db() + + document_base = DocumentBase(documents, attributes) + self.sqLiteCacheDBWrapper.cache_db.create_input_docs_table("input_document", document_base.documents) + + if not document_base.validate_consistency(): + logger.error("Document base is inconsistent!") + error = "Document base is inconsistent!" 
+ return error + + # load default preprocessing phase + self.signals.status.emit("Loading preprocessing phase...", -1) + + # noinspection PyTypeChecker + preprocessing_phase = Pipeline([ + StanzaNERExtractor(), + SpacyNERExtractor("SpacyEnCoreWebLg"), + ContextSentenceCacher(), + CopyNormalizer(), + OntoNotesLabelParaphraser(), + SplitAttributeNameLabelParaphraser(do_lowercase=True, splitters=[" ", "_"]), + SBERTLabelEmbedder("SBERTBertLargeNliMeanTokensResource"), + SBERTTextEmbedder("SBERTBertLargeNliMeanTokensResource"), + BERTContextSentenceEmbedder("BertLargeCasedResource"), + RelativePositionEmbedder() + ]) + + preprocessing_phase(document_base, EmptyInteractionCallback(), self.status_callback, statistics) + + self.signals.document_base_to_ui.emit(document_base) + self.signals.statistics_to_ui.emit(statistics) + self.signals.finished.emit("Finished!") + + except Exception as e: + self.signals.error.emit(e) + + def load_document_base_from_bson(self, document_id: int, user_id: int): + logger.debug("Called function 'load_document_base_from_bson'.") + wrapper_cache_db: Optional[SQLiteCacheDBWrapper] = None + try: + wrapper_cache_db = Cache_DB.Cache_Manager.user(user_id) + cache_db = wrapper_cache_db.cache_db + + document = getDocument(document_id, user_id) + if isinstance(document, str): + logger.error("document is not a DocumentBase!") + return -1 + document_base = DocumentBase.from_bson(document) + + if not document_base.validate_consistency(): + logger.error("Document base is inconsistent!") + return -1 + + wrapper_cache_db.reset_cache_db() + + for attribute in document_base.attributes: + cache_db.create_table_by_name(attribute.name) + cache_db.create_input_docs_table("input_document", document_base.documents) + + logger.info(f"Document base loaded from BSON with ID {document_id}.") + return document_base + + except Exception as e: + logger.error(str(e)) + return -1 + finally: + if wrapper_cache_db is not None: + wrapper_cache_db.disconnect() + + def save_document_base_to_bson(self, name: str, organisation_id: int, document_base: DocumentBase, user_id: int): + logger.debug("Called function 'save_document_base_to_bson'.") + try: + document_id = addDocument(name, document_base.to_bson(), organisation_id, user_id) + if document_id is None: + logger.error("Document base could not be saved to BSON!") + elif document_id == -1: + logger.error("Document base could not be saved to BSON! 
Document name already exists!") + return -1 + logger.info(f"Document base saved to BSON with ID {document_id}.") + return document_id + except Exception as e: + logger.debug(str(e)) + + def save_table_to_csv(self, document_base: DocumentBase): + logger.debug("Called function 'save_table_to_csv'.") + try: + buffer = io.StringIO() + + # check that the table is complete + for attribute in document_base.attributes: + for document in document_base.documents: + if attribute.name not in document.attribute_mappings.keys(): + logger.error("Cannot save a table with unpopulated attributes!") + return -1 + + # TODO: currently stores the text of the first matching nugget (if there is one) + table_dict = document_base.to_table_dict("text") + headers = list(table_dict.keys()) + rows = [] + for ix in range(len(table_dict[headers[0]])): + row = [] + for header in headers: + if header == "document-name": + row.append(table_dict[header][ix]) + elif not table_dict[header][ix]: + row.append(None) + else: + row.append(table_dict[header][ix][0]) + rows.append(row) + writer = csv.writer(buffer, delimiter=",", quotechar='"', quoting=csv.QUOTE_ALL) + writer.writerow(headers) + writer.writerows(rows) + except FileNotFoundError: + logger.error("Directory does not exist!") + except Exception as e: + logger.error(str(e)) + + def add_attribute(self, name: str, document_base: DocumentBase): + logger.debug("Called function 'add_attribute'.") + try: + if name in [attribute.name for attribute in document_base.attributes]: + logger.error("Attribute name already exists!") + return -1 + elif name == "": + logger.error("Attribute name must not be empty!") + return -1 + else: + document_base.attributes.append(Attribute(name)) + logger.debug(f"Attribute '{name}' added.") + return 0 + except Exception as e: + logger.error(str(e)) + + def add_attributes(self, names: str, document_base: DocumentBase): + logger.debug("Called function 'add_attributes'.") + try: + already_existing_names = [] + for name in names: + if name in [attribute.name for attribute in document_base.attributes]: + logger.info(f"Attribute name '{name}' already exists and was thus not added.") + already_existing_names.append(name) + elif name == "": + logger.info("Attribute name must not be empty and was thus ignored.") + else: + document_base.attributes.append(Attribute(name)) + logger.debug(f"Attribute '{name}' added.") + return already_existing_names + except Exception as e: + logger.error(str(e)) + + def remove_attribute(self, name: str, document_base: DocumentBase): + logger.debug("Called function 'remove_attribute'.") + try: + if name in [attribute.name for attribute in document_base.attributes]: + for document in document_base.documents: + if name in document.attribute_mappings.keys(): + del document.attribute_mappings[name] + + for attribute in document_base.attributes: + if attribute.name == name: + document_base.attributes.remove(attribute) + break + return 0 + else: + logger.error("Attribute name does not exist!") + return -1 + except Exception as e: + logger.error(str(e)) + + def forget_matches_for_attribute(self, name: str, document_base: DocumentBase): + logger.debug("Called function 'forget_matches_for_attribute'.") + try: + if name in [attribute.name for attribute in document_base.attributes]: + for document in document_base.documents: + if name in document.attribute_mappings.keys(): + del document.attribute_mappings[name] + return 0 + else: + logger.error("Attribute name does not exist!") + return -1 + except Exception as e: + logger.error(str(e)) + 
+ def forget_matches(self, name: str, user_id: int, document_base: DocumentBase): + logger.debug("Called function 'forget_matches'.") + wrapper_cache_db: Optional[SQLiteCacheDBWrapper] = None + try: + wrapper_cache_db = Cache_DB.Cache_Manager.user(user_id) + cache_db = wrapper_cache_db.cache_db + + for attribute in document_base.attributes: + cache_db.delete_table(attribute.name) + cache_db.create_table_by_name(attribute.name) + for document in document_base.documents: + document.attribute_mappings.clear() + logger.debug(f"Matche: {name} forgotten.") + return 0 + except Exception as e: + logger.error(str(e)) + return -1 + finally: + if wrapper_cache_db is not None: + wrapper_cache_db.disconnect() + + def save_statistics_to_json(self, statistics: Statistics): + logger.debug("Called function 'save_statistics_to_json'.") + try: + return json.dumps(statistics.to_serializable(), indent=2) + except Exception as e: + logger.error(str(e)) + + def interactive_table_population(self, document_base: DocumentBase, statistics: Statistics): + logger.debug("Called slot 'interactive_table_population'.") + try: + # load default matching phase + self.signals.status.emit("Loading matching phase...", -1) + + # TODO: this should not be implemented here! + def find_additional_nuggets(nugget, documents): + new_nuggets = [] + for document in documents: + doc_text = document.text.lower() + nug_text = nugget.text.lower() + start = 0 + while True: + start = doc_text.find(nug_text, start) + if start == -1: + break + else: + new_nuggets.append((document, start, start + len(nug_text))) + start += len(nug_text) + return new_nuggets + + matching_phase = Pipeline( + [ + SplitAttributeNameLabelParaphraser(do_lowercase=True, splitters=[" ", "_"]), + ContextSentenceCacher(), + SBERTLabelEmbedder("SBERTBertLargeNliMeanTokensResource"), + RankingBasedMatcher( + distance=SignalsMeanDistance( + signal_identifiers=[ + "LabelEmbeddingSignal", + "TextEmbeddingSignal", + "ContextSentenceEmbeddingSignal", + "RelativePositionSignal" + ] + ), + max_num_feedback=100, + len_ranked_list=10, + max_distance=0.2, + num_random_docs=1, + sampling_mode="AT_MAX_DISTANCE_THRESHOLD", + adjust_threshold=True, + nugget_pipeline=Pipeline( + [ + ContextSentenceCacher(), + CopyNormalizer(), + OntoNotesLabelParaphraser(), + SplitAttributeNameLabelParaphraser(do_lowercase=True, splitters=[" ", "_"]), + SBERTLabelEmbedder("SBERTBertLargeNliMeanTokensResource"), + SBERTTextEmbedder("SBERTBertLargeNliMeanTokensResource"), + BERTContextSentenceEmbedder("BertLargeCasedResource"), + RelativePositionEmbedder() + ] + ), + find_additional_nuggets=find_additional_nuggets + ) + ] + ) + + matching_phase(document_base, self.interaction_callback, self.status_callback, statistics) + self.signals.document_base_to_ui.emit(document_base) + self.signals.finished.emit("Finished!") + except Exception as e: + self.signals.error.emit(e) From de1dd5890ec9c8f1f70194290c4b067e1eb62956 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Fri, 12 Jan 2024 15:32:28 +0100 Subject: [PATCH 116/254] add utils --- wannadb_web/Redis/util.py | 56 ++++++++++++++++++++ wannadb_web/SQLite/util.py | 31 +++++++++++ wannadb_web/worker/util.py | 104 +++++++++++++++++++++++++++++++++++++ 3 files changed, 191 insertions(+) create mode 100644 wannadb_web/Redis/util.py create mode 100644 wannadb_web/SQLite/util.py create mode 100644 wannadb_web/worker/util.py diff --git a/wannadb_web/Redis/util.py b/wannadb_web/Redis/util.py new file mode 100644 index 00000000..0a20b799 --- /dev/null +++ 
b/wannadb_web/Redis/util.py @@ -0,0 +1,56 @@ +import os +from typing import Optional +import logging + +import redis + +CACHE_HOST = os.environ.get("CACHE_HOST", "127.0.0.1") +CACHE_PORT = int(os.environ.get("CACHE_PORT", 6379)) +CACHE_DB = int(os.environ.get("CACHE_DB", 0)) +CACHE_PASSWORD = os.environ.get("CACHE_PASSWORD") + +logger = logging.getLogger(__name__) + +Redis_Connection: Optional["RedisConnection"] = None + + +def connectRedis(): + try: + redis_client = redis.Redis( + host=CACHE_HOST, + port=CACHE_PORT, + db=CACHE_DB, + password=CACHE_PASSWORD, + ) + return redis_client + except Exception as e: + raise Exception("Redis connection failed because:", e) + + +class RedisConnection: + def __init__(self) -> None: + """Initialize the Redis_Connection manager.""" + global Redis_Connection + if Redis_Connection is not None: + logger.error("There can only be one Redis_Connection!") + raise RuntimeError("There can only be one Redis_Connection!") + else: + Redis_Connection = self + self.redis_client = connectRedis() + logger.info("Initialized the Redis_Connection.") + + def __enter__(self) -> "RedisConnection": + """Enter the Redis_Connection context.""" + logger.info("Entered the Redis_Connection.") + return self + + def __exit__(self, exc_type, exc_val, exc_tb) -> None: + """Exit the Redis_Connection context.""" + logger.info("Kill all Redis connections") + global Redis_Connection + if Redis_Connection is None: + logger.error("Redis_Connection is None!") + raise RuntimeError("Redis_Connection is None!") + Redis_Connection.redis_client.close() + Redis_Connection = None + logger.info("Exited the resource manager.") diff --git a/wannadb_web/SQLite/util.py b/wannadb_web/SQLite/util.py new file mode 100644 index 00000000..ea9c157c --- /dev/null +++ b/wannadb_web/SQLite/util.py @@ -0,0 +1,31 @@ +import sqlite3 +from sqlite3 import Error + + +def create_connection(db_file, user_id): + """ create a database connection to the SQLite database + specified by db_file with user-specific identifier + :param db_file: general database file + :param user_id: user-specific identifier + :return: Connection object or None + """ + conn = None + try: + db_identifier = f"{db_file}_{user_id}" + conn = sqlite3.connect(db_identifier, check_same_thread=False) + conn.row_factory = sqlite3.Row + return conn + except Error as e: + print(e) + + return conn + + +def alter_table(conn, entry): + if entry["type"] is None: + entry["type"] = 'text' + sql = ''' ALTER TABLE {} ADD COLUMN {} {}'''.format(entry["table"], entry["attribute"], entry["type"]) + cur = conn.cursor() + cur.execute(sql) + conn.commit() + return cur.lastrowid diff --git a/wannadb_web/worker/util.py b/wannadb_web/worker/util.py new file mode 100644 index 00000000..4993e6c3 --- /dev/null +++ b/wannadb_web/worker/util.py @@ -0,0 +1,104 @@ +import enum +import pickle +from dataclasses import dataclass +from typing import Callable, Any, Optional + +from wannadb.interaction import InteractionCallback +from wannadb.status import StatusCallback +from wannadb_web.worker.Signal import Signals + + +class TaskUpdate: + """Task callback that is initialized with a callback function.""" + + def __init__(self, callback_fn: Callable[[str, Any], None]): + """ + Initialize the Task callback. 
+ + :param callback_fn: callback function that is called whenever the interaction callback is called + """ + self._callback_fn: Callable[[str, Any], None] = callback_fn + + def __call__(self, state: str, context: Any) -> None: + return self._callback_fn(state, context) + + +class State(enum.Enum): + STARTED = 'STARTED' + PENDING = 'PENDING' + SUCCESS = 'SUCCESS' + FAILURE = 'FAILURE' + + +@dataclass +class TaskObject: + """Class for representing the response of a task.""" + + msg: str + __signals: Signals + + def __init__(self, task_update_fn: Optional[TaskUpdate], state=State.STARTED): + self.task_update_fn = task_update_fn + self.__state = state + self.__signals = Signals() + + @property + def status_callback(self): + def status_callback_fn(message, progress) -> None: + self.signals.status.emit(message, progress) + self.update(State.PENDING) + + return StatusCallback(status_callback_fn) + + @property + def interaction_callback(self): + def interaction_callback_fn(pipeline_element_identifier, feedback_request): + feedback_request["identifier"] = pipeline_element_identifier + self.signals.feedback_request_to_ui.emit(feedback_request) + self.update(State.PENDING) + return self.signals.feedback + + return InteractionCallback(interaction_callback_fn) + + @property + def state(self) -> State: + return self.__state + + @property + def signals(self) -> Signals: + return self.__signals + + def __set_state(self, state: State): + if not isinstance(state, State): + print("update error Invalid state", state) + raise Exception("update error Invalid state") + if state is None: + print("update error State is none", state) + raise Exception("update error State is none") + self.__state = state + + def __set_signals(self, signals: Signals): + self.__signals = signals + + def update(self, state: State, msg=""): + if isinstance(state, State) and state is not None: + self.__set_state(state) + self.msg = msg + self.task_update_fn(self.state.value, self) + else: + raise Exception("update error State is none") + + + def to_dump(self): + state = self.state + signals = self.signals + msg = self.msg + return pickle.dumps((state, signals, msg)) + + @staticmethod + def from_dump(dump: bytes): + state, signals, msg = pickle.loads(dump) + to = TaskObject(None, state=state) + to.__set_signals(signals) + to.msg = msg + return to From e147a5e5478a820787c7b6cb1a1ddb4b458a8d39 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Fri, 12 Jan 2024 15:32:44 +0100 Subject: [PATCH 117/254] add inits --- wannadb_web/Redis/__init__.py | 0 wannadb_web/SQLite/__init__.py | 0 wannadb_web/__init__.py | 0 wannadb_web/postgres/__init__.py | 0 wannadb_web/routing/__init__.py | 0 wannadb_web/worker/__init__.py | 0 6 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 wannadb_web/Redis/__init__.py create mode 100644 wannadb_web/SQLite/__init__.py create mode 100644 wannadb_web/__init__.py create mode 100644 wannadb_web/postgres/__init__.py create mode 100644 wannadb_web/routing/__init__.py create mode 100644 wannadb_web/worker/__init__.py diff --git a/wannadb_web/Redis/__init__.py b/wannadb_web/Redis/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/wannadb_web/SQLite/__init__.py b/wannadb_web/SQLite/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/wannadb_web/__init__.py b/wannadb_web/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/wannadb_web/postgres/__init__.py b/wannadb_web/postgres/__init__.py new file mode 100644 index 00000000..e69de29b diff --git 
a/wannadb_web/routing/__init__.py b/wannadb_web/routing/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/wannadb_web/worker/__init__.py b/wannadb_web/worker/__init__.py new file mode 100644 index 00000000..e69de29b From a8eb7cf8912de3a809e4b9ab3b37a2b1ed3c3094 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Fri, 12 Jan 2024 15:33:49 +0100 Subject: [PATCH 118/254] replaced with files.py --- flask_app/endpoints.py | 37 ------------------------------------- 1 file changed, 37 deletions(-) delete mode 100644 flask_app/endpoints.py diff --git a/flask_app/endpoints.py b/flask_app/endpoints.py deleted file mode 100644 index f4793068..00000000 --- a/flask_app/endpoints.py +++ /dev/null @@ -1,37 +0,0 @@ -from flask import Blueprint, request, make_response - -from config import tokenDecode -from postgres.transactions import addDocument - -main_routes = Blueprint('main_routes', __name__, url_prefix='/data') - - -@main_routes.route('/upload', methods=['POST']) -def upload_files(): - files = request.files.getlist('file') - form = request.form - - authorization = request.headers.get("authorization") - organisation_id = int(form.get("organisationId")) - - token = tokenDecode(authorization) - - document_ids: list = [] - - for file in files: - content_type = file.content_type - if 'text/plain' in content_type: - filename = file.filename - content = str(file.stream.read().decode('utf-8')) - dokument_id = addDocument(filename, content, organisation_id, token.id) - document_ids.append(dokument_id) - else: - document_ids.append(f"wrong type {content_type}") - - if all(isinstance(document_ids, str) for _ in document_ids): - return make_response(document_ids, 400) - if any(isinstance(document_ids, str) for _ in document_ids): - return make_response(document_ids, 207) - return make_response(document_ids, 201) - - From d97ccb70ecac345be0b4c8998dd8debc35d58ac9 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Fri, 12 Jan 2024 15:34:05 +0100 Subject: [PATCH 119/254] add tasks.py --- wannadb_web/worker/tasks.py | 97 +++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 wannadb_web/worker/tasks.py diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py new file mode 100644 index 00000000..3e19edaa --- /dev/null +++ b/wannadb_web/worker/tasks.py @@ -0,0 +1,97 @@ +import pickle +import random +import time + +from celery import current_app + +from wannadb.data.data import Document, Attribute +from wannadb.statistics import Statistics +from wannadb_web.postgres.queries import getDocuments +from wannadb_web.worker.Web_API import WannaDB_WebAPI +from wannadb_web.worker.util import TaskObject, State, TaskUpdate + + +class U: + + def update_state(*args, **kwargs): + print('update_state called with args: ', args, ' and kwargs: ', kwargs) + print("meta: ", TaskObject.from_dump(kwargs.get("meta")).signals.to_json()) + + +@current_app.task(bind=True) +def create_document_base_task(self, user_id, document_ids: [int], attributes_dump: bytes, statistics_dump: bytes): + attributes: list[Attribute] = pickle.loads(attributes_dump) + statistics: Statistics = pickle.loads(statistics_dump) + + def task_callback_fn(state: str, meta: TaskObject): + if isinstance(state, str) and state is not None and len(state) > 0: + meta_dump = meta.to_dump() + self.update_state(state=state, meta=meta_dump) + else: + raise Exception("task_callback_fn error Invalid state") + + task_callback = TaskUpdate(task_callback_fn) + + task_object = TaskObject(task_callback) + + api = WannaDB_WebAPI(1, 
task_object) + + task_object.update(state=State.PENDING, msg="api created") + try: + if not isinstance(attributes[0], Attribute): + task_object.update(State.FAILURE, "Invalid attributes") + raise Exception("Invalid attributes") + + if not isinstance(statistics, Statistics): + task_object.update(State.FAILURE, "Invalid statistics") + raise Exception("Invalid statistics") + + docs = getDocuments(document_ids, user_id) + task_object.update(State.PENDING, "Creating document base") + documents = [] + if docs: + for doc in docs: + documents.append(Document(doc[0], doc[1])) + else: + print("No documents found") + # raise Exception("No documents found") + + api.create_document_base(documents, attributes, statistics) + return task_object.to_dump() + + except Exception as e: + self.update_state(state=State.FAILURE.value, meta={'exception': str(e)}) + + +@current_app.task(bind=True) +def long_task(self): + try: + """Background task that runs a long function with progress reports.""" + verb = ['Starting up', 'Booting', 'Repairing', 'Loading', 'Checking'] + adjective = ['master', 'radiant', 'silent', 'harmonic', 'fast'] + noun = ['solar array', 'particle reshaper', 'cosmic ray', 'orbiter', 'bit'] + data = '' + total = random.randint(10, 50) + + def task_callback_fn(state: str, meta: TaskObject): + if not isinstance(state, str): + raise Exception("task_callback_fn error Invalid state") + meta_dump = meta.to_dump() + self.update_state(state=state, meta=meta_dump) + + task_callback = TaskUpdate(task_callback_fn) + + task_object = TaskObject(task_callback) + + for i in range(total): + if not data or random.random() < 0.25: + data = '{0} {1} {2}...'.format(random.choice(verb), + random.choice(adjective), + random.choice(noun)) + time.sleep(1) + task_object.update(state=State.PENDING, msg=data) + task_object.update(state=State.SUCCESS, msg='Task completed!') + return data + except Exception as e: + self.update_state(state=State.FAILURE.value, meta={'exception': str(e)}) + raise From d29a75232262bad7c0b3b75d2cac6e4f70ff99a6 Mon Sep 17 00:00:00 2001 From: cophilot Date: Fri, 12 Jan 2024 17:14:24 +0100 Subject: [PATCH 120/254] update --- Dockerfile | 1 - docker-compose.yaml | 139 +++++++++++++++++++++----------------------- 2 files changed, 66 insertions(+), 74 deletions(-) diff --git a/Dockerfile b/Dockerfile index 51c18910..a4241698 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,7 +14,6 @@ RUN apt-get update \ && apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false \ && rm -rf /var/lib/apt/lists/* - RUN mkdir /home/wannadb WORKDIR /home/wannadb diff --git a/docker-compose.yaml b/docker-compose.yaml index 7ed9759e..152ba1e8 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,82 +1,75 @@ -version: "3.6" +version: '3.6' services: - wannadb: - build: - dockerfile: Dockerfile - target: dev - restart: always - tty: true - ports: - - "8000:8000" - env_file: - - wannadb_web/.env/.dev - depends_on: - - postgres - - redis - volumes: - - ./:/home/wannadb - networks: - - mynetwork + wannadb: + build: . + restart: always + tty: true + ports: + - '8000:8000' + env_file: + - wannadb_web/.env/.dev + depends_on: + - postgres + - redis + volumes: + - ./:/home/wannadb + networks: + - mynetwork - worker: - build: - dockerfile: Dockerfile - target: worker - tty: true - command: [ 'celery', '-A', 'app.celery','worker', '-l', 'info'] - env_file: - - wannadb_web/.env/.dev - volumes: - - ./:/home/wannadb - networks: - - mynetwork - depends_on: - - wannadb - - redis + worker: + build: . 
+ tty: true + command: ['celery', '-A', 'app.celery', 'worker', '-l', 'info'] + env_file: + - wannadb_web/.env/.dev + volumes: + - ./:/home/wannadb + networks: + - mynetwork + depends_on: + - wannadb + - redis + flower: + build: . + tty: true + command: ['celery', '-A', 'app.celery', 'flower'] + env_file: + - wannadb_web/.env/.dev + volumes: + - ./:/home/wannadb + networks: + - mynetwork + ports: + - '5555:5555' + depends_on: + - wannadb + - redis - flower: - build: - dockerfile: Dockerfile - target: worker - tty: true - command: [ 'celery', '-A', 'app.celery', 'flower' ] - env_file: - - wannadb_web/.env/.dev - volumes: - - ./:/home/wannadb - networks: - - mynetwork - ports: - - "5555:5555" - depends_on: - - wannadb - - redis + postgres: + image: postgres + container_name: postgres-container + environment: + POSTGRES_PASSWORD: 0 + POSTGRES_DB: userManagement + networks: + - mynetwork + ports: + - '5432:5432' + volumes: + - pgdata:/var/lib/postgresql/data - postgres: - image: postgres - container_name: postgres-container - environment: - POSTGRES_PASSWORD: 0 - POSTGRES_DB: userManagement - networks: - - mynetwork - ports: - - "5432:5432" - volumes: - - pgdata:/var/lib/postgresql/data - - redis: - image: redis:alpine - container_name: redis-container - ports: - - "6379:6379" - networks: - - mynetwork + redis: + image: redis:alpine + container_name: redis-container + ports: + - '6379:6379' + networks: + - mynetwork networks: - mynetwork: - driver: bridge + mynetwork: + driver: bridge volumes: - pgdata: + pgdata: From 40a96f670e418a5218246a37d0c774be14de8924 Mon Sep 17 00:00:00 2001 From: cophilot Date: Fri, 12 Jan 2024 21:20:55 +0100 Subject: [PATCH 121/254] bug fixes and added get-files-for-org endpoint --- docker-compose.yaml | 15 +++++-- wannadb_web/postgres/queries.py | 59 ++++++++++++++++++++++++++++ wannadb_web/postgres/transactions.py | 54 ++++++++++++------------- wannadb_web/routing/files.py | 23 ++++++++--- wannadb_web/routing/user.py | 3 +- 5 files changed, 118 insertions(+), 36 deletions(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index 152ba1e8..d14364a3 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,7 +1,10 @@ version: '3.6' services: wannadb: - build: . + build: + context: . + dockerfile: Dockerfile + target: dev restart: always tty: true ports: @@ -17,7 +20,10 @@ services: - mynetwork worker: - build: . + build: + context: . + dockerfile: Dockerfile + target: worker tty: true command: ['celery', '-A', 'app.celery', 'worker', '-l', 'info'] env_file: @@ -31,7 +37,10 @@ services: - redis flower: - build: . + build: + context: . 
+ dockerfile: Dockerfile + target: worker tty: true command: ['celery', '-A', 'app.celery', 'flower'] env_file: diff --git a/wannadb_web/postgres/queries.py b/wannadb_web/postgres/queries.py index 389e7684..65c703f4 100644 --- a/wannadb_web/postgres/queries.py +++ b/wannadb_web/postgres/queries.py @@ -15,6 +15,16 @@ def getOrganisationID(organisation_name: str): select_query = sql.SQL("SELECT id FROM organisations WHERE name = %s;") return execute_query(select_query, (organisation_name,)) +def getOrganisationName(organisation_id: int): + select_query = sql.SQL("SELECT name FROM organisations WHERE id = %s;") + response = execute_query(select_query, (organisation_id,)) + if response is None: + return -1 + return str(response[0]) + +def getMembersOfOrganisation(organisation_id: int): + select_query = sql.SQL("SELECT username FROM users WHERE id IN (SELECT userid FROM membership WHERE organisationid = %s);") + return execute_query(select_query, (organisation_id,)) def getMemberIDsFromOrganisationID(organisationID: int): select_query = sql.SQL("SELECT userid FROM membership WHERE organisationid = %s;") @@ -35,6 +45,23 @@ def getOrganisationIDsFromUserId(userID: int): except Exception as e: return None, e +def getOrganisationFromUserId(user_id: int): + try: + select_query = sql.SQL(""" SELECT organisationid, o.name + FROM membership + JOIN organisations o ON membership.organisationid = o.id + WHERE userid = %s;""") + response = execute_query(select_query, (user_id,)) + if isinstance(response, list): + organisations: list[dict[str, Union[str, int]]] = [] + for org in response: + organisations.append({"id": int(org[0]), "name": str(org[1])}) + return organisations, None + if response is None: + return [-1], None + return None, "Unexpected response format" + except Exception as e: + return None, e def checkPassword(user: str, password: str) -> Union[tuple[bool, int], tuple[bool, str]]: select_query = sql.SQL("SELECT password,id as pw FROM users WHERE username = %s ") @@ -114,6 +141,38 @@ def getDocument(document_id: int, user_id: int): return None except Exception as e: print("getDocument failed because:\n", e) + +def getDocumentsForOrganization(organisation_id: int): + try: + select_query = sql.SQL("""SELECT id, name,content,content_byte + FROM documents + WHERE organisationid = (%s) + """) + result = execute_query(select_query, (organisation_id,)) + + if result == None or len(result) == 0: + return [] + + doc_array = [] + + for document in result: + id = document[0] + name = document[1] + content = ''; + if document[2]: + content = document[2] + elif document[3]: + content = document[3] + doc_array.append({ + "id": id, + "name": name, + "content": content + }) + return doc_array + + except Exception as e: + print("getDocumentsForOrganization failed because:\n", e) + return [] def getDocuments(document_ids: list[int], user_id: int): diff --git a/wannadb_web/postgres/transactions.py b/wannadb_web/postgres/transactions.py index 29de6e3f..7b25d0b8 100644 --- a/wannadb_web/postgres/transactions.py +++ b/wannadb_web/postgres/transactions.py @@ -11,7 +11,7 @@ def createSchema(schema): try: - create_schema_query = sql.SQL(f"CREATE SCHEMA {schema};") + create_schema_query = sql.SQL(f"CREATE SCHEMA IF NOT EXISTS {schema};") execute_transaction(create_schema_query, commit=True, fetch=False) print(f"Schema {schema} created successfully.") except Exception as e: @@ -40,7 +40,7 @@ def dropTables(schema): def createUserTable(schema): try: - create_table_query = sql.SQL(f"""CREATE TABLE {schema}.users + 
create_table_query = sql.SQL(f"""CREATE TABLE IF NOT EXISTS {schema}.users ( id bigint NOT NULL GENERATED ALWAYS AS IDENTITY ( INCREMENT 1 START 1 MINVALUE 1 MAXVALUE 9223372036854775807 CACHE 1 ), username text COLLATE pg_catalog."default" NOT NULL, @@ -58,31 +58,31 @@ def createUserTable(schema): def createDocumentsTable(schema): try: - create_table_query = sql.SQL(f"""CREATE TABLE {schema}.documents -( - id bigint NOT NULL GENERATED ALWAYS AS IDENTITY ( INCREMENT 1 START 1 MINVALUE 1 MAXVALUE 9223372036854775807 CACHE 1 ), - name text NOT NULL, - content text , - content_byte bytea, - organisationid bigint NOT NULL, - userid bigint NOT NULL, - CONSTRAINT dokumentid PRIMARY KEY (id), - CONSTRAINT documents_organisationid_fkey FOREIGN KEY (organisationid) - REFERENCES {schema}.organisations (id) MATCH SIMPLE - ON UPDATE CASCADE - ON DELETE CASCADE - NOT VALID, - CONSTRAINT documents_userid_fkey FOREIGN KEY (userid) - REFERENCES {schema}.users (id) MATCH SIMPLE - ON UPDATE CASCADE - ON DELETE CASCADE - NOT VALID - CONSTRAINT check_only_one_filled - check (((content IS NOT NULL) AND (content_byte IS NULL)) OR ((content IS NOT NULL) AND (content_byte IS NULL))) -) - -TABLESPACE pg_default;""") - execute_transaction(create_table_query, commit=True,fetch=False) + create_table_query = sql.SQL(f"""CREATE TABLE IF NOT EXISTS {schema}.documents + ( + id bigint NOT NULL GENERATED ALWAYS AS IDENTITY ( INCREMENT 1 START 1 MINVALUE 1 MAXVALUE 9223372036854775807 CACHE 1 ), + name text NOT NULL, + content text , + content_byte bytea, + organisationid bigint NOT NULL, + userid bigint NOT NULL, + CONSTRAINT dokumentid PRIMARY KEY (id), + CONSTRAINT documents_organisationid_fkey FOREIGN KEY (organisationid) + REFERENCES {schema}.organisations (id) MATCH SIMPLE + ON UPDATE CASCADE + ON DELETE CASCADE + NOT VALID, + CONSTRAINT documents_userid_fkey FOREIGN KEY (userid) + REFERENCES {schema}.users (id) MATCH SIMPLE + ON UPDATE CASCADE + ON DELETE CASCADE + NOT VALID, + CONSTRAINT check_only_one_filled + check (((content IS NOT NULL) AND (content_byte IS NULL)) OR ((content IS NOT NULL) AND (content_byte IS NULL))) + ) + + TABLESPACE pg_default;""") + execute_transaction(create_table_query, commit=True, fetch=False) except Exception as e: print("createUserTable failed because: \n", e) diff --git a/wannadb_web/routing/files.py b/wannadb_web/routing/files.py index 93b38d8f..14372912 100644 --- a/wannadb_web/routing/files.py +++ b/wannadb_web/routing/files.py @@ -1,14 +1,15 @@ from flask import Blueprint, request, make_response -from wannadb_web.postgres.queries import getDocument +from wannadb_web.postgres.queries import getDocument, getDocumentsForOrganization from wannadb_web.util import tokenDecode from wannadb_web.postgres.transactions import addDocument main_routes = Blueprint('main_routes', __name__, url_prefix='/data') -@main_routes.route('/file', methods=['POST']) +@main_routes.route('/upload/file', methods=['POST']) def upload_files(): + files = request.files.getlist('file') form = request.form @@ -23,6 +24,7 @@ def upload_files(): content_type = file.content_type if 'text/plain' in content_type: filename = file.filename + print("name:" + filename) content = str(file.stream.read().decode('utf-8')) dokument_id = addDocument(filename, content, organisation_id, token.id) document_ids.append(dokument_id) @@ -36,10 +38,21 @@ def upload_files(): return make_response(document_ids, 201) -@main_routes.route('/file/<_id>', methods=['GET']) +@main_routes.route('/organization/get/files/<_id>', methods=['GET']) 
+def get_files_for_organization(_id): + authorization = request.headers.get("authorization") + org_id = int(_id) + + token = tokenDecode(authorization) + + documents = getDocumentsForOrganization(org_id) + + return make_response(documents, 200) + +@main_routes.route('/get/file/<_id>', methods=['GET']) def get_file(_id): - print(request.json) - authorization = request.json.get("authorization") + + authorization = request.headers.get("authorization") document_id = int(_id) token = tokenDecode(authorization) diff --git a/wannadb_web/routing/user.py b/wannadb_web/routing/user.py index bbfdc13f..3347c0a6 100644 --- a/wannadb_web/routing/user.py +++ b/wannadb_web/routing/user.py @@ -2,7 +2,7 @@ from flask import Blueprint, request, make_response from wannadb_web.util import Token, tokenEncode, tokenDecode -from wannadb_web.postgres.queries import checkPassword, getOrganisationIDsFromUserId +from wannadb_web.postgres.queries import checkPassword, getMembersOfOrganisation, getOrganisationFromUserId, getOrganisationIDsFromUserId, getOrganisationName from wannadb_web.postgres.transactions import (addUser, addOrganisation, addUserToOrganisation2, deleteUser, leaveOrganisation) @@ -115,6 +115,7 @@ def get_organisations(): @user_management.route('/getOrganisationName/<_id>', methods=['GET']) def get_organisation_name(_id): + print("***HERE***") authorization = request.headers.get("authorization") token = tokenDecode(authorization) if token is None: From 1d0236cd331d91d8e588d366cb31bf40bc34c25d Mon Sep 17 00:00:00 2001 From: cophilot Date: Fri, 12 Jan 2024 21:31:09 +0100 Subject: [PATCH 122/254] added prod docker-compose file --- docker-compose-prod.yaml | 84 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 docker-compose-prod.yaml diff --git a/docker-compose-prod.yaml b/docker-compose-prod.yaml new file mode 100644 index 00000000..99e93d5c --- /dev/null +++ b/docker-compose-prod.yaml @@ -0,0 +1,84 @@ +version: '3.6' +services: + wannadb: + build: + context: . + dockerfile: Dockerfile + target: prod + restart: always + tty: true + ports: + - '8000:8000' + env_file: + - wannadb_web/.env/.dev + depends_on: + - postgres + - redis + volumes: + - ./:/home/wannadb + networks: + - mynetwork + + worker: + build: + context: . + dockerfile: Dockerfile + target: worker + tty: true + command: ['celery', '-A', 'app.celery', 'worker', '-l', 'info'] + env_file: + - wannadb_web/.env/.dev + volumes: + - ./:/home/wannadb + networks: + - mynetwork + depends_on: + - wannadb + - redis + + flower: + build: + context: . 
+ dockerfile: Dockerfile + target: worker + tty: true + command: ['celery', '-A', 'app.celery', 'flower'] + env_file: + - wannadb_web/.env/.dev + volumes: + - ./:/home/wannadb + networks: + - mynetwork + ports: + - '5555:5555' + depends_on: + - wannadb + - redis + + postgres: + image: postgres + container_name: postgres-container + environment: + POSTGRES_PASSWORD: 0 + POSTGRES_DB: userManagement + networks: + - mynetwork + ports: + - '5432:5432' + volumes: + - pgdata:/var/lib/postgresql/data + + redis: + image: redis:alpine + container_name: redis-container + ports: + - '6379:6379' + networks: + - mynetwork + +networks: + mynetwork: + driver: bridge + +volumes: + pgdata: From a08bedaad9f51ff8adc15dd8aa8f85c574da3c47 Mon Sep 17 00:00:00 2001 From: cophilot Date: Fri, 12 Jan 2024 21:53:59 +0100 Subject: [PATCH 123/254] changes dir of entrypoint --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index a4241698..6f5badb2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -51,8 +51,8 @@ FROM build as prod #copy the rest COPY . . -RUN chmod +x entrypoint.sh +RUN chmod +x wannadb_web/entrypoint.sh # Define the entrypoint.sh -CMD ["sh","./entrypoint.sh"] +CMD ["sh","./wannadb_web/entrypoint.sh"] From 508bdade205f79b95c6098d85b4ce99478213c5c Mon Sep 17 00:00:00 2001 From: cophilot Date: Fri, 12 Jan 2024 22:03:48 +0100 Subject: [PATCH 124/254] added prod scripts --- prod/build.sh | 3 +++ prod/up.sh | 3 +++ 2 files changed, 6 insertions(+) create mode 100755 prod/build.sh create mode 100755 prod/up.sh diff --git a/prod/build.sh b/prod/build.sh new file mode 100755 index 00000000..d4f29d8d --- /dev/null +++ b/prod/build.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +docker compose -f docker-compose-prod.yaml build \ No newline at end of file diff --git a/prod/up.sh b/prod/up.sh new file mode 100755 index 00000000..55152aa8 --- /dev/null +++ b/prod/up.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +docker compose -f docker-compose-prod.yaml up -d \ No newline at end of file From 56c35d1cfc170b9ba4c06598aa09799e70486205 Mon Sep 17 00:00:00 2001 From: cophilot Date: Sat, 13 Jan 2024 08:38:51 +0100 Subject: [PATCH 125/254] added updateFile and getUserNameSuggestion --- wannadb_web/postgres/queries.py | 25 ++++++++++++++++++++++++- wannadb_web/routing/files.py | 28 +++++++++++++++++++++++++++- wannadb_web/routing/user.py | 17 +++++++++++++++-- 3 files changed, 66 insertions(+), 4 deletions(-) diff --git a/wannadb_web/postgres/queries.py b/wannadb_web/postgres/queries.py index 65c703f4..54275b13 100644 --- a/wannadb_web/postgres/queries.py +++ b/wannadb_web/postgres/queries.py @@ -3,7 +3,7 @@ import bcrypt from psycopg2 import sql -from wannadb_web.postgres.util import execute_query +from wannadb_web.postgres.util import execute_query, execute_transaction def getUserID(user: str): @@ -31,6 +31,10 @@ def getMemberIDsFromOrganisationID(organisationID: int): return execute_query(select_query, (organisationID,)) +def getUserNameSuggestion(prefix: str): + select_query = sql.SQL("SELECT username FROM users WHERE username LIKE %s;") + return execute_query(select_query, (prefix + "%",)) + def getOrganisationIDsFromUserId(userID: int): try: select_query = sql.SQL("SELECT organisationid FROM membership WHERE userid = %s;") @@ -174,6 +178,25 @@ def getDocumentsForOrganization(organisation_id: int): print("getDocumentsForOrganization failed because:\n", e) return [] +def updateDocumentContent(doc_id: int, new_content): + try: + select_query = sql.SQL("""SELECT content, content_byte + FROM 
documents + WHERE id = (%s) + """) + result = execute_query(select_query, (doc_id,)) + if result == None or len(result) == 0: + return False + content_type = "content" + if result[0][0] == None: + content_type = "content_byte" + update_query = sql.SQL("UPDATE documents SET "+content_type+" = (%s) WHERE id = (%s)") + execute_transaction(update_query, (new_content, doc_id,), commit=True, fetch=False) + return True + except Exception as e: + print("updateDocumentContent failed because:\n", e) + return False + def getDocuments(document_ids: list[int], user_id: int): select_query = sql.SQL(f"""SELECT name,content,content_byte diff --git a/wannadb_web/routing/files.py b/wannadb_web/routing/files.py index 14372912..75e0911b 100644 --- a/wannadb_web/routing/files.py +++ b/wannadb_web/routing/files.py @@ -1,6 +1,6 @@ from flask import Blueprint, request, make_response -from wannadb_web.postgres.queries import getDocument, getDocumentsForOrganization +from wannadb_web.postgres.queries import getDocument, getDocumentsForOrganization, updateDocumentContent from wannadb_web.util import tokenDecode from wannadb_web.postgres.transactions import addDocument @@ -17,6 +17,9 @@ def upload_files(): organisation_id = int(form.get("organisationId")) token = tokenDecode(authorization) + if token is None: + return make_response({'error': 'no authorization'}, 401) + document_ids: list = [] @@ -44,11 +47,31 @@ def get_files_for_organization(_id): org_id = int(_id) token = tokenDecode(authorization) + if token is None: + return make_response({'error': 'no authorization'}, 401) + documents = getDocumentsForOrganization(org_id) return make_response(documents, 200) +@main_routes.route('/update/file/content', methods=['POST']) +def update_file_content(): + authorization = request.headers.get("authorization") + + token = tokenDecode(authorization) + if token is None: + return make_response({'error': 'no authorization'}, 401) + + + data = request.get_json() + docId = data.get('documentId') + newContent = data.get('newContent') + + status = updateDocumentContent(docId, newContent) + + return make_response({"status": status}, 200) + @main_routes.route('/get/file/<_id>', methods=['GET']) def get_file(_id): @@ -56,6 +79,9 @@ def get_file(_id): document_id = int(_id) token = tokenDecode(authorization) + if token is None: + return make_response({'error': 'no authorization'}, 401) + document_ids: list = [] diff --git a/wannadb_web/routing/user.py b/wannadb_web/routing/user.py index 3347c0a6..7051ba4c 100644 --- a/wannadb_web/routing/user.py +++ b/wannadb_web/routing/user.py @@ -2,7 +2,7 @@ from flask import Blueprint, request, make_response from wannadb_web.util import Token, tokenEncode, tokenDecode -from wannadb_web.postgres.queries import checkPassword, getMembersOfOrganisation, getOrganisationFromUserId, getOrganisationIDsFromUserId, getOrganisationName +from wannadb_web.postgres.queries import checkPassword, getMembersOfOrganisation, getOrganisationFromUserId, getOrganisationIDsFromUserId, getOrganisationName, getUserNameSuggestion from wannadb_web.postgres.transactions import (addUser, addOrganisation, addUserToOrganisation2, deleteUser, leaveOrganisation) @@ -177,4 +177,17 @@ def get_organisation_members(_id): for member in members_raw: members.append(member[0]) - return make_response({"members": members}, 200) \ No newline at end of file + return make_response({"members": members}, 200) + +@user_management.route('/get/user/suggestion/<_prefix>', methods=['GET']) +def get_user_suggestion(_prefix): + authorization = 
request.headers.get("authorization") + token = tokenDecode(authorization) + if token is None: + return make_response({'error': 'no authorization'}, 401) + + members_raw = getUserNameSuggestion(_prefix) + result = [] + for member in members_raw: + result.append(member[0]) + return make_response({"usernames": result}, 200) \ No newline at end of file From 718b64b10e05bda22131188aa458db9498c51f95 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Sun, 14 Jan 2024 21:03:03 +0100 Subject: [PATCH 126/254] add init ResourceManager in a worker --- app.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/app.py b/app.py index 9efc196e..de8a5d05 100644 --- a/app.py +++ b/app.py @@ -6,6 +6,7 @@ from flask_cors import CORS from flask_debugtoolbar import DebugToolbarExtension +from wannadb.resources import ResourceManager from wannadb_web.Redis.util import RedisConnection from wannadb_web.routing.core import core_routes from wannadb_web.routing.dev import dev_routes @@ -20,6 +21,7 @@ def celery_init_app(_app: Flask) -> Celery: _app.app_context() RedisConnection() + ResourceManager() class FlaskTask(Task): @@ -64,9 +66,6 @@ def not_found_error(error): return make_response({'error': f'Not Found \n {error}'}, 404) -@app.errorhandler(Exception) -def generic_error(error): - return make_response({'error': f'Internal Server Error \n {error}'}, 500) @app.route('/') From 0cdf1317e5161bbde42b51ae221128480fef73bd Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Sun, 14 Jan 2024 21:04:06 +0100 Subject: [PATCH 127/254] add document_base and status as endpoints --- wannadb_web/routing/core.py | 55 +++++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 15 deletions(-) diff --git a/wannadb_web/routing/core.py b/wannadb_web/routing/core.py index 3aa6ced2..3c2bff27 100644 --- a/wannadb_web/routing/core.py +++ b/wannadb_web/routing/core.py @@ -31,7 +31,7 @@ import pickle from celery.result import AsyncResult -from flask import Blueprint, make_response, jsonify, url_for +from flask import Blueprint, make_response, jsonify, url_for, request from wannadb.data.data import Attribute from wannadb.statistics import Statistics @@ -46,24 +46,45 @@ @core_routes.route('/document_base', methods=['POST']) def create_document(): - # form = request.form - # authorization = request.headers.get("authorization") - # _organisation_id = int(form.get("organisationId")) - # - authorization = ("eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VyIjoibGVvbiIsImlkIjoxfQ.YM9gwcXeFSku" - "-bz4RUKkymYvA6Af13sxH-BRlnjCCEA") - + """ + Endpoint for creating a document base. + + This endpoint is used to create a document base from a list of document ids and a list of attributes. 
+ + Example Header: + { + "Authorization": "your_authorization_token" + } + + Example JSON Payload: + { + "organisationId": "your_organisation_id", + "baseName": "your_document_base_name", + "document_ids": [ + 1, 2, 3 + ], + "attributes": [ + "plane","car","bike" + ] + } + """ + form = request.form + authorization = request.headers.get("authorization") + authorization = form.get("authorization") + organisation_id = form.get("organisationId") + base_name = form.get("baseName") + document_ids = form.get("document_ids") + attributes = form.get("attributes") _token = tokenDecode(authorization) - _base_name = "base_name" - document_ids = [2, 3] - attribute = Attribute("a") + statistics = Statistics(False) - user_id = 1 + user_id = _token.id - attributesDump = pickle.dumps([attribute]) + attributesDump = pickle.dumps(attributes) statisticsDump = pickle.dumps(statistics) - task = create_document_base_task.apply_async(args=(user_id, document_ids, attributesDump, statisticsDump)) + task = create_document_base_task.apply_async(args=(user_id, document_ids, attributesDump, statisticsDump, + base_name,organisation_id)) return make_response({'task_id': task.id}, 202) @@ -77,11 +98,15 @@ def longtask(): @core_routes.route('/status/') def task_status(task_id): - task: AsyncResult = long_task.AsyncResult(task_id) + task: AsyncResult = AsyncResult(task_id) print(task.status) meta = task.info if meta is None: return make_response({"error": "task not found"}, 404) + if task.status == "FAILURE": + return make_response( + {"state": "FAILURE", "meta": str(meta)}, 500) + print(meta) if not isinstance(meta, bytes): return make_response({"error": "task not correct"}, 404) From 63c2b5d89bd91b1e2c1b2f41417bbd45ef10c75f Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Sun, 14 Jan 2024 21:05:58 +0100 Subject: [PATCH 128/254] add getDocument_by_name --- wannadb_web/postgres/queries.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/wannadb_web/postgres/queries.py b/wannadb_web/postgres/queries.py index 54275b13..e3e87c1c 100644 --- a/wannadb_web/postgres/queries.py +++ b/wannadb_web/postgres/queries.py @@ -123,6 +123,30 @@ def _getDocument(documentId: int): print("_getDocument failed because: \n", e) +def getDocument_by_name(document_name: str, organisation_id: int, user_id: int): + select_query = sql.SQL("""SELECT name,content,content_byte + FROM documents d + JOIN membership m ON d.organisationid = m.organisationid + WHERE d.name = (%s) AND m.userid = (%s) AND m.organisationid = (%s) + """) + + result = execute_query(select_query, (document_name, user_id, organisation_id,)) + if len(result) == 1: + document = result[0] + name = document[0] + if document[1]: + content = document[1] + return str(name), str(content) + elif document[2]: + content = document[2] + return str(name), bytes(content) + elif len(result) > 1: + raise Exception("Multiple documents with the same name found") + else: + raise Exception("No document with that name found") + + + def getDocument(document_id: int, user_id: int): select_query = sql.SQL("""SELECT name,content,content_byte FROM documents From 0f87f713f1d7ab7191e824c696bd1778063b065f Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Sun, 14 Jan 2024 21:06:24 +0100 Subject: [PATCH 129/254] add Emitable abstraction --- wannadb_web/worker/Signal.py | 62 -------------------- wannadb_web/worker/Signals.py | 106 ++++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 62 deletions(-) delete mode 100644 wannadb_web/worker/Signal.py create mode 100644 
wannadb_web/worker/Signals.py diff --git a/wannadb_web/worker/Signal.py b/wannadb_web/worker/Signal.py deleted file mode 100644 index 410155d3..00000000 --- a/wannadb_web/worker/Signal.py +++ /dev/null @@ -1,62 +0,0 @@ -from dataclasses import dataclass -from typing import Any - - -class Signals: - def __init__(self): - self.feedback = Signal("feedback") - self.status = Signal("status") - self.finished = Signal("finished") - self.error = Signal("error") - self.document_base_to_ui = Signal("document_base_to_ui") - self.statistics_to_ui = Signal("statistics_to_ui") - self.feedback_request_to_ui = Signal("feedback_request_to_ui") - self.cache_db_to_ui = Signal("cache_db_to_ui") - - def print(self): - print(self.feedback) - print(self.status) - print(self.finished) - print(self.error) - print(self.document_base_to_ui) - print(self.statistics_to_ui) - print(self.feedback_request_to_ui) - print(self.cache_db_to_ui) - - def to_json(self): - try: - return {self.feedback.type: self.feedback.to_json(), - self.error.type: self.error.to_json(), - self.status.type: self.status.to_json(), - self.finished.type: self.finished.to_json(), - self.document_base_to_ui.type: self.document_base_to_ui.to_json(), - self.statistics_to_ui.type: self.statistics_to_ui.to_json(), - self.feedback_request_to_ui.type: self.feedback_request_to_ui.to_json(), - self.cache_db_to_ui.type: self.cache_db_to_ui.to_json()} - except Exception as e: - print(e) - return {} - - -@dataclass -class Signal: - type: str - __msg: list[Any] - - def __init__(self, signal_type: str): - self.type = signal_type - self.__msg = [] - - @property - def msg(self): - return self.__msg - - def to_json(self): - return { - 'type': self.type, - 'msg': str(self.msg) - } - - def emit(self, *args: Any): - for arg in args: - self.msg.append(arg) diff --git a/wannadb_web/worker/Signals.py b/wannadb_web/worker/Signals.py new file mode 100644 index 00000000..b85d3c1c --- /dev/null +++ b/wannadb_web/worker/Signals.py @@ -0,0 +1,106 @@ +import json +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import Optional, Any + + +class Signals: + def __init__(self): + self.feedback = Signal("feedback") + self.status = State("status") + self.finished = Signal("finished") + self.error = State("error") + self.document_base_to_ui = Dump("document_base_to_ui") + self.statistics = Dump("statistics_to_ui") + self.feedback_request_to_ui = Dump("feedback_request_to_ui") + self.cache_db_to_ui = Dump("cache_db_to_ui") + + def to_json(self): + try: + return {self.feedback.type: self.feedback.to_json(), + self.error.type: self.error.to_json(), + self.status.type: self.status.to_json(), + self.finished.type: self.finished.to_json(), + self.document_base_to_ui.type: self.document_base_to_ui.to_json(), + self.statistics.type: self.statistics.to_json(), + self.feedback_request_to_ui.type: self.feedback_request_to_ui.to_json(), + self.cache_db_to_ui.type: self.cache_db_to_ui.to_json()} + except Exception as e: + print(e) + return {} + + +class Emitable(ABC): + @abstractmethod + def __init__(self, emitable_type: str): + self.type = emitable_type + self.__msg = None + + @abstractmethod + def to_json(self): + raise NotImplementedError + + @abstractmethod + def emit(self, status: Any): + raise NotImplementedError + + +@dataclass +class State(Emitable): + def __init__(self, state_type: str): + super().__init__(state_type) + self.__msg = "" + + @property + def msg(self): + return self.__msg + + def to_json(self): + return { + 'type': self.type, + 'msg': 
str(self.msg) + } + + def emit(self, status: str): + self.__msg = status + + +@dataclass +class Signal(Emitable): + __msg: Optional[float] + + def __init__(self, signal_type: str): + super().__init__(signal_type) + self.__msg = None + + @property + def msg(self): + return self.__msg + + def to_json(self): + return { + 'type': self.type, + 'msg': str(self.msg) + } + + def emit(self, status: float): + self.__msg = status + + +class Dump(Emitable): + def __init__(self, dump_type: str): + super().__init__(dump_type) + self.__msg = None + + @property + def msg(self): + return self.__msg + + def to_json(self): + return { + 'type': self.type, + 'msg': json.dumps(self.msg) + } + + def emit(self, status): + self.__msg = status From 42ed9ed1830c5da468ddd268a3fb7c612ed0e478 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Sun, 14 Jan 2024 21:06:53 +0100 Subject: [PATCH 130/254] renaming --- wannadb_web/worker/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wannadb_web/worker/util.py b/wannadb_web/worker/util.py index 4993e6c3..e41f0b18 100644 --- a/wannadb_web/worker/util.py +++ b/wannadb_web/worker/util.py @@ -5,7 +5,7 @@ from wannadb.interaction import InteractionCallback from wannadb.status import StatusCallback -from wannadb_web.worker.Signal import Signals +from wannadb_web.worker.Signals import Signals class TaskUpdate: From 525e42f153bff73f83baa771bf5a4bfe12c80b42 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Sun, 14 Jan 2024 21:07:07 +0100 Subject: [PATCH 131/254] add state --- wannadb_web/worker/util.py | 1 + 1 file changed, 1 insertion(+) diff --git a/wannadb_web/worker/util.py b/wannadb_web/worker/util.py index e41f0b18..0d454ce1 100644 --- a/wannadb_web/worker/util.py +++ b/wannadb_web/worker/util.py @@ -28,6 +28,7 @@ class State(enum.Enum): PENDING = 'PENDING' SUCCESS = 'SUCCESS' FAILURE = 'FAILURE' + ERROR = 'ERROR' @dataclass From 62c4fc70953ab833c77af86b89f1cdede74c5878 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Sun, 14 Jan 2024 21:07:34 +0100 Subject: [PATCH 132/254] add workaround for pipeline --- wannadb_web/worker/util.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/wannadb_web/worker/util.py b/wannadb_web/worker/util.py index 0d454ce1..249078d3 100644 --- a/wannadb_web/worker/util.py +++ b/wannadb_web/worker/util.py @@ -46,7 +46,10 @@ def __init__(self, task_update_fn: Optional[TaskUpdate], state=State.STARTED): @property def status_callback(self): def status_callback_fn(message, progress) -> None: - self.signals.status.emit(message, progress) + m = str(message) + p = str(progress) + + self.signals.status.emit(m + ":" + p) self.update(State.PENDING) return StatusCallback(status_callback_fn) @@ -89,7 +92,6 @@ def update(self, state: State, msg=""): else: raise Exception("update error State is none") - def to_dump(self): state = self.state signals = self.signals From 1c820d14768ec3edd71b75486f0dc72a879cdaee Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Sun, 14 Jan 2024 23:29:49 +0100 Subject: [PATCH 133/254] adj add type support for functions --- wannadb_web/postgres/queries.py | 95 ++++++++++++++++++--------------- 1 file changed, 51 insertions(+), 44 deletions(-) diff --git a/wannadb_web/postgres/queries.py b/wannadb_web/postgres/queries.py index e3e87c1c..f8043ec4 100644 --- a/wannadb_web/postgres/queries.py +++ b/wannadb_web/postgres/queries.py @@ -1,5 +1,3 @@ -from typing import Union - import bcrypt from psycopg2 import sql @@ -8,7 +6,10 @@ def getUserID(user: str): select_query = sql.SQL("SELECT id FROM users WHERE 
username = %s;") - return execute_query(select_query, (user,)) + result = execute_query(select_query, (user,)) + if isinstance(result[0], int): + return int(result[0]) + return Exception("No user found with that name") def getOrganisationID(organisation_name: str): @@ -67,7 +68,7 @@ def getOrganisationFromUserId(user_id: int): except Exception as e: return None, e -def checkPassword(user: str, password: str) -> Union[tuple[bool, int], tuple[bool, str]]: +def checkPassword(user: str, password: str): select_query = sql.SQL("SELECT password,id as pw FROM users WHERE username = %s ") _password, _id = execute_query(select_query, (user,))[0] @@ -76,30 +77,29 @@ def checkPassword(user: str, password: str) -> Union[tuple[bool, int], tuple[boo stored_password = bytes(_password) check = bcrypt.checkpw(password.encode('utf-8'), stored_password) if check: - return bcrypt.checkpw(password.encode('utf-8'), stored_password), int(_id) + return int(_id) - return False, "" + return False except Exception as e: - print("checkPassword failed because: \n", e) - return False, str(e) + return e -def checkOrganisationAuthorisation(organisationName: str, userName: str) -> int: - select_query = sql.SQL("SELECT membership from membership " +def checkOrganisationAuthorisation(organisationName: str, userName: str): + select_query = sql.SQL("SELECT authorisation from membership " "where userid = (SELECT id from users where username = (%s)) " "and " "organisationid = (Select id from organisations where name = (%s))") result = execute_query(select_query, (organisationName, userName)) try: - if result[0]: - authorisation = result[0] - return int(authorisation) # sketchy conversion but works + if isinstance(result[0], int): + return int(result[0]) + if result[0] is None: + return Exception("No authorisation found") except Exception as e: - print("checkOrganisationAuthorisation failed because: \n", e) - return 99 + return Exception("checkOrganisationAuthorisation failed because: \n", e) def _getDocument(documentId: int): @@ -146,7 +146,6 @@ def getDocument_by_name(document_name: str, organisation_id: int, user_id: int): raise Exception("No document with that name found") - def getDocument(document_id: int, user_id: int): select_query = sql.SQL("""SELECT name,content,content_byte FROM documents @@ -231,21 +230,25 @@ def getDocuments(document_ids: list[int], user_id: int): """) result = execute_query(select_query, (user_id,)) try: - if len(result) > 0: - documents = [] - for document in result: - name = document[0] - if document[1]: - content = document[1] - documents.append((str(name), str(content))) - elif document[2]: - content = document[2] - documents.append((str(name), bytes(content))) - return documents - else: - return None + if isinstance(result, list) and isinstance(result[0], tuple): + if len(result) > 0: + if result[0][1]: + documents = [] + for document in result: + name = document[0] + content = document[1] + documents.append((str(name), str(content))) + return documents + elif result[0][2]: + b_documents = [] + for document in result: + name = document[0] + content = document[2] + b_documents.append((str(name), bytes(content))) + return b_documents + return Exception("no documents found") except Exception as e: - print("getDocuments failed because:\n", e) + return Exception("getDocuments failed because:\n", e) def getDocument_ids(organisation_id: int, user_id: int): @@ -256,19 +259,23 @@ def getDocument_ids(organisation_id: int, user_id: int): """) result = execute_query(select_query, (organisation_id, 
user_id,)) - print(result) - documents = [] try: - if len(result) > 0: - for document in result: - if document[1]: - name = document[0] - content = document[1] - documents.append((str(name), str(content))) - elif document[2]: - name = document[0] - content = document[2] - documents.append((str(name), bytes(content))) - return documents + if isinstance(result, list) and isinstance(result[0], tuple): + if len(result) > 0: + if result[0][1]: + documents = [] + for document in result: + name = document[0] + content = document[1] + documents.append((str(name), str(content))) + return documents + elif result[0][2]: + b_documents = [] + for document in result: + name = document[0] + content = document[2] + b_documents.append((str(name), bytes(content))) + return b_documents + return [] except Exception as e: - print("getDocument_ids failed because: \n", e) + return Exception("getDocuments failed because:\n", e) From f690d5b222c3f5acdeadcf73859c841bef416934 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 15 Jan 2024 13:41:52 +0100 Subject: [PATCH 134/254] formatting --- .pylintrc | 2 - wannadb_web/postgres/queries.py | 244 ++++++++++++++------------- wannadb_web/postgres/transactions.py | 173 +++++++++---------- wannadb_web/postgres/util.py | 67 ++++++-- wannadb_web/routing/user.py | 23 ++- 5 files changed, 286 insertions(+), 223 deletions(-) diff --git a/.pylintrc b/.pylintrc index c90edfe3..1b79b4b5 100644 --- a/.pylintrc +++ b/.pylintrc @@ -43,7 +43,6 @@ logging-modules=logging [SIMILARITIES] min-similarity-lines=8 ignore-comments=yes -ignore-docstrings=yes ignore-imports=no [FORMAT] @@ -79,7 +78,6 @@ contextmanager-decorators=contextlib.contextmanager [MISCELLANEOUS] notes=FIXME,XXX,TODO -disable=missing-function-docstring [BASIC] diff --git a/wannadb_web/postgres/queries.py b/wannadb_web/postgres/queries.py index f8043ec4..bb4a7c6d 100644 --- a/wannadb_web/postgres/queries.py +++ b/wannadb_web/postgres/queries.py @@ -1,3 +1,5 @@ +from typing import Union + import bcrypt from psycopg2 import sql @@ -9,13 +11,14 @@ def getUserID(user: str): result = execute_query(select_query, (user,)) if isinstance(result[0], int): return int(result[0]) - return Exception("No user found with that name") + return None def getOrganisationID(organisation_name: str): select_query = sql.SQL("SELECT id FROM organisations WHERE name = %s;") return execute_query(select_query, (organisation_name,)) + def getOrganisationName(organisation_id: int): select_query = sql.SQL("SELECT name FROM organisations WHERE id = %s;") response = execute_query(select_query, (organisation_id,)) @@ -23,10 +26,13 @@ def getOrganisationName(organisation_id: int): return -1 return str(response[0]) + def getMembersOfOrganisation(organisation_id: int): - select_query = sql.SQL("SELECT username FROM users WHERE id IN (SELECT userid FROM membership WHERE organisationid = %s);") + select_query = sql.SQL( + "SELECT username FROM users WHERE id IN (SELECT userid FROM membership WHERE organisationid = %s);") return execute_query(select_query, (organisation_id,)) + def getMemberIDsFromOrganisationID(organisationID: int): select_query = sql.SQL("SELECT userid FROM membership WHERE organisationid = %s;") return execute_query(select_query, (organisationID,)) @@ -36,6 +42,7 @@ def getUserNameSuggestion(prefix: str): select_query = sql.SQL("SELECT username FROM users WHERE username LIKE %s;") return execute_query(select_query, (prefix + "%",)) + def getOrganisationIDsFromUserId(userID: int): try: select_query = sql.SQL("SELECT organisationid FROM 
membership WHERE userid = %s;") @@ -50,6 +57,7 @@ def getOrganisationIDsFromUserId(userID: int): except Exception as e: return None, e + def getOrganisationFromUserId(user_id: int): try: select_query = sql.SQL(""" SELECT organisationid, o.name @@ -68,21 +76,29 @@ def getOrganisationFromUserId(user_id: int): except Exception as e: return None, e + def checkPassword(user: str, password: str): + """Checks if the password is correct for the given user + + Returns: + user_id: int (if password is correct) + False: bool (if password is incorrect) + Exception: Exception (if something went wrong) + Raises: + None + """ select_query = sql.SQL("SELECT password,id as pw FROM users WHERE username = %s ") - _password, _id = execute_query(select_query, (user,))[0] - try: - if _password: - stored_password = bytes(_password) - check = bcrypt.checkpw(password.encode('utf-8'), stored_password) - if check: - return int(_id) + result = execute_query(select_query, (user,)) + _password, _id = result[0] - return False + if _password: + stored_password = bytes(_password) + check = bcrypt.checkpw(password.encode('utf-8'), stored_password) + if check: + return int(_id) - except Exception as e: - return e + return False def checkOrganisationAuthorisation(organisationName: str, userName: str): @@ -92,14 +108,12 @@ def checkOrganisationAuthorisation(organisationName: str, userName: str): "organisationid = (Select id from organisations where name = (%s))") result = execute_query(select_query, (organisationName, userName)) - try: - if isinstance(result[0], int): - return int(result[0]) - if result[0] is None: - return Exception("No authorisation found") + if isinstance(result[0], int): + return int(result[0]) + if result[0] is None: + return None + - except Exception as e: - return Exception("checkOrganisationAuthorisation failed because: \n", e) def _getDocument(documentId: int): @@ -108,22 +122,31 @@ def _getDocument(documentId: int): where id = (%s)""") result = execute_query(select_query, (documentId,)) - try: - if result[0]: - if result[0][0]: - content = result[0][0] - return str(content) - else: - content = result[0][1] - return bytes(content) - else: - return None - except Exception as e: - print("_getDocument failed because: \n", e) + if result[0]: + if result[0][0]: + content = result[0][0] + return str(content) + else: + content = result[0][1] + return bytes(content) + else: + return None def getDocument_by_name(document_name: str, organisation_id: int, user_id: int): + """ + Returns: + name: str + content: str or bytes + + Raises: + Exception: if no document with that name is found + Exception: if multiple documents with that name are found + """ + + + select_query = sql.SQL("""SELECT name,content,content_byte FROM documents d JOIN membership m ON d.organisationid = m.organisationid @@ -154,53 +177,48 @@ def getDocument(document_id: int, user_id: int): """) result = execute_query(select_query, (document_id, user_id,)) - try: - if len(result) > 0: - for document in result: - name = document[0] - if document[1]: - content = document[1] - return str(name), str(content) - elif document[2]: - content = document[2] - return str(name), bytes(content) - else: - return None - except Exception as e: - print("getDocument failed because:\n", e) - -def getDocumentsForOrganization(organisation_id: int): - try: - select_query = sql.SQL("""SELECT id, name,content,content_byte - FROM documents - WHERE organisationid = (%s) - """) - result = execute_query(select_query, (organisation_id,)) - - if result == None or len(result) 
== 0: - return [] - - doc_array = [] - + if len(result) > 0: for document in result: - id = document[0] - name = document[1] - content = ''; - if document[2]: + name = document[0] + if document[1]: + content = document[1] + return str(name), str(content) + elif document[2]: content = document[2] - elif document[3]: - content = document[3] - doc_array.append({ - "id": id, - "name": name, - "content": content - }) - return doc_array + return str(name), bytes(content) + else: + return None - except Exception as e: - print("getDocumentsForOrganization failed because:\n", e) + +def getDocumentsForOrganization(organisation_id: int): + + select_query = sql.SQL("""SELECT id, name,content,content_byte + FROM documents + WHERE organisationid = (%s) + """) + result = execute_query(select_query, (organisation_id,)) + + if result is None or len(result) == 0: return [] + doc_array = [] + + for document in result: + id = document[0] + name = document[1] + content = '' + if document[2]: + content = document[2] + elif document[3]: + content = document[3] + doc_array.append({ + "id": id, + "name": name, + "content": content + }) + return doc_array + + def updateDocumentContent(doc_id: int, new_content): try: select_query = sql.SQL("""SELECT content, content_byte @@ -213,9 +231,9 @@ def updateDocumentContent(doc_id: int, new_content): content_type = "content" if result[0][0] == None: content_type = "content_byte" - update_query = sql.SQL("UPDATE documents SET "+content_type+" = (%s) WHERE id = (%s)") + update_query = sql.SQL("UPDATE documents SET " + content_type + " = (%s) WHERE id = (%s)") execute_transaction(update_query, (new_content, doc_id,), commit=True, fetch=False) - return True + return True except Exception as e: print("updateDocumentContent failed because:\n", e) return False @@ -229,26 +247,24 @@ def getDocuments(document_ids: list[int], user_id: int): ({",".join(str(_id) for _id in document_ids)}) """) result = execute_query(select_query, (user_id,)) - try: - if isinstance(result, list) and isinstance(result[0], tuple): - if len(result) > 0: - if result[0][1]: - documents = [] - for document in result: - name = document[0] - content = document[1] - documents.append((str(name), str(content))) - return documents - elif result[0][2]: - b_documents = [] - for document in result: - name = document[0] - content = document[2] - b_documents.append((str(name), bytes(content))) - return b_documents - return Exception("no documents found") - except Exception as e: - return Exception("getDocuments failed because:\n", e) + if isinstance(result, list) and isinstance(result[0], tuple): + if len(result) > 0: + if result[0][1]: + documents = [] + for document in result: + name = document[0] + content = document[1] + documents.append((str(name), str(content))) + return documents + elif result[0][2]: + b_documents = [] + for document in result: + name = document[0] + content = document[2] + b_documents.append((str(name), bytes(content))) + return b_documents + return [] + def getDocument_ids(organisation_id: int, user_id: int): @@ -259,23 +275,21 @@ def getDocument_ids(organisation_id: int, user_id: int): """) result = execute_query(select_query, (organisation_id, user_id,)) - try: - if isinstance(result, list) and isinstance(result[0], tuple): - if len(result) > 0: - if result[0][1]: - documents = [] - for document in result: - name = document[0] - content = document[1] - documents.append((str(name), str(content))) - return documents - elif result[0][2]: - b_documents = [] - for document in result: - name = 
document[0] - content = document[2] - b_documents.append((str(name), bytes(content))) - return b_documents - return [] - except Exception as e: - return Exception("getDocuments failed because:\n", e) + if isinstance(result, list) and isinstance(result[0], tuple): + if len(result) > 0: + if result[0][1]: + documents = [] + for document in result: + name = document[0] + content = document[1] + documents.append((str(name), str(content))) + return documents + elif result[0][2]: + b_documents = [] + for document in result: + name = document[0] + content = document[2] + b_documents.append((str(name), bytes(content))) + return b_documents + return [] + diff --git a/wannadb_web/postgres/transactions.py b/wannadb_web/postgres/transactions.py index 7b25d0b8..1b66a1d5 100644 --- a/wannadb_web/postgres/transactions.py +++ b/wannadb_web/postgres/transactions.py @@ -1,3 +1,4 @@ +import logging from typing import Union import bcrypt @@ -6,59 +7,57 @@ from wannadb_web.postgres.queries import checkPassword from wannadb_web.postgres.util import execute_transaction +logger: logging.Logger = logging.getLogger(__name__) + # WARNING: This is only for development purposes! def createSchema(schema): - try: - create_schema_query = sql.SQL(f"CREATE SCHEMA IF NOT EXISTS {schema};") - execute_transaction(create_schema_query, commit=True, fetch=False) - print(f"Schema {schema} created successfully.") - except Exception as e: - print(f"Error creating schema {schema}: {e}") + """ + Returns: None + """ + create_schema_query = sql.SQL(f"CREATE SCHEMA IF NOT EXISTS {schema};") + execute_transaction(create_schema_query, commit=True, fetch=False) + logger.info(f"Schema {schema} created successfully.") def dropSchema(schema): - try: - drop_schema_query = sql.SQL(f"DROP SCHEMA IF EXISTS {schema} CASCADE;") - execute_transaction(drop_schema_query, commit=True, fetch=False) - print(f"Schema {schema} dropped successfully.") - except Exception as e: - print(f"Error dropping schema {schema}: {e}") + """ + Returns: None + """ + drop_schema_query = sql.SQL(f"DROP SCHEMA IF EXISTS {schema} CASCADE;") + execute_transaction(drop_schema_query, commit=True, fetch=False) + logger.info(f"Schema {schema} dropped successfully.") def dropTables(schema): - try: - drop_table_query = sql.SQL(f"DROP TABLE IF EXISTS {schema}.users CASCADE;\n" - f"DROP TABLE IF EXISTS {schema}.documents CASCADE;\n" - f"DROP TABLE IF EXISTS {schema}.membership CASCADE;\n" - f"DROP TABLE IF EXISTS {schema}.organisations CASCADE;") - execute_transaction(drop_table_query, commit=True) - except Exception as e: - print("dropTables failed because: \n", e) + """ + Returns: None + """ + drop_table_query = sql.SQL(f"DROP TABLE IF EXISTS {schema}.users CASCADE;\n" + f"DROP TABLE IF EXISTS {schema}.documents CASCADE;\n" + f"DROP TABLE IF EXISTS {schema}.membership CASCADE;\n" + f"DROP TABLE IF EXISTS {schema}.organisations CASCADE;") + execute_transaction(drop_table_query, commit=True) def createUserTable(schema): - try: - create_table_query = sql.SQL(f"""CREATE TABLE IF NOT EXISTS {schema}.users -( + create_table_query = sql.SQL(f"""CREATE TABLE IF NOT EXISTS {schema}.users + ( id bigint NOT NULL GENERATED ALWAYS AS IDENTITY ( INCREMENT 1 START 1 MINVALUE 1 MAXVALUE 9223372036854775807 CACHE 1 ), username text COLLATE pg_catalog."default" NOT NULL, password bytea NOT NULL, CONSTRAINT userid PRIMARY KEY (id), CONSTRAINT unique_username UNIQUE (username) -) + ) -TABLESPACE pg_default; -""") - execute_transaction(create_table_query, commit=True, fetch=False) - except Exception as 
e: - print("createUserTable failed because: \n", e) + TABLESPACE pg_default; + """) + execute_transaction(create_table_query, commit=True, fetch=False) def createDocumentsTable(schema): - try: - create_table_query = sql.SQL(f"""CREATE TABLE IF NOT EXISTS {schema}.documents + create_table_query = sql.SQL(f"""CREATE TABLE IF NOT EXISTS {schema}.documents ( id bigint NOT NULL GENERATED ALWAYS AS IDENTITY ( INCREMENT 1 START 1 MINVALUE 1 MAXVALUE 9223372036854775807 CACHE 1 ), name text NOT NULL, @@ -82,14 +81,11 @@ def createDocumentsTable(schema): ) TABLESPACE pg_default;""") - execute_transaction(create_table_query, commit=True, fetch=False) - except Exception as e: - print("createUserTable failed because: \n", e) + execute_transaction(create_table_query, commit=True, fetch=False) def createMembershipTable(schema): - try: - create_table_query = sql.SQL(f"""CREATE TABLE IF NOT EXISTS {schema}.membership + create_table_query = sql.SQL(f"""CREATE TABLE IF NOT EXISTS {schema}.membership ( userid bigint NOT NULL, organisationid bigint NOT NULL, @@ -119,14 +115,11 @@ def createMembershipTable(schema): ON {schema}.membership USING btree (organisationid ASC NULLS LAST) TABLESPACE pg_default;""") - execute_transaction(create_table_query, commit=True, fetch=False) - except Exception as e: - print("createUserTable failed because: \n", e) + execute_transaction(create_table_query, commit=True, fetch=False) def createOrganisationTable(schema): - try: - create_table_query = sql.SQL(f"""CREATE TABLE IF NOT EXISTS {schema}.organisations + create_table_query = sql.SQL(f"""CREATE TABLE IF NOT EXISTS {schema}.organisations ( id bigint NOT NULL GENERATED ALWAYS AS IDENTITY ( INCREMENT 1 START 1 MINVALUE 1 MAXVALUE 9223372036854775807 CACHE 1 ), name text COLLATE pg_catalog."default" NOT NULL, @@ -137,66 +130,71 @@ def createOrganisationTable(schema): TABLESPACE pg_default; """) - execute_transaction(create_table_query, commit=True, fetch=False) - except Exception as e: - print("createUserTable failed because: \n", e) + execute_transaction(create_table_query, commit=True, fetch=False) def addUser(user: str, password: str): - try: - pwBytes = password.encode('utf-8') - salt = bcrypt.gensalt() - pwHash = bcrypt.hashpw(pwBytes, salt) - # Needed this for the correct password check don't know why... - pwHash = pwHash.decode('utf-8') - - insert_data_query = sql.SQL("INSERT INTO users (username, password) VALUES (%s, %s) returning id;") - data_to_insert = (user, pwHash) - response = execute_transaction(insert_data_query, data_to_insert, commit=True) - return int(response[0][0]) - except IntegrityError: - return -1 + """ - except Exception as e: - print("addUser failed because: \n", e) + Returns: int (user id) + + Raises: Exception + + """ + + + pwBytes = password.encode('utf-8') + salt = bcrypt.gensalt() + pwHash = bcrypt.hashpw(pwBytes, salt) + # Needed this for the correct password check don't know why... 
+ pwHash = pwHash.decode('utf-8') + + insert_data_query = sql.SQL("INSERT INTO users (username, password) VALUES (%s, %s) returning id;") + data_to_insert = (user, pwHash) + response = execute_transaction(insert_data_query, data_to_insert, commit=True) + if response is IntegrityError: + raise IntegrityError("User already exists") + if isinstance(response[0][0], int): + return int(response[0][0]) + raise Exception("addUser failed because: \n", response) def changePassword(user: str, old_password: str, new_password: str): try: if old_password == new_password: - raise Exception("same password") + return False pwcheck = checkPassword(user, old_password) - if not pwcheck: - raise Exception("wrong password") + if isinstance(pwcheck, Exception): + raise pwcheck + if isinstance(pwcheck, bool): + return bool(pwcheck) + if isinstance(pwcheck, int): + _ = int(pwcheck) - pwBytes = new_password.encode('utf-8') - salt = bcrypt.gensalt() - pwHash = bcrypt.hashpw(pwBytes, salt) + pwBytes = new_password.encode('utf-8') + salt = bcrypt.gensalt() + pwHash = bcrypt.hashpw(pwBytes, salt) - update_query = sql.SQL("UPDATE users SET password = %s WHERE username = %s;") - execute_transaction(update_query, (pwHash, user), commit=True) + update_query = sql.SQL("UPDATE users SET password = %s WHERE username = %s;") + execute_transaction(update_query, (pwHash, user), commit=True) except Exception as e: print("changePassword failed because: \n", e) def deleteUser(user: str, password: str): - try: - pwcheck = checkPassword(user, password) - if not pwcheck: - raise Exception("wrong password") - - delete_query = sql.SQL(""" - DELETE FROM users WHERE username = %s -""") - response = execute_transaction(delete_query, (user,), commit=True, fetch=False) - + pwcheck = checkPassword(user, password) + if isinstance(pwcheck, Exception): + raise pwcheck + if isinstance(pwcheck, bool): + return bool(pwcheck) + if isinstance(pwcheck, int): + user_id = int(pwcheck) + delete_query = sql.SQL("""DELETE FROM users WHERE id = %s""") + response = execute_transaction(delete_query, (user_id,), commit=True, fetch=False) if isinstance(response, bool): return response - raise TypeError("response :", response) - except Exception as e: - print("deleteUser failed because: \n", e) def addOrganisation(organisationName: str, sessionToken: str): @@ -214,14 +212,16 @@ def addOrganisation(organisationName: str, sessionToken: str): except Exception as e: print("addOrganisation failed because: \n", e) - + + def leaveOrganisation(organisationId: int, sessionToken: str): try: token: Token = tokenDecode(sessionToken) userid = token.id - - delete_query = sql.SQL("DELETE FROM membership WHERE userid = (%s) AND organisationid = (%s) returning organisationid") - execute_transaction(delete_query, (userid,organisationId, ), commit=True) + + delete_query = sql.SQL( + "DELETE FROM membership WHERE userid = (%s) AND organisationid = (%s) returning organisationid") + execute_transaction(delete_query, (userid, organisationId,), commit=True) count_query = sql.SQL("SELECT COUNT(*) FROM membership WHERE organisationid = (%s)") count = execute_transaction(count_query, [organisationId], commit=True) @@ -235,7 +235,6 @@ def leaveOrganisation(organisationId: int, sessionToken: str): except Exception as e: print("leaveOrganisation failed because: \n", e) return False, e - def addUserToOrganisation(organisationName: str, sessionToken: str, newUser: str): @@ -274,6 +273,7 @@ def addUserToOrganisation(organisationName: str, sessionToken: str, newUser: str except Exception as e: 
print("addUserToOrganisation failed because: \n", e) + def addUserToOrganisation2(organisationId: int, newUser: str): try: select_id_query = sql.SQL("SELECT id FROM users WHERE username = (%s)") @@ -281,7 +281,8 @@ def addUserToOrganisation2(organisationId: int, newUser: str): if userid is None: return None, "User does not exist" - insert_query = sql.SQL("INSERT INTO membership (userid, organisationid) VALUES (%s, %s) returning organisationid") + insert_query = sql.SQL( + "INSERT INTO membership (userid, organisationid) VALUES (%s, %s) returning organisationid") organisation_id = execute_transaction(insert_query, (userid[0][0], organisationId), commit=True) if organisation_id is None: return None, "you have no privileges in this organisation" diff --git a/wannadb_web/postgres/util.py b/wannadb_web/postgres/util.py index 59a907cb..1538fb6b 100644 --- a/wannadb_web/postgres/util.py +++ b/wannadb_web/postgres/util.py @@ -1,18 +1,36 @@ +import logging import os +from sqlite3 import OperationalError import psycopg2 -from psycopg2 import extensions, IntegrityError, sql +from psycopg2 import extensions, IntegrityError from psycopg2.sql import SQL DB_NAME = os.environ.get("DATABASE_NAME") DB_USER = os.environ.get("DATABASE_USER") DB_PASSWORD = os.environ.get("DATABASE_PASSWORD") DB_HOST = os.environ.get("DATABASE_HOST") -#DB_HOST = "127.0.0.1" DB_PORT = os.environ.get("DATABASE_PORT") +# DB_NAME = "userManagement" +# DB_USER = "postgres" +# DB_PASSWORD = "0" +# DB_HOST = "127.0.0.1" +# DB_PORT = "5432" + + def connectPG(): + """ + Connect to the PostgreSQL database server + + Returns: + conn (psycopg2 connection object) + + Raise: + OperationalError (if connection fails) + """ + try: conn = psycopg2.connect( dbname=DB_NAME, @@ -21,11 +39,21 @@ def connectPG(): host=DB_HOST, port=DB_PORT) return conn - except Exception as e: - raise Exception("Connection failed because: \n", e) + except OperationalError as e: + raise OperationalError("Connection failed because: \n", e) def execute_transaction(query, params=None, commit=False, fetch=True): + """Execute a query and return the result + + Returns: + list of tuples (if successful), + None (if no result), + False (if error) + Raises: + i IntegrityError + """ + conn = None cur = None try: @@ -46,8 +74,11 @@ def execute_transaction(query, params=None, commit=False, fetch=True): raise IntegrityError(f"Query execution failed for transaction: {query} \nParams: {params} \nError: {e}") except Exception as e: - raise Exception(f"Query execution failed for transaction: {query} \nParams: {params} \nError: {e}") - + logging.error(f"Query execution failed for query:\n" + f"{query} \n" + f"Params: {params} \n" + f"Error: {e}") + return False finally: if conn: conn.close() @@ -56,6 +87,16 @@ def execute_transaction(query, params=None, commit=False, fetch=True): def execute_query(query: SQL, params=None): + """Execute a query and return the result + + Returns: + list of tuples (if successful), + None (if no result), + False (if error) + Raises: + None + """ + conn = None cur = None try: @@ -65,14 +106,18 @@ def execute_query(query: SQL, params=None): cur.execute(query, params) result = cur.fetchall() + if not result: + return None - return result if result else None + return result except Exception as e: - raise Exception(f"Query execution failed for query:\n" - f"{query} \n" - f"Params: {params} \n" - f"Error: {e}") + logging.error(f"Query execution failed for query:\n" + f"{query} \n" + f"Params: {params} \n" + f"Error: {e}") + return False + finally: if conn: 
conn.close() diff --git a/wannadb_web/routing/user.py b/wannadb_web/routing/user.py index 7051ba4c..03912c7b 100644 --- a/wannadb_web/routing/user.py +++ b/wannadb_web/routing/user.py @@ -2,7 +2,8 @@ from flask import Blueprint, request, make_response from wannadb_web.util import Token, tokenEncode, tokenDecode -from wannadb_web.postgres.queries import checkPassword, getMembersOfOrganisation, getOrganisationFromUserId, getOrganisationIDsFromUserId, getOrganisationName, getUserNameSuggestion +from wannadb_web.postgres.queries import checkPassword, getMembersOfOrganisation, getOrganisationFromUserId, \ + getOrganisationIDsFromUserId, getOrganisationName, getUserNameSuggestion from wannadb_web.postgres.transactions import (addUser, addOrganisation, addUserToOrganisation2, deleteUser, leaveOrganisation) @@ -35,17 +36,18 @@ def login(): username = data.get('username') password = data.get('password') - _correct, _id = checkPassword(username, password) - - if _correct: + pwcheck = checkPassword(username, password) + if isinstance(pwcheck, Exception): + raise pwcheck + if isinstance(pwcheck, bool): + return make_response({'message': 'Wrong Password'}, 401) + if isinstance(pwcheck, int): + _id = pwcheck user = Token(username, _id) token = tokenEncode(user.json()) return make_response({'message': 'Log in successfully', 'token': token}, 200) - if not _correct: - return make_response({'message': 'Wrong Password'}, 401) - return make_response({'message': 'User login failed'}, 422) @user_management.route('/deleteUser/', methods=['POST']) @@ -85,6 +87,7 @@ def create_organisation(): return make_response({'organisation_id': organisation_id}, 200) return make_response({"error": error}, 409) + @user_management.route('/leaveOrganisation', methods=['POST']) def leave_organisation(): data = request.get_json() @@ -162,6 +165,7 @@ def add_user_to_organisation(): return make_response({"error": error}, 409) return make_response({'organisation_id': organisation_id}, 200) + @user_management.route('/getOrganisationMembers/<_id>', methods=['GET']) def get_organisation_members(_id): authorization = request.headers.get("authorization") @@ -171,7 +175,7 @@ def get_organisation_members(_id): members_raw = getMembersOfOrganisation(_id) if members_raw is None: - return make_response({'error':'organisation '+_id+' not found'}, 404) + return make_response({'error': 'organisation ' + _id + ' not found'}, 404) members = [] for member in members_raw: @@ -179,6 +183,7 @@ def get_organisation_members(_id): return make_response({"members": members}, 200) + @user_management.route('/get/user/suggestion/<_prefix>', methods=['GET']) def get_user_suggestion(_prefix): authorization = request.headers.get("authorization") @@ -190,4 +195,4 @@ def get_user_suggestion(_prefix): result = [] for member in members_raw: result.append(member[0]) - return make_response({"usernames": result}, 200) \ No newline at end of file + return make_response({"usernames": result}, 200) From f0189c58ccc13e065d12c574a8d039dcb94a5bc3 Mon Sep 17 00:00:00 2001 From: cophilot Date: Mon, 15 Jan 2024 14:19:03 +0100 Subject: [PATCH 135/254] bug fixes --- wannadb_web/postgres/queries.py | 2 +- wannadb_web/postgres/transactions.py | 6 ++++++ wannadb_web/routing/user.py | 17 +++++++++++++---- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/wannadb_web/postgres/queries.py b/wannadb_web/postgres/queries.py index bb4a7c6d..d191735b 100644 --- a/wannadb_web/postgres/queries.py +++ b/wannadb_web/postgres/queries.py @@ -71,7 +71,7 @@ def 
getOrganisationFromUserId(user_id: int): organisations.append({"id": int(org[0]), "name": str(org[1])}) return organisations, None if response is None: - return [-1], None + return [], None return None, "Unexpected response format" except Exception as e: return None, e diff --git a/wannadb_web/postgres/transactions.py b/wannadb_web/postgres/transactions.py index 1b66a1d5..3482b93f 100644 --- a/wannadb_web/postgres/transactions.py +++ b/wannadb_web/postgres/transactions.py @@ -219,6 +219,12 @@ def leaveOrganisation(organisationId: int, sessionToken: str): token: Token = tokenDecode(sessionToken) userid = token.id + count_query = sql.SQL("SELECT COUNT(*) FROM membership WHERE userid = (%s) AND organisationid = (%s)") + count = execute_transaction(count_query,(userid, organisationId,), commit=True) + count = int(count[0][0]) + if count != 1: + return False, "You are not in this organisation" + delete_query = sql.SQL( "DELETE FROM membership WHERE userid = (%s) AND organisationid = (%s) returning organisationid") execute_transaction(delete_query, (userid, organisationId,), commit=True) diff --git a/wannadb_web/routing/user.py b/wannadb_web/routing/user.py index 03912c7b..a170b51e 100644 --- a/wannadb_web/routing/user.py +++ b/wannadb_web/routing/user.py @@ -60,12 +60,22 @@ def delete_user(): if authorization is None: return make_response({'message': 'no authorization '}, 401) - check, _id = checkPassword(username, password) + token = tokenDecode(authorization) if token is None: return make_response({'message': 'no authorization '}, 400) - if check is False or token.id != _id: + + pwcheck = checkPassword(username, password) + _id = None + if isinstance(pwcheck, Exception): + raise pwcheck + if isinstance(pwcheck, bool): + return make_response({'message': 'Wrong Password'}, 401) + if isinstance(pwcheck, int): + _id = pwcheck + + if token.id != _id: return make_response({'message': 'User not authorised '}, 401) response = deleteUser(username, password) @@ -118,7 +128,6 @@ def get_organisations(): @user_management.route('/getOrganisationName/<_id>', methods=['GET']) def get_organisation_name(_id): - print("***HERE***") authorization = request.headers.get("authorization") token = tokenDecode(authorization) if token is None: @@ -143,7 +152,7 @@ def get_organisation_names(): organisations, error = getOrganisationFromUserId(token.id) if error is None: return make_response({'organisations': organisations}, 200) - if organisations < 0: + if organisations <= 0: return make_response({'user is in no organisation'}, 404) return make_response({"error": error}, 409) From 74a5653549da807ceac306e21bb137ef3facad89 Mon Sep 17 00:00:00 2001 From: cophilot Date: Mon, 15 Jan 2024 15:44:44 +0100 Subject: [PATCH 136/254] added delte document endpoint --- .gitignore | 1 + wannadb_web/postgres/queries.py | 12 ++++++++++++ wannadb_web/routing/files.py | 18 +++++++++++++++++- 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index f7ac2a47..5d5db640 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ postgres/__pycache__ flask_app/__pycache__ +1_:memory: \ No newline at end of file diff --git a/wannadb_web/postgres/queries.py b/wannadb_web/postgres/queries.py index d191735b..673c17d8 100644 --- a/wannadb_web/postgres/queries.py +++ b/wannadb_web/postgres/queries.py @@ -238,6 +238,18 @@ def updateDocumentContent(doc_id: int, new_content): print("updateDocumentContent failed because:\n", e) return False +def deleteDocumentContent(doc_id: int): + try: + delete_query = 
sql.SQL("""DELETE + FROM documents + WHERE id = (%s) + """) + execute_transaction(delete_query, (doc_id,), commit=True, fetch=False) + return True + except Exception as e: + print("updateDocumentContent failed because:\n", e) + return False + def getDocuments(document_ids: list[int], user_id: int): select_query = sql.SQL(f"""SELECT name,content,content_byte diff --git a/wannadb_web/routing/files.py b/wannadb_web/routing/files.py index 75e0911b..69701d3c 100644 --- a/wannadb_web/routing/files.py +++ b/wannadb_web/routing/files.py @@ -1,6 +1,6 @@ from flask import Blueprint, request, make_response -from wannadb_web.postgres.queries import getDocument, getDocumentsForOrganization, updateDocumentContent +from wannadb_web.postgres.queries import deleteDocumentContent, getDocument, getDocumentsForOrganization, updateDocumentContent from wannadb_web.util import tokenDecode from wannadb_web.postgres.transactions import addDocument @@ -72,6 +72,22 @@ def update_file_content(): return make_response({"status": status}, 200) +@main_routes.route('/file/delete', methods=['POST']) +def delete_file(): + authorization = request.headers.get("authorization") + + token = tokenDecode(authorization) + if token is None: + return make_response({'error': 'no authorization'}, 401) + + + data = request.get_json() + docId = data.get('documentId') + + status = deleteDocumentContent(docId) + + return make_response({"status": status}, 200) + @main_routes.route('/get/file/<_id>', methods=['GET']) def get_file(_id): From a0fbee78037333787a6af75d962b103e9b27684a Mon Sep 17 00:00:00 2001 From: cophilot Date: Mon, 15 Jan 2024 15:57:28 +0100 Subject: [PATCH 137/254] addded todos --- docker-compose-prod.yaml | 6 ------ wannadb_web/routing/core.py | 14 ++++++++------ 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/docker-compose-prod.yaml b/docker-compose-prod.yaml index 99e93d5c..b25b7418 100644 --- a/docker-compose-prod.yaml +++ b/docker-compose-prod.yaml @@ -14,8 +14,6 @@ services: depends_on: - postgres - redis - volumes: - - ./:/home/wannadb networks: - mynetwork @@ -28,8 +26,6 @@ services: command: ['celery', '-A', 'app.celery', 'worker', '-l', 'info'] env_file: - wannadb_web/.env/.dev - volumes: - - ./:/home/wannadb networks: - mynetwork depends_on: @@ -45,8 +41,6 @@ services: command: ['celery', '-A', 'app.celery', 'flower'] env_file: - wannadb_web/.env/.dev - volumes: - - ./:/home/wannadb networks: - mynetwork ports: diff --git a/wannadb_web/routing/core.py b/wannadb_web/routing/core.py index 3c2bff27..97b06b12 100644 --- a/wannadb_web/routing/core.py +++ b/wannadb_web/routing/core.py @@ -70,7 +70,7 @@ def create_document(): """ form = request.form authorization = request.headers.get("authorization") - authorization = form.get("authorization") + #authorization = form.get("authorization") organisation_id = form.get("organisationId") base_name = form.get("baseName") document_ids = form.get("document_ids") @@ -83,8 +83,10 @@ def create_document(): attributesDump = pickle.dumps(attributes) statisticsDump = pickle.dumps(statistics) - task = create_document_base_task.apply_async(args=(user_id, document_ids, attributesDump, statisticsDump, - base_name,organisation_id)) + # TODO BUG EXPected 5 arguments, got 7 + + task = create_document_base_task.apply_async(args=(user_id, document_ids, attributesDump, statisticsDump,)) + #base_name,organisation_id)) return make_response({'task_id': task.id}, 202) @@ -96,10 +98,10 @@ def longtask(): task_id=task.id)} -@core_routes.route('/status/') -def task_status(task_id): 
+@core_routes.route('/status/') +def task_status(task_id):# -> Any: task: AsyncResult = AsyncResult(task_id) - print(task.status) + # TODO BUG meta = task.info if meta is None: return make_response({"error": "task not found"}, 404) From 74ee592e6115a9a19862422b15c5509d5f8875d0 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 16 Jan 2024 10:23:19 +0100 Subject: [PATCH 138/254] formatting --- backend-requirements.txt | 1 + wannadb_web/SQLite/Cache_DB.py | 8 +++- wannadb_web/postgres/transactions.py | 7 ++- wannadb_web/routing/core.py | 17 ++++---- wannadb_web/worker/Web_API.py | 42 ++++++++++-------- wannadb_web/worker/tasks.py | 64 +++++++++++++++++++++++++--- wannadb_web/worker/util.py | 2 +- 7 files changed, 104 insertions(+), 37 deletions(-) diff --git a/backend-requirements.txt b/backend-requirements.txt index 87a02da3..59ecf0f4 100644 --- a/backend-requirements.txt +++ b/backend-requirements.txt @@ -16,3 +16,4 @@ celery~=5.3.6 flower~=2.0.1 redis~=5.0.1 pickle5~=0.0.11 +mypy==1.5.1 diff --git a/wannadb_web/SQLite/Cache_DB.py b/wannadb_web/SQLite/Cache_DB.py index 75f17121..7c805044 100644 --- a/wannadb_web/SQLite/Cache_DB.py +++ b/wannadb_web/SQLite/Cache_DB.py @@ -6,10 +6,16 @@ class SQLiteCacheDBWrapper: + def __init__(self, user_id: int, db_file="wannadb_cache.db"): """Initialize the RedisCache instance for a specific user.""" - self.db_identifier = f"{user_id}_{db_file}" + if db_file == ":memory:": + self.db_identifier = db_file + else: + self.db_identifier = f"{user_id}_{db_file}" self.cache_db = SQLiteCacheDB(db_file=self.db_identifier) + if self.cache_db.conn is None: + raise Exception("Cache db could not be initialized") def delete(self): self.cache_db.conn.close() diff --git a/wannadb_web/postgres/transactions.py b/wannadb_web/postgres/transactions.py index 3482b93f..bb863f81 100644 --- a/wannadb_web/postgres/transactions.py +++ b/wannadb_web/postgres/transactions.py @@ -142,15 +142,14 @@ def addUser(user: str, password: str): """ - pwBytes = password.encode('utf-8') salt = bcrypt.gensalt() pwHash = bcrypt.hashpw(pwBytes, salt) # Needed this for the correct password check don't know why... 
- pwHash = pwHash.decode('utf-8') + pwHashcode = pwHash.decode('utf-8') insert_data_query = sql.SQL("INSERT INTO users (username, password) VALUES (%s, %s) returning id;") - data_to_insert = (user, pwHash) + data_to_insert = (user, pwHashcode) response = execute_transaction(insert_data_query, data_to_insert, commit=True) if response is IntegrityError: raise IntegrityError("User already exists") @@ -220,7 +219,7 @@ def leaveOrganisation(organisationId: int, sessionToken: str): userid = token.id count_query = sql.SQL("SELECT COUNT(*) FROM membership WHERE userid = (%s) AND organisationid = (%s)") - count = execute_transaction(count_query,(userid, organisationId,), commit=True) + count = execute_transaction(count_query, (userid, organisationId,), commit=True) count = int(count[0][0]) if count != 1: return False, "You are not in this organisation" diff --git a/wannadb_web/routing/core.py b/wannadb_web/routing/core.py index 97b06b12..f23fd8f2 100644 --- a/wannadb_web/routing/core.py +++ b/wannadb_web/routing/core.py @@ -69,24 +69,27 @@ def create_document(): } """ form = request.form - authorization = request.headers.get("authorization") - #authorization = form.get("authorization") + #authorization = request.headers.get("authorization") + authorization = form.get("authorization") organisation_id = form.get("organisationId") base_name = form.get("baseName") document_ids = form.get("document_ids") - attributes = form.get("attributes") + attributes_string = form.get("attributes") _token = tokenDecode(authorization) + attributes = [] + for attribute_string in attributes_string: + attributes.append(Attribute(attribute_string)) + statistics = Statistics(False) user_id = _token.id attributesDump = pickle.dumps(attributes) statisticsDump = pickle.dumps(statistics) - # TODO BUG EXPected 5 arguments, got 7 - task = create_document_base_task.apply_async(args=(user_id, document_ids, attributesDump, statisticsDump,)) - #base_name,organisation_id)) + task = create_document_base_task.apply_async(args=(user_id, document_ids, attributesDump, statisticsDump, + base_name,organisation_id)) return make_response({'task_id': task.id}, 202) @@ -108,10 +111,8 @@ def task_status(task_id):# -> Any: if task.status == "FAILURE": return make_response( {"state": "FAILURE", "meta": str(meta)}, 500) - print(meta) if not isinstance(meta, bytes): return make_response({"error": "task not correct"}, 404) - taskObject = TaskObject.from_dump(meta) return make_response({"state": taskObject.state.value, "meta": taskObject.signals.to_json(), "msg": taskObject.msg}, 200) diff --git a/wannadb_web/worker/Web_API.py b/wannadb_web/worker/Web_API.py index 738fa7e9..5f17f422 100644 --- a/wannadb_web/worker/Web_API.py +++ b/wannadb_web/worker/Web_API.py @@ -2,11 +2,13 @@ import io import json import logging +import traceback from typing import Optional +from wannadb import resources from wannadb.configuration import Pipeline from wannadb.data.data import Attribute, Document, DocumentBase -from wannadb.interaction import EmptyInteractionCallback, InteractionCallback +from wannadb.interaction import EmptyInteractionCallback from wannadb.matching.distance import SignalsMeanDistance from wannadb.matching.matching import RankingBasedMatcher from wannadb.preprocessing.embedding import BERTContextSentenceEmbedder, RelativePositionEmbedder, \ @@ -17,7 +19,6 @@ from wannadb.preprocessing.normalization import CopyNormalizer from wannadb.preprocessing.other_processing import ContextSentenceCacher from wannadb.statistics import Statistics -from 
wannadb.status import StatusCallback from wannadb_web.Redis.RedisCache import RedisCache from wannadb_web.SQLite import Cache_DB from wannadb_web.SQLite.Cache_DB import SQLiteCacheDBWrapper @@ -30,7 +31,7 @@ class WannaDB_WebAPI: - def __init__(self, user_id: int, task_object:TaskObject): + def __init__(self, user_id: int, task_object: TaskObject, document_base_name: str, organisation_id: int): logger.info("WannaDB_WebAPI initialized") self.user_id = user_id self.sqLiteCacheDBWrapper = SQLiteCacheDBWrapper(user_id, db_file=":memory:") @@ -38,10 +39,18 @@ def __init__(self, user_id: int, task_object:TaskObject): self.status_callback = task_object.status_callback self.interaction_callback = task_object.interaction_callback self.signals = task_object.signals + self.document_base_name = document_base_name + self.organisation_id = organisation_id + if resources.MANAGER is None: + self.signals.error.emit("Resource Manager not initialized!") + raise Exception("Resource Manager not initialized!") + if self.sqLiteCacheDBWrapper.cache_db.conn is None: + self.signals.error.emit("Cache db could not be initialized!") + raise Exception("Cache db could not be initialized!") def create_document_base(self, documents: list[Document], attributes: list[Attribute], statistics: Statistics): logger.debug("Called slot 'create_document_base'.") - self.signals.status.emit("Creating document base...", -1) + self.signals.status.emit("create_document_base") try: self.sqLiteCacheDBWrapper.reset_cache_db() @@ -51,10 +60,9 @@ def create_document_base(self, documents: list[Document], attributes: list[Attri if not document_base.validate_consistency(): logger.error("Document base is inconsistent!") error = "Document base is inconsistent!" - return error # load default preprocessing phase - self.signals.status.emit("Loading preprocessing phase...", -1) + self.signals.status.emit("Loading preprocessing phase...") # noinspection PyTypeChecker preprocessing_phase = Pipeline([ @@ -73,20 +81,21 @@ def create_document_base(self, documents: list[Document], attributes: list[Attri preprocessing_phase(document_base, EmptyInteractionCallback(), self.status_callback, statistics) self.signals.document_base_to_ui.emit(document_base) - self.signals.statistics_to_ui.emit(statistics) - self.signals.finished.emit("Finished!") + self.signals.statistics.emit(statistics) + self.signals.finished.emit(1) + self.signals.status.emit("Finished!") except Exception as e: - self.signals.error.emit(e) + traceback_str = traceback.format_exc() + self.signals.error.emit(str(e) + "\n" + traceback_str) - def load_document_base_from_bson(self, document_id: int, user_id: int): + def load_document_base_from_bson(self): logger.debug("Called function 'load_document_base_from_bson'.") - wrapper_cache_db: Optional[SQLiteCacheDBWrapper] = None try: - wrapper_cache_db = Cache_DB.Cache_Manager.user(user_id) - cache_db = wrapper_cache_db.cache_db + self.sqLiteCacheDBWrapper.reset_cache_db() - document = getDocument(document_id, user_id) + document = getDocument(self.document_id, user_id) + get if isinstance(document, str): logger.error("document is not a DocumentBase!") return -1 @@ -96,7 +105,6 @@ def load_document_base_from_bson(self, document_id: int, user_id: int): logger.error("Document base is inconsistent!") return -1 - wrapper_cache_db.reset_cache_db() for attribute in document_base.attributes: cache_db.create_table_by_name(attribute.name) @@ -112,10 +120,10 @@ def load_document_base_from_bson(self, document_id: int, user_id: int): if wrapper_cache_db is not 
None: wrapper_cache_db.disconnect() - def save_document_base_to_bson(self, name: str, organisation_id: int, document_base: DocumentBase, user_id: int): + def save_document_base_to_bson(self, document_base_name: str, organisation_id: int, document_base: DocumentBase, user_id: int): logger.debug("Called function 'save_document_base_to_bson'.") try: - document_id = addDocument(name, document_base.to_bson(), organisation_id, user_id) + document_id = addDocument(document_base_name, document_base.to_bson(), organisation_id, user_id) if document_id is None: logger.error("Document base could not be saved to BSON!") elif document_id == -1: diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index 3e19edaa..acd55f28 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -1,3 +1,4 @@ +import logging import pickle import random import time @@ -5,21 +6,45 @@ from celery import current_app from wannadb.data.data import Document, Attribute +from wannadb.resources import ResourceManager from wannadb.statistics import Statistics +from wannadb_web.Redis.util import RedisConnection from wannadb_web.postgres.queries import getDocuments +from wannadb_web.util import tokenDecode from wannadb_web.worker.Web_API import WannaDB_WebAPI -from wannadb_web.worker.util import TaskObject, State, TaskUpdate +from wannadb_web.worker.util import State, TaskUpdate +from wannadb_web.worker.util import TaskObject class U: - def update_state(*args, **kwargs): print('update_state called with args: ', args, ' and kwargs: ', kwargs) print("meta: ", TaskObject.from_dump(kwargs.get("meta")).signals.to_json()) +logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") +# RedisConnection() +# ResourceManager() +# authorization = ( +# "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VyIjoibGVvbiIsImlkIjoxfQ.YM9gwcXeFSku-bz4RUKkymYvA6Af13sxH-BRlnjCCEA") +# _token = tokenDecode(authorization) +# _base_name = "base_name" +# document_ids = [2, 3] +# attribute = Attribute("a") +# statistics = Statistics(False) +# user_id = 1 +# attributesDump = pickle.dumps([attribute]) +# statisticsDump = pickle.dumps(statistics) +# uuuuuuuu = U() + + @current_app.task(bind=True) -def create_document_base_task(self, user_id, document_ids: [int], attributes_dump: bytes, statistics_dump: bytes): +def create_document_base_task(self, user_id, document_ids: list[int], attributes_dump: bytes, statistics_dump: bytes, + base_name: str, organisation_id: int): + """ + define values + """ + attributes: list[Attribute] = pickle.loads(attributes_dump) statistics: Statistics = pickle.loads(statistics_dump) @@ -34,10 +59,17 @@ def task_callback_fn(state: str, meta: TaskObject): task_object = TaskObject(task_callback) - api = WannaDB_WebAPI(1, task_object) + """ + init api + """ + + api = WannaDB_WebAPI(1, task_object, base_name, organisation_id) task_object.update(state=State.PENDING, msg="api created") try: + """ + decoding + """ if not isinstance(attributes[0], Attribute): task_object.update(State.FAILURE, "Invalid attributes") raise Exception("Invalid attributes") @@ -54,13 +86,33 @@ def task_callback_fn(state: str, meta: TaskObject): documents.append(Document(doc[0], doc[1])) else: print("No documents found") - # raise Exception("No documents found") + """ + Creating document base + """ api.create_document_base(documents, attributes, statistics) + if task_object.signals.error.msg: + task_object.update(State.FAILURE, api.signals) + + """ + saving document base + """ + + 
#api.save_document_base_to_bson() + + """ + response + """ + + if task_object.signals.finished.msg: + task_object.update(State.SUCCESS, task_object.signals.finished.msg) + else: + task_object.update(State.ERROR, "task_object signals not set?") return task_object.to_dump() except Exception as e: - self.update_state(state=State.FAILURE.value, meta={'exception': str(e)}) + task_object.update(State.FAILURE, "Exception: " + str(e)) + task_object.to_dump() @current_app.task(bind=True) diff --git a/wannadb_web/worker/util.py b/wannadb_web/worker/util.py index 249078d3..1970deb9 100644 --- a/wannadb_web/worker/util.py +++ b/wannadb_web/worker/util.py @@ -90,7 +90,7 @@ def update(self, state: State, msg=""): self.msg = msg self.task_update_fn(self.state.value, self) else: - raise Exception("update error State is none") + raise Exception(f"update error State is {type(state)}") def to_dump(self): state = self.state From 2af772032be129c29cfaff6d4a90a2a8ec4e7bc0 Mon Sep 17 00:00:00 2001 From: cophilot Date: Tue, 16 Jan 2024 10:31:26 +0100 Subject: [PATCH 139/254] bug fixes --- wannadb_web/routing/core.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/wannadb_web/routing/core.py b/wannadb_web/routing/core.py index f23fd8f2..f27e21ad 100644 --- a/wannadb_web/routing/core.py +++ b/wannadb_web/routing/core.py @@ -69,17 +69,20 @@ def create_document(): } """ form = request.form - #authorization = request.headers.get("authorization") - authorization = form.get("authorization") - organisation_id = form.get("organisationId") - base_name = form.get("baseName") - document_ids = form.get("document_ids") - attributes_string = form.get("attributes") + data = request.get_json() + + authorization = request.headers.get("authorization") + #authorization = form.get("authorization") + organisation_id = data.get("organisationId") + base_name = data.get("baseName") + document_ids = data.get("document_ids") + attributes_string = data.get("attributes") + print("attributes_string", attributes_string) _token = tokenDecode(authorization) attributes = [] - for attribute_string in attributes_string: - attributes.append(Attribute(attribute_string)) + for att in attributes_string: + attributes.append(Attribute(att)) statistics = Statistics(False) user_id = _token.id From 651268910b39ac83277ceb492a00207eb25e9d23 Mon Sep 17 00:00:00 2001 From: cophilot Date: Tue, 16 Jan 2024 13:36:27 +0100 Subject: [PATCH 140/254] added vloume for worker --- docker-compose-prod.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker-compose-prod.yaml b/docker-compose-prod.yaml index b25b7418..4491c02e 100644 --- a/docker-compose-prod.yaml +++ b/docker-compose-prod.yaml @@ -26,6 +26,8 @@ services: command: ['celery', '-A', 'app.celery', 'worker', '-l', 'info'] env_file: - wannadb_web/.env/.dev + volumes: + - ./:/home/wannadb networks: - mynetwork depends_on: From 2af0aa43fc166be94768157517708fa577578878 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 16 Jan 2024 14:32:36 +0100 Subject: [PATCH 141/254] formatting --- wannadb_web/SQLite/Cache_DB.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/wannadb_web/SQLite/Cache_DB.py b/wannadb_web/SQLite/Cache_DB.py index 7c805044..8ae96af6 100644 --- a/wannadb_web/SQLite/Cache_DB.py +++ b/wannadb_web/SQLite/Cache_DB.py @@ -1,4 +1,5 @@ import logging +from typing import Optional from wannadb_parsql.cache_db import SQLiteCacheDB @@ -6,6 +7,7 @@ class SQLiteCacheDBWrapper: + __cache_db: Optional[SQLiteCacheDB] def 
__init__(self, user_id: int, db_file="wannadb_cache.db"): """Initialize the RedisCache instance for a specific user.""" @@ -13,21 +15,27 @@ def __init__(self, user_id: int, db_file="wannadb_cache.db"): self.db_identifier = db_file else: self.db_identifier = f"{user_id}_{db_file}" - self.cache_db = SQLiteCacheDB(db_file=self.db_identifier) + self.__cache_db = SQLiteCacheDB(db_file=self.db_identifier) if self.cache_db.conn is None: raise Exception("Cache db could not be initialized") + @property + def cache_db(self): + if self.__cache_db is None: + raise Exception("Cache db is not initialized") + return self.__cache_db + def delete(self): self.cache_db.conn.close() - self.cache_db = None + self.__cache_db = None self.db_identifier = None def reset_cache_db(self): logger.debug("Reset cache db") - if self.cache_db is not None: + if self.__cache_db is not None: self.cache_db.conn.close() - self.cache_db = None - self.cache_db = SQLiteCacheDB(db_file=self.db_identifier) + self.__cache_db = None + self.__cache_db = SQLiteCacheDB(db_file=self.db_identifier) def disconnect(self): if self.cache_db is None: From d7fb4c7a158847ae5234cc1c427685937c4ddbf5 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 16 Jan 2024 14:34:29 +0100 Subject: [PATCH 142/254] formatting --- wannadb_web/routing/core.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/wannadb_web/routing/core.py b/wannadb_web/routing/core.py index f27e21ad..04975da5 100644 --- a/wannadb_web/routing/core.py +++ b/wannadb_web/routing/core.py @@ -36,6 +36,7 @@ from wannadb.data.data import Attribute from wannadb.statistics import Statistics from wannadb_web.util import tokenDecode +from wannadb_web.worker.data import nugget_to_json from wannadb_web.worker.tasks import create_document_base_task, long_task from wannadb_web.worker.util import TaskObject @@ -68,18 +69,22 @@ def create_document(): ] } """ - form = request.form data = request.get_json() - + authorization = request.headers.get("authorization") #authorization = form.get("authorization") organisation_id = data.get("organisationId") base_name = data.get("baseName") document_ids = data.get("document_ids") attributes_string = data.get("attributes") - print("attributes_string", attributes_string) + if (organisation_id is None or base_name is None or document_ids is None or attributes_string is None + or authorization is None): + return make_response({"error": "missing parameters"}, 400) _token = tokenDecode(authorization) + if _token is False: + return make_response({"error": "invalid token"}, 401) + attributes = [] for att in attributes_string: attributes.append(Attribute(att)) @@ -105,9 +110,8 @@ def longtask(): @core_routes.route('/status/') -def task_status(task_id):# -> Any: +def task_status(task_id): # -> Any: task: AsyncResult = AsyncResult(task_id) - # TODO BUG meta = task.info if meta is None: return make_response({"error": "task not found"}, 404) From 66206f557669abb22c8331741fd562b7b845ae7a Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 16 Jan 2024 14:35:20 +0100 Subject: [PATCH 143/254] add: json parsing add: new Emitable --- wannadb_web/worker/Signals.py | 106 --------------- wannadb_web/worker/data.py | 241 ++++++++++++++++++++++++++++++++++ 2 files changed, 241 insertions(+), 106 deletions(-) delete mode 100644 wannadb_web/worker/Signals.py create mode 100644 wannadb_web/worker/data.py diff --git a/wannadb_web/worker/Signals.py b/wannadb_web/worker/Signals.py deleted file mode 100644 index b85d3c1c..00000000 --- 
a/wannadb_web/worker/Signals.py +++ /dev/null @@ -1,106 +0,0 @@ -import json -from abc import ABC, abstractmethod -from dataclasses import dataclass -from typing import Optional, Any - - -class Signals: - def __init__(self): - self.feedback = Signal("feedback") - self.status = State("status") - self.finished = Signal("finished") - self.error = State("error") - self.document_base_to_ui = Dump("document_base_to_ui") - self.statistics = Dump("statistics_to_ui") - self.feedback_request_to_ui = Dump("feedback_request_to_ui") - self.cache_db_to_ui = Dump("cache_db_to_ui") - - def to_json(self): - try: - return {self.feedback.type: self.feedback.to_json(), - self.error.type: self.error.to_json(), - self.status.type: self.status.to_json(), - self.finished.type: self.finished.to_json(), - self.document_base_to_ui.type: self.document_base_to_ui.to_json(), - self.statistics.type: self.statistics.to_json(), - self.feedback_request_to_ui.type: self.feedback_request_to_ui.to_json(), - self.cache_db_to_ui.type: self.cache_db_to_ui.to_json()} - except Exception as e: - print(e) - return {} - - -class Emitable(ABC): - @abstractmethod - def __init__(self, emitable_type: str): - self.type = emitable_type - self.__msg = None - - @abstractmethod - def to_json(self): - raise NotImplementedError - - @abstractmethod - def emit(self, status: Any): - raise NotImplementedError - - -@dataclass -class State(Emitable): - def __init__(self, state_type: str): - super().__init__(state_type) - self.__msg = "" - - @property - def msg(self): - return self.__msg - - def to_json(self): - return { - 'type': self.type, - 'msg': str(self.msg) - } - - def emit(self, status: str): - self.__msg = status - - -@dataclass -class Signal(Emitable): - __msg: Optional[float] - - def __init__(self, signal_type: str): - super().__init__(signal_type) - self.__msg = None - - @property - def msg(self): - return self.__msg - - def to_json(self): - return { - 'type': self.type, - 'msg': str(self.msg) - } - - def emit(self, status: float): - self.__msg = status - - -class Dump(Emitable): - def __init__(self, dump_type: str): - super().__init__(dump_type) - self.__msg = None - - @property - def msg(self): - return self.__msg - - def to_json(self): - return { - 'type': self.type, - 'msg': json.dumps(self.msg) - } - - def emit(self, status): - self.__msg = status diff --git a/wannadb_web/worker/data.py b/wannadb_web/worker/data.py new file mode 100644 index 00000000..b25a53e7 --- /dev/null +++ b/wannadb_web/worker/data.py @@ -0,0 +1,241 @@ +import json +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import Optional, Any + +from wannadb.data.data import DocumentBase, InformationNugget, Document, Attribute +from wannadb.data.signals import BaseSignal +from wannadb.statistics import Statistics + + +def signal_to_json(signal: BaseSignal): + return { + "name": signal.identifier, + "signal": "not serializable" + } + + +def nugget_to_json(nugget: InformationNugget): + return { + "text": nugget.text, + "signals": [{"name": name, "signal": signal_to_json(signal)} for name, signal in + nugget.signals.items()], + "document": {"name": nugget.document.name, "text": nugget.document.text}, + "end_char": str(nugget.end_char), + "start_char": str(nugget.start_char)} + + +def document_to_json(document: Document): + return { + "name": document.name, + "text": document.text, + "attribute_mappings": "not implemented yet", + "signals": [{"name": name, "signal": signal_to_json(signal)} for name, signal in + document.signals.items()], + 
"nuggets": [nugget_to_json(nugget) for nugget in document.nuggets] + } + + +def attribute_to_json(attribute: Attribute): + return { + "name": attribute.name + } + + +def document_base_to_json(document_base: DocumentBase): + return { + 'msg': {"attributes ": [attribute.name for attribute in document_base.attributes], + "nuggets": [nugget_to_json(nugget) for nugget in document_base.nuggets] + } + } + + +class Signals: + def __init__(self): + self.feedback = _Signal("feedback") + self.status = _State("status") + self.finished = _Signal("finished") + self.error = _Error("error") + self.document_base_to_ui = _DocumentBase("document_base_to_ui") + self.statistics = _Statistics("statistics_to_ui") + self.feedback_request_to_ui = _Dump("feedback_request_to_ui") + self.cache_db_to_ui = _Dump("cache_db_to_ui") + + def to_json(self) -> dict[str, str]: + try: + return {self.feedback.type: self.feedback.to_json(), + self.error.type: self.error.to_json(), + self.status.type: self.status.to_json(), + self.finished.type: self.finished.to_json(), + self.document_base_to_ui.type: self.document_base_to_ui.to_json(), + self.statistics.type: self.statistics.to_json(), + self.feedback_request_to_ui.type: self.feedback_request_to_ui.to_json(), + self.cache_db_to_ui.type: self.cache_db_to_ui.to_json()} + except Exception as e: + print(e) + return {"error": "signals to json error"} + + +class Emitable(ABC): + __msg: Optional[Any] + + @abstractmethod + def __init__(self, emitable_type: str): + self.type = emitable_type + self.__msg = None + + @abstractmethod + def to_json(self): + raise NotImplementedError + + @abstractmethod + def emit(self, status: Any): + raise NotImplementedError + + +@dataclass +class _State(Emitable): + __msg: Optional[str] + + def __init__(self, state_type: str): + super().__init__(state_type) + self.__msg = "" + + @property + def msg(self): + return self.__msg + + def to_json(self): + return { + 'type': self.type, + 'msg': str(self.msg) + } + + def emit(self, status: str): + self.__msg = status + + +@dataclass +class _Signal(Emitable): + __msg: Optional[float] + + def __init__(self, signal_type: str): + super().__init__(signal_type) + self.__msg = None + + @property + def msg(self): + return self.__msg + + def to_json(self): + return { + 'type': self.type, + 'msg': self.msg + } + + def emit(self, status: float): + self.__msg = status + + +@dataclass +class _Error(Emitable): + __msg: Optional[BaseException] + + def __init__(self, error_type: str): + super().__init__(error_type) + self.__msg = None + + @property + def msg(self): + return self.__msg + + def to_json(self): + return { + 'type': self.type, + 'msg': str(self.msg) + } + + def emit(self, exception: BaseException): + self.__msg = exception + + +@dataclass +class _Nugget(Emitable): + __msg: Optional[InformationNugget] + + def __init__(self, nugget_type: str): + super().__init__(nugget_type) + self.__msg = None + + @property + def msg(self): + return self.__msg + + def to_json(self): + if self.msg is None: + return {} + return nugget_to_json(self.msg) + + def emit(self, status): + self.__msg = status + + +@dataclass +class _DocumentBase(Emitable): + __msg: Optional[DocumentBase] + + def __init__(self, document_type: str): + super().__init__(document_type) + self.__msg = None + + @property + def msg(self): + return self.__msg + + def to_json(self): + if self.msg is None: + return {} + return document_base_to_json(self.msg) + + def emit(self, status): + self.__msg = status + + +class _Statistics(Emitable): + __msg: Statistics + + def 
__init__(self, statistics_type: str): + super().__init__(statistics_type) + self.__msg = Statistics(False) + + @property + def msg(self): + return self.__msg + + def to_json(self): + return { + 'type': self.type, + 'msg': self.__msg.to_serializable() + } + + def emit(self, statistic: Statistics): + self.__msg = statistic + + +class _Dump(Emitable): + def __init__(self, dump_type: str): + super().__init__(dump_type) + self.__msg = None + + @property + def msg(self): + return self.__msg + + def to_json(self): + return { + 'type': self.type, + 'msg': json.dumps(self.msg) + } + + def emit(self, status): + self.__msg = status From 767a4b334bb3f9e92986827270b5f31697c45fe7 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 16 Jan 2024 14:35:47 +0100 Subject: [PATCH 144/254] formatting --- wannadb_web/postgres/queries.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/wannadb_web/postgres/queries.py b/wannadb_web/postgres/queries.py index 673c17d8..2fcd17dc 100644 --- a/wannadb_web/postgres/queries.py +++ b/wannadb_web/postgres/queries.py @@ -114,8 +114,6 @@ def checkOrganisationAuthorisation(organisationName: str, userName: str): return None - - def _getDocument(documentId: int): select_query = sql.SQL("""SELECT content,content_byte from documents @@ -134,7 +132,7 @@ def _getDocument(documentId: int): return None -def getDocument_by_name(document_name: str, organisation_id: int, user_id: int): +def getDocument_by_name(document_name: str, organisation_id: int, user_id: int) -> tuple[str, Union[str, bytes]]: """ Returns: name: str @@ -145,8 +143,6 @@ def getDocument_by_name(document_name: str, organisation_id: int, user_id: int): Exception: if multiple documents with that name are found """ - - select_query = sql.SQL("""SELECT name,content,content_byte FROM documents d JOIN membership m ON d.organisationid = m.organisationid @@ -165,8 +161,7 @@ def getDocument_by_name(document_name: str, organisation_id: int, user_id: int): return str(name), bytes(content) elif len(result) > 1: raise Exception("Multiple documents with the same name found") - else: - raise Exception("No document with that name found") + raise Exception("No document with that name found") def getDocument(document_id: int, user_id: int): @@ -191,7 +186,6 @@ def getDocument(document_id: int, user_id: int): def getDocumentsForOrganization(organisation_id: int): - select_query = sql.SQL("""SELECT id, name,content,content_byte FROM documents WHERE organisationid = (%s) @@ -238,6 +232,7 @@ def updateDocumentContent(doc_id: int, new_content): print("updateDocumentContent failed because:\n", e) return False + def deleteDocumentContent(doc_id: int): try: delete_query = sql.SQL("""DELETE @@ -278,7 +273,6 @@ def getDocuments(document_ids: list[int], user_id: int): return [] - def getDocument_ids(organisation_id: int, user_id: int): select_query = sql.SQL("""SELECT name,content,content_byte from documents @@ -304,4 +298,3 @@ def getDocument_ids(organisation_id: int, user_id: int): b_documents.append((str(name), bytes(content))) return b_documents return [] - From 34c5ba3ad7bd63948a003a0204350f7f66aa7fb5 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 16 Jan 2024 14:36:40 +0100 Subject: [PATCH 145/254] formatting --- wannadb_web/worker/tasks.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index acd55f28..6bc30bca 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -6,20 +6,17 @@ from 
celery import current_app from wannadb.data.data import Document, Attribute -from wannadb.resources import ResourceManager from wannadb.statistics import Statistics -from wannadb_web.Redis.util import RedisConnection from wannadb_web.postgres.queries import getDocuments -from wannadb_web.util import tokenDecode from wannadb_web.worker.Web_API import WannaDB_WebAPI from wannadb_web.worker.util import State, TaskUpdate from wannadb_web.worker.util import TaskObject -class U: - def update_state(*args, **kwargs): - print('update_state called with args: ', args, ' and kwargs: ', kwargs) - print("meta: ", TaskObject.from_dump(kwargs.get("meta")).signals.to_json()) +# class U: +# def update_state(*args, **kwargs): +# print('update_state called with args: ', args, ' and kwargs: ', kwargs) +# print("meta: ", TaskObject.from_dump(kwargs.get("meta")).signals.to_json()) logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") @@ -104,15 +101,15 @@ def task_callback_fn(state: str, meta: TaskObject): response """ - if task_object.signals.finished.msg: - task_object.update(State.SUCCESS, task_object.signals.finished.msg) - else: + if task_object.signals.finished.msg is None: task_object.update(State.ERROR, "task_object signals not set?") + else: + task_object.update(State.SUCCESS, task_object.signals.finished.msg) return task_object.to_dump() except Exception as e: - task_object.update(State.FAILURE, "Exception: " + str(e)) - task_object.to_dump() + #task_object.update(State.FAILURE, str(e)) + raise e @current_app.task(bind=True) From 6150714f0bc4d7b460513f8a195fe5a57f20b60a Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 16 Jan 2024 14:37:04 +0100 Subject: [PATCH 146/254] formatting and access bug fix --- wannadb_web/worker/util.py | 45 +++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/wannadb_web/worker/util.py b/wannadb_web/worker/util.py index 1970deb9..247a6253 100644 --- a/wannadb_web/worker/util.py +++ b/wannadb_web/worker/util.py @@ -1,11 +1,11 @@ import enum import pickle -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Callable, Any, Optional from wannadb.interaction import InteractionCallback from wannadb.status import StatusCallback -from wannadb_web.worker.Signals import Signals +from wannadb_web.worker.data import Signals class TaskUpdate: @@ -35,13 +35,10 @@ class State(enum.Enum): class TaskObject: """Class for representing the response of a task.""" - msg: str - __signals: Signals - - def __init__(self, task_update_fn: Optional[TaskUpdate], state=State.STARTED): - self.task_update_fn = task_update_fn - self.__state = state - self.__signals = Signals() + task_update_fn: Optional[TaskUpdate] + __signals: Signals = field(default_factory=Signals) + __state: State = State.STARTED + msg: str = "" @property def status_callback(self): @@ -68,11 +65,8 @@ def interaction_callback_fn(pipeline_element_identifier, feedback_request): def state(self) -> State: return self.__state - @property - def signals(self) -> Signals: - return self.__signals - - def __set_state(self, state: State): + @state.setter + def state(self, state: State): if not isinstance(state, State): print("update error Invalid state", state) raise Exception("update error Invalid state") @@ -81,27 +75,34 @@ def __set_state(self, state: State): raise Exception("update error State is none") self.__state = state - def __set_signals(self, signals: Signals): + @property + def signals(self) -> 
Signals: + return self.__signals + + @signals.setter + def signals(self, signals: Signals): self.__signals = signals def update(self, state: State, msg=""): + if self.task_update_fn is None: + raise Exception("update error task_update_fn is None do you want to update here?") if isinstance(state, State) and state is not None: - self.__set_state(state) + self.state = state self.msg = msg self.task_update_fn(self.state.value, self) else: raise Exception(f"update error State is {type(state)}") def to_dump(self): - state = self.state - signals = self.signals - msg = self.msg - return pickle.dumps((state, signals, msg)) + _state = self.state + _signals = self.signals + _msg = self.msg + return pickle.dumps((_state, _signals, _msg)) @staticmethod def from_dump(dump: bytes): state, signals, msg = pickle.loads(dump) - to = TaskObject(None, state=state) - to.__set_signals(signals) + to = TaskObject(None,state) + to.signals = signals to.msg = msg return to From eb1ea395e7eaf404ab422dfb34785c62669c5b62 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 16 Jan 2024 14:41:01 +0100 Subject: [PATCH 147/254] fix emit error and formatting --- wannadb_web/worker/Web_API.py | 240 +++++++++++++++++++--------------- 1 file changed, 136 insertions(+), 104 deletions(-) diff --git a/wannadb_web/worker/Web_API.py b/wannadb_web/worker/Web_API.py index 5f17f422..55bbd325 100644 --- a/wannadb_web/worker/Web_API.py +++ b/wannadb_web/worker/Web_API.py @@ -19,10 +19,8 @@ from wannadb.preprocessing.normalization import CopyNormalizer from wannadb.preprocessing.other_processing import ContextSentenceCacher from wannadb.statistics import Statistics -from wannadb_web.Redis.RedisCache import RedisCache -from wannadb_web.SQLite import Cache_DB from wannadb_web.SQLite.Cache_DB import SQLiteCacheDBWrapper -from wannadb_web.postgres.queries import getDocument +from wannadb_web.postgres.queries import getDocument_by_name from wannadb_web.postgres.transactions import addDocument from wannadb_web.worker.util import TaskObject @@ -32,21 +30,21 @@ class WannaDB_WebAPI: def __init__(self, user_id: int, task_object: TaskObject, document_base_name: str, organisation_id: int): - logger.info("WannaDB_WebAPI initialized") self.user_id = user_id self.sqLiteCacheDBWrapper = SQLiteCacheDBWrapper(user_id, db_file=":memory:") - self.redisCache = RedisCache(user_id) self.status_callback = task_object.status_callback self.interaction_callback = task_object.interaction_callback self.signals = task_object.signals self.document_base_name = document_base_name + self.document_base: Optional[DocumentBase] = None self.organisation_id = organisation_id if resources.MANAGER is None: - self.signals.error.emit("Resource Manager not initialized!") + self.signals.error.emit(Exception("Resource Manager not initialized!")) raise Exception("Resource Manager not initialized!") if self.sqLiteCacheDBWrapper.cache_db.conn is None: - self.signals.error.emit("Cache db could not be initialized!") + self.signals.error.emit(Exception("Cache db could not be initialized!")) raise Exception("Cache db could not be initialized!") + logger.info("WannaDB_WebAPI initialized") def create_document_base(self, documents: list[Document], attributes: list[Attribute], statistics: Statistics): logger.debug("Called slot 'create_document_base'.") @@ -59,7 +57,7 @@ def create_document_base(self, documents: list[Document], attributes: list[Attri if not document_base.validate_consistency(): logger.error("Document base is inconsistent!") - error = "Document base is inconsistent!" 
+ self.signals.error.emit(Exception("Document base is inconsistent!")) # load default preprocessing phase self.signals.status.emit("Loading preprocessing phase...") @@ -83,71 +81,82 @@ def create_document_base(self, documents: list[Document], attributes: list[Attri self.signals.document_base_to_ui.emit(document_base) self.signals.statistics.emit(statistics) self.signals.finished.emit(1) + logger.error("Finished!") self.signals.status.emit("Finished!") except Exception as e: - traceback_str = traceback.format_exc() - self.signals.error.emit(str(e) + "\n" + traceback_str) + logger.error(str(e)) + self.signals.error.emit(e) + raise e def load_document_base_from_bson(self): logger.debug("Called function 'load_document_base_from_bson'.") try: self.sqLiteCacheDBWrapper.reset_cache_db() - document = getDocument(self.document_id, user_id) - get + document_id, document = getDocument_by_name(self.document_base_name, self.organisation_id, self.user_id) if isinstance(document, str): logger.error("document is not a DocumentBase!") - return -1 + self.signals.error.emit(Exception("document is not a DocumentBase!")) + return document_base = DocumentBase.from_bson(document) if not document_base.validate_consistency(): logger.error("Document base is inconsistent!") - return -1 - + self.signals.error.emit(Exception("Document base is inconsistent!")) + return for attribute in document_base.attributes: - cache_db.create_table_by_name(attribute.name) - cache_db.create_input_docs_table("input_document", document_base.documents) + self.sqLiteCacheDBWrapper.cache_db.create_table_by_name(attribute.name) + self.sqLiteCacheDBWrapper.cache_db.create_input_docs_table("input_document", document_base.documents) - logger.info(f"Document base loaded from BSON with ID {document_id}.") - return document_base + logger.info(f"Document base loaded from BSON with id {document_id}.") + self.document_base = document_base except Exception as e: logger.error(str(e)) - return -1 - finally: - if wrapper_cache_db is not None: - wrapper_cache_db.disconnect() + self.signals.error.emit(e) + raise e - def save_document_base_to_bson(self, document_base_name: str, organisation_id: int, document_base: DocumentBase, user_id: int): + def save_document_base_to_bson(self): logger.debug("Called function 'save_document_base_to_bson'.") + if self.document_base is None: + logger.error("Document base not loaded!") + self.signals.error.emit(Exception("Document base not loaded!")) + return try: - document_id = addDocument(document_base_name, document_base.to_bson(), organisation_id, user_id) + document_id = addDocument(self.document_base_name, self.document_base.to_bson(), self.organisation_id, + self.user_id) if document_id is None: logger.error("Document base could not be saved to BSON!") elif document_id == -1: logger.error("Document base could not be saved to BSON! Document name already exists!") - return -1 + self.signals.error.emit(Exception("Document base could not be saved to BSON! 
Document name already exists!")) logger.info(f"Document base saved to BSON with ID {document_id}.") - return document_id + self.signals.status.emit(f"Document base saved to BSON with ID {document_id}.") except Exception as e: - logger.debug(str(e)) + logger.error(str(e)) + self.signals.error.emit(e) + raise e - def save_table_to_csv(self, document_base: DocumentBase): + def save_table_to_csv(self): logger.debug("Called function 'save_table_to_csv'.") + if self.document_base is None: + logger.error("Document base not loaded!") + self.signals.error.emit(Exception("Document base not loaded!")) + return try: buffer = io.StringIO() # check that the table is complete - for attribute in document_base.attributes: - for document in document_base.documents: + for attribute in self.document_base.attributes: + for document in self.document_base.documents: if attribute.name not in document.attribute_mappings.keys(): logger.error("Cannot save a table with unpopulated attributes!") - return -1 + self.signals.error.emit(Exception("Cannot save a table with unpopulated attributes!")) # TODO: currently stores the text of the first matching nugget (if there is one) - table_dict = document_base.to_table_dict("text") + table_dict = self.document_base.to_table_dict("text") headers = list(table_dict.keys()) rows = [] for ix in range(len(table_dict[headers[0]])): @@ -158,115 +167,136 @@ def save_table_to_csv(self, document_base: DocumentBase): elif not table_dict[header][ix]: row.append(None) else: - row.append(table_dict[header][ix][0]) + row.append(table_dict[header][ix][0]) # type: ignore rows.append(row) writer = csv.writer(buffer, delimiter=",", quotechar='"', quoting=csv.QUOTE_ALL) writer.writerow(headers) writer.writerows(rows) - except FileNotFoundError: - logger.error("Directory does not exist!") except Exception as e: logger.error(str(e)) + self.signals.error.emit(e) + raise e - def add_attribute(self, name: str, document_base: DocumentBase): + def add_attribute(self, name: str): logger.debug("Called function 'add_attribute'.") - try: - if name in [attribute.name for attribute in document_base.attributes]: - logger.error("Attribute name already exists!") - return -1 + if self.document_base is None: + logger.error("Document base not loaded!") + self.signals.error.emit(Exception("Document base not loaded!")) + elif name in [attribute.name for attribute in self.document_base.attributes]: + logger.error("Attribute name already exists!") + self.signals.error.emit(Exception("Attribute name already exists!")) + elif name == "": + logger.error("Attribute name must not be empty!") + self.signals.error.emit(Exception("Attribute name must not be empty!")) + else: + self.document_base.attributes.append(Attribute(name)) + logger.debug(f"Attribute '{name}' added.") + self.signals.status.emit(f"Attribute '{name}' added.") + + + def add_attributes(self, names: str): + logger.debug("Called function 'add_attributes'.") + if self.document_base is None: + logger.error("Document base not loaded!") + self.signals.error.emit(Exception("Document base not loaded!")) + return + + already_existing_names = [] + for name in names: + if name in [attribute.name for attribute in self.document_base.attributes]: + logger.info(f"Attribute name '{name}' already exists and was thus not added.") + already_existing_names.append(name) elif name == "": - logger.error("Attribute name must not be empty!") - return -1 + logger.info("Attribute name must not be empty and was thus ignored.") else: - document_base.attributes.append(Attribute(name)) + 
self.document_base.attributes.append(Attribute(name)) logger.debug(f"Attribute '{name}' added.") - return 0 - except Exception as e: - logger.error(str(e)) + return already_existing_names - def add_attributes(self, names: str, document_base: DocumentBase): - logger.debug("Called function 'add_attributes'.") - try: - already_existing_names = [] - for name in names: - if name in [attribute.name for attribute in document_base.attributes]: - logger.info(f"Attribute name '{name}' already exists and was thus not added.") - already_existing_names.append(name) - elif name == "": - logger.info("Attribute name must not be empty and was thus ignored.") - else: - document_base.attributes.append(Attribute(name)) - logger.debug(f"Attribute '{name}' added.") - return already_existing_names - except Exception as e: - logger.error(str(e)) - def remove_attribute(self, name: str, document_base: DocumentBase): + def remove_attribute(self, name: str): logger.debug("Called function 'remove_attribute'.") - try: - if name in [attribute.name for attribute in document_base.attributes]: - for document in document_base.documents: - if name in document.attribute_mappings.keys(): - del document.attribute_mappings[name] - - for attribute in document_base.attributes: - if attribute.name == name: - document_base.attributes.remove(attribute) - break - return 0 - else: - logger.error("Attribute name does not exist!") - return -1 - except Exception as e: - logger.error(str(e)) - - def forget_matches_for_attribute(self, name: str, document_base: DocumentBase): + if self.document_base is None: + logger.error("Document base not loaded!") + self.signals.error.emit(Exception("Document base not loaded!")) + return + + if name in [attribute.name for attribute in self.document_base.attributes]: + for document in self.document_base.documents: + if name in document.attribute_mappings.keys(): + del document.attribute_mappings[name] + + for attribute in self.document_base.attributes: + if attribute.name == name: + self.document_base.attributes.remove(attribute) + break + self.signals.status.emit(f"Attribute '{name}' removed.") + else: + logger.error("Attribute name does not exist!") + self.signals.error.emit(Exception("Attribute name does not exist!")) + + + def forget_matches_for_attribute(self, name: str): logger.debug("Called function 'forget_matches_for_attribute'.") + if self.document_base is None: + logger.error("Document base not loaded!") + self.signals.error.emit(Exception("Document base not loaded!")) + return try: - if name in [attribute.name for attribute in document_base.attributes]: - for document in document_base.documents: + if name in [attribute.name for attribute in self.document_base.attributes]: + for document in self.document_base.documents: if name in document.attribute_mappings.keys(): del document.attribute_mappings[name] - return 0 + self.signals.status.emit(f"Matches for attribute '{name}' forgotten.") else: logger.error("Attribute name does not exist!") - return -1 + self.signals.error.emit(Exception("Attribute name does not exist!")) except Exception as e: logger.error(str(e)) + self.signals.error.emit(e) + raise e - def forget_matches(self, name: str, user_id: int, document_base: DocumentBase): + def forget_matches(self, name: str): logger.debug("Called function 'forget_matches'.") - wrapper_cache_db: Optional[SQLiteCacheDBWrapper] = None + if self.document_base is None: + logger.error("Document base not loaded!") + self.signals.error.emit(Exception("Document base not loaded!")) + return try: - 
wrapper_cache_db = Cache_DB.Cache_Manager.user(user_id) - cache_db = wrapper_cache_db.cache_db - for attribute in document_base.attributes: + cache_db = self.sqLiteCacheDBWrapper.cache_db + for attribute in self.document_base.attributes: cache_db.delete_table(attribute.name) cache_db.create_table_by_name(attribute.name) - for document in document_base.documents: + for document in self.document_base.documents: document.attribute_mappings.clear() logger.debug(f"Matche: {name} forgotten.") - return 0 + self.signals.status.emit(f"Matche: {name} forgotten.") except Exception as e: logger.error(str(e)) - return -1 - finally: - if wrapper_cache_db is not None: - wrapper_cache_db.disconnect() + self.signals.error.emit(e) + raise e - def save_statistics_to_json(self, statistics: Statistics): + def save_statistics_to_json(self): logger.debug("Called function 'save_statistics_to_json'.") try: - return json.dumps(statistics.to_serializable(), indent=2) + return json.dumps(self.signals.statistics.to_json(), indent=2) except Exception as e: logger.error(str(e)) + self.signals.error.emit(e) + raise e - def interactive_table_population(self, document_base: DocumentBase, statistics: Statistics): + def interactive_table_population(self): logger.debug("Called slot 'interactive_table_population'.") + try: + if self.document_base is None: + logger.error("Document base not loaded!") + self.signals.error.emit(Exception("Document base not loaded!")) + return + # load default matching phase - self.signals.status.emit("Loading matching phase...", -1) + self.signals.status.emit("Loading matching phase...") # TODO: this should not be implemented here! def find_additional_nuggets(nugget, documents): @@ -321,8 +351,10 @@ def find_additional_nuggets(nugget, documents): ] ) - matching_phase(document_base, self.interaction_callback, self.status_callback, statistics) - self.signals.document_base_to_ui.emit(document_base) - self.signals.finished.emit("Finished!") + matching_phase(self.document_base, self.interaction_callback, self.status_callback, self.signals.statistics.msg) + self.signals.document_base_to_ui.emit(self.document_base) + self.signals.finished.emit(1) except Exception as e: + logger.error(str(e)) self.signals.error.emit(e) + raise e \ No newline at end of file From bb06dc2cd608599f7364bbb75a08ec7975da2deb Mon Sep 17 00:00:00 2001 From: cophilot Date: Tue, 16 Jan 2024 14:49:04 +0100 Subject: [PATCH 148/254] added volume for flower --- docker-compose-prod.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker-compose-prod.yaml b/docker-compose-prod.yaml index 4491c02e..c6abd035 100644 --- a/docker-compose-prod.yaml +++ b/docker-compose-prod.yaml @@ -43,6 +43,8 @@ services: command: ['celery', '-A', 'app.celery', 'flower'] env_file: - wannadb_web/.env/.dev + volumes: + - ./:/home/wannadb networks: - mynetwork ports: From c9650b34ce6678b60b0b877906b462c05f1de6c0 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 16 Jan 2024 20:05:00 +0100 Subject: [PATCH 149/254] add update_document_base task --- wannadb_web/routing/core.py | 20 ++-- wannadb_web/worker/Web_API.py | 180 ++++++++++++++++++---------------- wannadb_web/worker/tasks.py | 60 +++++++++++- wannadb_web/worker/util.py | 7 +- 4 files changed, 162 insertions(+), 105 deletions(-) diff --git a/wannadb_web/routing/core.py b/wannadb_web/routing/core.py index 04975da5..cc874946 100644 --- a/wannadb_web/routing/core.py +++ b/wannadb_web/routing/core.py @@ -69,14 +69,13 @@ def create_document(): ] } """ - data = request.get_json() - - authorization = 
request.headers.get("authorization") - #authorization = form.get("authorization") - organisation_id = data.get("organisationId") - base_name = data.get("baseName") - document_ids = data.get("document_ids") - attributes_string = data.get("attributes") + form = request.form + # authorization = request.headers.get("authorization") + authorization = form.get("authorization") + organisation_id = form.get("organisationId") + base_name = form.get("baseName") + document_ids = form.get("document_ids") + attributes_string = form.get("attributes") if (organisation_id is None or base_name is None or document_ids is None or attributes_string is None or authorization is None): return make_response({"error": "missing parameters"}, 400) @@ -95,9 +94,8 @@ def create_document(): attributesDump = pickle.dumps(attributes) statisticsDump = pickle.dumps(statistics) - task = create_document_base_task.apply_async(args=(user_id, document_ids, attributesDump, statisticsDump, - base_name,organisation_id)) + base_name, organisation_id)) return make_response({'task_id': task.id}, 202) @@ -110,7 +108,7 @@ def longtask(): @core_routes.route('/status/') -def task_status(task_id): # -> Any: +def task_status(task_id): task: AsyncResult = AsyncResult(task_id) meta = task.info if meta is None: diff --git a/wannadb_web/worker/Web_API.py b/wannadb_web/worker/Web_API.py index 55bbd325..34f6ccab 100644 --- a/wannadb_web/worker/Web_API.py +++ b/wannadb_web/worker/Web_API.py @@ -2,7 +2,6 @@ import io import json import logging -import traceback from typing import Optional from wannadb import resources @@ -32,23 +31,22 @@ class WannaDB_WebAPI: def __init__(self, user_id: int, task_object: TaskObject, document_base_name: str, organisation_id: int): self.user_id = user_id self.sqLiteCacheDBWrapper = SQLiteCacheDBWrapper(user_id, db_file=":memory:") - self.status_callback = task_object.status_callback - self.interaction_callback = task_object.interaction_callback - self.signals = task_object.signals + self.task_object = task_object self.document_base_name = document_base_name self.document_base: Optional[DocumentBase] = None self.organisation_id = organisation_id + if resources.MANAGER is None: - self.signals.error.emit(Exception("Resource Manager not initialized!")) + self.task_object.signals.error.emit(Exception("Resource Manager not initialized!")) raise Exception("Resource Manager not initialized!") if self.sqLiteCacheDBWrapper.cache_db.conn is None: - self.signals.error.emit(Exception("Cache db could not be initialized!")) + self.task_object.signals.error.emit(Exception("Cache db could not be initialized!")) raise Exception("Cache db could not be initialized!") logger.info("WannaDB_WebAPI initialized") def create_document_base(self, documents: list[Document], attributes: list[Attribute], statistics: Statistics): logger.debug("Called slot 'create_document_base'.") - self.signals.status.emit("create_document_base") + self.task_object.signals.status.emit("create_document_base") try: self.sqLiteCacheDBWrapper.reset_cache_db() @@ -57,10 +55,10 @@ def create_document_base(self, documents: list[Document], attributes: list[Attri if not document_base.validate_consistency(): logger.error("Document base is inconsistent!") - self.signals.error.emit(Exception("Document base is inconsistent!")) + self.task_object.signals.error.emit(Exception("Document base is inconsistent!")) # load default preprocessing phase - self.signals.status.emit("Loading preprocessing phase...") + self.task_object.signals.status.emit("Loading preprocessing phase...") # 
noinspection PyTypeChecker preprocessing_phase = Pipeline([ @@ -76,17 +74,20 @@ def create_document_base(self, documents: list[Document], attributes: list[Attri RelativePositionEmbedder() ]) - preprocessing_phase(document_base, EmptyInteractionCallback(), self.status_callback, statistics) + preprocessing_phase(document_base, EmptyInteractionCallback(), self.task_object.status_callback, statistics) + + self.document_base = document_base + + self.task_object.signals.document_base_to_ui.emit(document_base) + self.task_object.signals.statistics.emit(statistics) + self.task_object.signals.finished.emit(1) + self.task_object.signals.status.emit("Finished!") + self.task_object.update(None) - self.signals.document_base_to_ui.emit(document_base) - self.signals.statistics.emit(statistics) - self.signals.finished.emit(1) - logger.error("Finished!") - self.signals.status.emit("Finished!") except Exception as e: logger.error(str(e)) - self.signals.error.emit(e) + self.task_object.signals.error.emit(e) raise e def load_document_base_from_bson(self): @@ -97,13 +98,13 @@ def load_document_base_from_bson(self): document_id, document = getDocument_by_name(self.document_base_name, self.organisation_id, self.user_id) if isinstance(document, str): logger.error("document is not a DocumentBase!") - self.signals.error.emit(Exception("document is not a DocumentBase!")) + self.task_object.signals.error.emit(Exception("document is not a DocumentBase!")) return document_base = DocumentBase.from_bson(document) if not document_base.validate_consistency(): logger.error("Document base is inconsistent!") - self.signals.error.emit(Exception("Document base is inconsistent!")) + self.task_object.signals.error.emit(Exception("Document base is inconsistent!")) return for attribute in document_base.attributes: @@ -115,14 +116,14 @@ def load_document_base_from_bson(self): except Exception as e: logger.error(str(e)) - self.signals.error.emit(e) + self.task_object.signals.error.emit(e) raise e def save_document_base_to_bson(self): logger.debug("Called function 'save_document_base_to_bson'.") if self.document_base is None: logger.error("Document base not loaded!") - self.signals.error.emit(Exception("Document base not loaded!")) + self.task_object.signals.error.emit(Exception("Document base not loaded!")) return try: document_id = addDocument(self.document_base_name, self.document_base.to_bson(), self.organisation_id, @@ -131,19 +132,20 @@ def save_document_base_to_bson(self): logger.error("Document base could not be saved to BSON!") elif document_id == -1: logger.error("Document base could not be saved to BSON! Document name already exists!") - self.signals.error.emit(Exception("Document base could not be saved to BSON! Document name already exists!")) + self.task_object.signals.error.emit( + Exception("Document base could not be saved to BSON! 
Document name already exists!")) logger.info(f"Document base saved to BSON with ID {document_id}.") - self.signals.status.emit(f"Document base saved to BSON with ID {document_id}.") + self.task_object.signals.status.emit(f"Document base saved to BSON with ID {document_id}.") except Exception as e: logger.error(str(e)) - self.signals.error.emit(e) + self.task_object.signals.error.emit(e) raise e def save_table_to_csv(self): logger.debug("Called function 'save_table_to_csv'.") if self.document_base is None: logger.error("Document base not loaded!") - self.signals.error.emit(Exception("Document base not loaded!")) + self.task_object.signals.error.emit(Exception("Document base not loaded!")) return try: buffer = io.StringIO() @@ -153,7 +155,8 @@ def save_table_to_csv(self): for document in self.document_base.documents: if attribute.name not in document.attribute_mappings.keys(): logger.error("Cannot save a table with unpopulated attributes!") - self.signals.error.emit(Exception("Cannot save a table with unpopulated attributes!")) + self.task_object.signals.error.emit( + Exception("Cannot save a table with unpopulated attributes!")) # TODO: currently stores the text of the first matching nugget (if there is one) table_dict = self.document_base.to_table_dict("text") @@ -174,93 +177,97 @@ def save_table_to_csv(self): writer.writerows(rows) except Exception as e: logger.error(str(e)) - self.signals.error.emit(e) + self.task_object.signals.error.emit(e) raise e - def add_attribute(self, name: str): + def add_attribute(self, attribute: Attribute): logger.debug("Called function 'add_attribute'.") if self.document_base is None: logger.error("Document base not loaded!") - self.signals.error.emit(Exception("Document base not loaded!")) - elif name in [attribute.name for attribute in self.document_base.attributes]: + self.task_object.signals.error.emit(Exception("Document base not loaded!")) + elif attribute in self.document_base.attributes: logger.error("Attribute name already exists!") - self.signals.error.emit(Exception("Attribute name already exists!")) - elif name == "": - logger.error("Attribute name must not be empty!") - self.signals.error.emit(Exception("Attribute name must not be empty!")) + self.task_object.signals.error.emit(Exception("Attribute name already exists!")) else: - self.document_base.attributes.append(Attribute(name)) - logger.debug(f"Attribute '{name}' added.") - self.signals.status.emit(f"Attribute '{name}' added.") - + self.document_base.attributes.append(attribute) + logger.debug(f"Attribute '{attribute.name}' added.") + self.task_object.signals.status.emit(f"Attribute '{attribute.name}' added.") + self.sqLiteCacheDBWrapper.cache_db.create_table_by_name(attribute.name) + self.task_object.update(None) - def add_attributes(self, names: str): + def add_attributes(self, attributes: list[Attribute]): logger.debug("Called function 'add_attributes'.") if self.document_base is None: logger.error("Document base not loaded!") - self.signals.error.emit(Exception("Document base not loaded!")) + self.task_object.signals.error.emit(Exception("Document base not loaded!")) + self.task_object.update(None) return already_existing_names = [] - for name in names: - if name in [attribute.name for attribute in self.document_base.attributes]: - logger.info(f"Attribute name '{name}' already exists and was thus not added.") - already_existing_names.append(name) - elif name == "": + for attribute in attributes: + if attribute in self.document_base.attributes: + logger.info(f"Attribute name 
'{attribute.name}' already exists and was thus not added.") + already_existing_names.append(attribute) + elif attribute is None: logger.info("Attribute name must not be empty and was thus ignored.") else: - self.document_base.attributes.append(Attribute(name)) - logger.debug(f"Attribute '{name}' added.") + self.document_base.attributes.append(attribute) + self.sqLiteCacheDBWrapper.cache_db.create_table_by_name(attribute.name) + logger.debug(f"Attribute '{attribute.name}' added.") + self.task_object.update(None) return already_existing_names - - def remove_attribute(self, name: str): + def remove_attributes(self, attributes: list[Attribute]): logger.debug("Called function 'remove_attribute'.") if self.document_base is None: logger.error("Document base not loaded!") - self.signals.error.emit(Exception("Document base not loaded!")) + self.task_object.signals.error.emit(Exception("Document base not loaded!")) + self.task_object.update(None) return + for attribute in attributes: + if attribute in self.document_base.attributes: + for document in self.document_base.documents: + if attribute.name in document.attribute_mappings.keys(): + del document.attribute_mappings[attribute.name] + + for old_attribute in self.document_base.attributes: + if old_attribute == attribute: + self.document_base.attributes.remove(attribute) + break + self.task_object.signals.status.emit(f"Attribute '{attribute.name}' removed.") + else: + logger.error("Attribute name does not exist!") + self.task_object.signals.error.emit(Exception("Attribute name does not exist!")) + self.task_object.update(None) - if name in [attribute.name for attribute in self.document_base.attributes]: - for document in self.document_base.documents: - if name in document.attribute_mappings.keys(): - del document.attribute_mappings[name] - - for attribute in self.document_base.attributes: - if attribute.name == name: - self.document_base.attributes.remove(attribute) - break - self.signals.status.emit(f"Attribute '{name}' removed.") - else: - logger.error("Attribute name does not exist!") - self.signals.error.emit(Exception("Attribute name does not exist!")) - +## todo: below not implemented yet - def forget_matches_for_attribute(self, name: str): + def forget_matches_for_attribute(self, attributes: list[Attribute]): logger.debug("Called function 'forget_matches_for_attribute'.") if self.document_base is None: logger.error("Document base not loaded!") - self.signals.error.emit(Exception("Document base not loaded!")) + self.task_object.signals.error.emit(Exception("Document base not loaded!")) return try: - if name in [attribute.name for attribute in self.document_base.attributes]: - for document in self.document_base.documents: - if name in document.attribute_mappings.keys(): - del document.attribute_mappings[name] - self.signals.status.emit(f"Matches for attribute '{name}' forgotten.") - else: - logger.error("Attribute name does not exist!") - self.signals.error.emit(Exception("Attribute name does not exist!")) + for attribute in attributes: + if attribute in self.document_base.attributes: + for document in self.document_base.documents: + if attribute.name in document.attribute_mappings.keys(): + del document.attribute_mappings[attribute.name] + self.task_object.signals.status.emit(f"Matches for attribute '{attribute.name}' forgotten.") + else: + logger.error("Attribute name does not exist!") + self.task_object.signals.error.emit(Exception("Attribute name does not exist!")) except Exception as e: logger.error(str(e)) - self.signals.error.emit(e) + 
self.task_object.signals.error.emit(e) raise e def forget_matches(self, name: str): logger.debug("Called function 'forget_matches'.") if self.document_base is None: logger.error("Document base not loaded!") - self.signals.error.emit(Exception("Document base not loaded!")) + self.task_object.signals.error.emit(Exception("Document base not loaded!")) return try: @@ -271,19 +278,19 @@ def forget_matches(self, name: str): for document in self.document_base.documents: document.attribute_mappings.clear() logger.debug(f"Matche: {name} forgotten.") - self.signals.status.emit(f"Matche: {name} forgotten.") + self.task_object.signals.status.emit(f"Matche: {name} forgotten.") except Exception as e: logger.error(str(e)) - self.signals.error.emit(e) + self.task_object.signals.error.emit(e) raise e def save_statistics_to_json(self): logger.debug("Called function 'save_statistics_to_json'.") try: - return json.dumps(self.signals.statistics.to_json(), indent=2) + return json.dumps(self.task_object.signals.statistics.to_json(), indent=2) except Exception as e: logger.error(str(e)) - self.signals.error.emit(e) + self.task_object.signals.error.emit(e) raise e def interactive_table_population(self): @@ -292,11 +299,11 @@ def interactive_table_population(self): try: if self.document_base is None: logger.error("Document base not loaded!") - self.signals.error.emit(Exception("Document base not loaded!")) + self.task_object.signals.error.emit(Exception("Document base not loaded!")) return # load default matching phase - self.signals.status.emit("Loading matching phase...") + self.task_object.signals.status.emit("Loading matching phase...") # TODO: this should not be implemented here! def find_additional_nuggets(nugget, documents): @@ -351,10 +358,11 @@ def find_additional_nuggets(nugget, documents): ] ) - matching_phase(self.document_base, self.interaction_callback, self.status_callback, self.signals.statistics.msg) - self.signals.document_base_to_ui.emit(self.document_base) - self.signals.finished.emit(1) + matching_phase(self.document_base, self.task_object.interaction_callback, self.task_object.status_callback, + self.task_object.signals.statistics.msg) + self.task_object.signals.document_base_to_ui.emit(self.document_base) + self.task_object.signals.finished.emit(1) except Exception as e: logger.error(str(e)) - self.signals.error.emit(e) - raise e \ No newline at end of file + self.task_object.signals.error.emit(e) + raise e diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index 6bc30bca..a9697c73 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -2,6 +2,7 @@ import pickle import random import time +from typing import Optional from celery import current_app @@ -12,7 +13,6 @@ from wannadb_web.worker.util import State, TaskUpdate from wannadb_web.worker.util import TaskObject - # class U: # def update_state(*args, **kwargs): # print('update_state called with args: ', args, ' and kwargs: ', kwargs) @@ -20,6 +20,8 @@ logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") + + # RedisConnection() # ResourceManager() # authorization = ( @@ -95,7 +97,7 @@ def task_callback_fn(state: str, meta: TaskObject): saving document base """ - #api.save_document_base_to_bson() + api.save_document_base_to_bson() """ response @@ -104,14 +106,64 @@ def task_callback_fn(state: str, meta: TaskObject): if task_object.signals.finished.msg is None: task_object.update(State.ERROR, "task_object signals not set?") else: - 
task_object.update(State.SUCCESS, task_object.signals.finished.msg) + + task_object.update(State.SUCCESS) + + task_object.update(State.SUCCESS) return task_object.to_dump() except Exception as e: - #task_object.update(State.FAILURE, str(e)) + # task_object.update(State.FAILURE, str(e)) raise e +@current_app.task(bind=True) +def update_document_base(self, base_name: str, user_id, attributes_dump: Optional[bytes], statistics_dump: bytes, + organisation_id: int): + """ + define values + """ + statistics: Statistics = pickle.loads(statistics_dump) + + def task_callback_fn(state: str, meta: TaskObject): + if isinstance(state, str) and state is not None and len(state) > 0: + meta_dump = meta.to_dump() + self.update_state(state=state, meta=meta_dump) + else: + raise Exception("task_callback_fn error Invalid state") + + task_callback = TaskUpdate(task_callback_fn) + + task_object = TaskObject(task_callback) + + """ + init api + """ + + api = WannaDB_WebAPI(1, task_object, base_name, organisation_id) + if task_object.signals.error.msg: + task_object.update(State.FAILURE, api.signals) + raise task_object.signals.error.msg + task_object.update(state=State.PENDING, msg="api created") + + api.load_document_base_from_bson() + if task_object.signals.error.msg: + task_object.update(State.FAILURE, api.signals) + raise task_object.signals.error.msg + task_object.update(state=State.PENDING, msg="document base loaded") + + if attributes_dump is not None: + attributes: list[Attribute] = pickle.loads(attributes_dump) + api.add_attributes(attributes) + if task_object.signals.error.msg: + task_object.update(State.FAILURE, api.signals) + raise task_object.signals.error.msg + task_object.update(state=State.PENDING, msg="attributes added") + api.add_attributes(attributes) + + ## todo: further manipulations here + + @current_app.task(bind=True) def long_task(self): try: diff --git a/wannadb_web/worker/util.py b/wannadb_web/worker/util.py index 247a6253..dc88540a 100644 --- a/wannadb_web/worker/util.py +++ b/wannadb_web/worker/util.py @@ -83,15 +83,14 @@ def signals(self) -> Signals: def signals(self, signals: Signals): self.__signals = signals - def update(self, state: State, msg=""): + def update(self, state: Optional[State], msg=""): if self.task_update_fn is None: raise Exception("update error task_update_fn is None do you want to update here?") if isinstance(state, State) and state is not None: self.state = state + if msg is not None: self.msg = msg - self.task_update_fn(self.state.value, self) - else: - raise Exception(f"update error State is {type(state)}") + self.task_update_fn(self.state.value, self) def to_dump(self): _state = self.state From 76a078e62a40400025453dabfd1579e073c795ea Mon Sep 17 00:00:00 2001 From: cophilot Date: Wed, 17 Jan 2024 15:03:48 +0100 Subject: [PATCH 150/254] added /update/document_base --- wannadb_web/routing/core.py | 52 ++++++++++++++++++++++++++++++++++++- wannadb_web/worker/tasks.py | 2 +- 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/wannadb_web/routing/core.py b/wannadb_web/routing/core.py index cc874946..806dfb09 100644 --- a/wannadb_web/routing/core.py +++ b/wannadb_web/routing/core.py @@ -37,7 +37,7 @@ from wannadb.statistics import Statistics from wannadb_web.util import tokenDecode from wannadb_web.worker.data import nugget_to_json -from wannadb_web.worker.tasks import create_document_base_task, long_task +from wannadb_web.worker.tasks import create_document_base_task, long_task, update_document_base from wannadb_web.worker.util import TaskObject 
core_routes = Blueprint('core_routes', __name__, url_prefix='/core') @@ -99,6 +99,56 @@ def create_document(): return make_response({'task_id': task.id}, 202) +@core_routes.route('/update/document_base', methods=['POST']) +def create_document(): + """ + Endpoint for update a document base. + + This endpoint is used to update a document base from a list of attributes. + + Example Header: + { + "Authorization": "your_authorization_token" + } + + Example JSON Payload: + { + "organisationId": "your_organisation_id", + "baseName": "your_document_base_name", + "attributes": [ + "plane","car","bike" + ] + } + """ + form = request.form + # authorization = request.headers.get("authorization") + authorization = form.get("authorization") + organisation_id = form.get("organisationId") + base_name = form.get("baseName") + attributes_string = form.get("attributes") + if (organisation_id is None or base_name is None or attributes_string is None + or authorization is None): + return make_response({"error": "missing parameters"}, 400) + _token = tokenDecode(authorization) + + if _token is False: + return make_response({"error": "invalid token"}, 401) + + attributes = [] + for att in attributes_string: + attributes.append(Attribute(att)) + + statistics = Statistics(False) + user_id = _token.id + + attributesDump = pickle.dumps(attributes) + statisticsDump = pickle.dumps(statistics) + + task = update_document_base.apply_async(args=(user_id, attributesDump, statisticsDump, + base_name, organisation_id)) + + return make_response({'task_id': task.id}, 202) + @core_routes.route('/longtask', methods=['POST']) def longtask(): diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index a9697c73..6a8329dc 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -118,7 +118,7 @@ def task_callback_fn(state: str, meta: TaskObject): @current_app.task(bind=True) -def update_document_base(self, base_name: str, user_id, attributes_dump: Optional[bytes], statistics_dump: bytes, +def update_document_base(self, user_id, attributes_dump: Optional[bytes], statistics_dump: bytes, base_name: str, organisation_id: int): """ define values From d142803abb0d1fb1705988602148defd1fb9f737 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 17 Jan 2024 15:25:21 +0100 Subject: [PATCH 151/254] removed msg and cleand up the task status --- wannadb_web/routing/core.py | 2 +- wannadb_web/worker/data.py | 48 +++++++++++++------------------------ wannadb_web/worker/tasks.py | 37 ++++++++++++++-------------- wannadb_web/worker/util.py | 11 +++------ 4 files changed, 38 insertions(+), 60 deletions(-) diff --git a/wannadb_web/routing/core.py b/wannadb_web/routing/core.py index 806dfb09..26cff126 100644 --- a/wannadb_web/routing/core.py +++ b/wannadb_web/routing/core.py @@ -169,5 +169,5 @@ def task_status(task_id): if not isinstance(meta, bytes): return make_response({"error": "task not correct"}, 404) taskObject = TaskObject.from_dump(meta) - return make_response({"state": taskObject.state.value, "meta": taskObject.signals.to_json(), "msg": taskObject.msg}, + return make_response({"state": taskObject.state.value, "meta": taskObject.signals.to_json()}, 200) diff --git a/wannadb_web/worker/data.py b/wannadb_web/worker/data.py index b25a53e7..86eb1e4d 100644 --- a/wannadb_web/worker/data.py +++ b/wannadb_web/worker/data.py @@ -62,18 +62,15 @@ def __init__(self): self.cache_db_to_ui = _Dump("cache_db_to_ui") def to_json(self) -> dict[str, str]: - try: - return {self.feedback.type: self.feedback.to_json(), - 
self.error.type: self.error.to_json(), - self.status.type: self.status.to_json(), - self.finished.type: self.finished.to_json(), - self.document_base_to_ui.type: self.document_base_to_ui.to_json(), - self.statistics.type: self.statistics.to_json(), - self.feedback_request_to_ui.type: self.feedback_request_to_ui.to_json(), - self.cache_db_to_ui.type: self.cache_db_to_ui.to_json()} - except Exception as e: - print(e) - return {"error": "signals to json error"} + return {self.feedback.type: self.feedback.to_json(), + self.error.type: self.error.to_json(), + self.status.type: self.status.to_json(), + self.finished.type: self.finished.to_json(), + self.document_base_to_ui.type: self.document_base_to_ui.to_json(), + self.statistics.type: self.statistics.to_json(), + self.feedback_request_to_ui.type: self.feedback_request_to_ui.to_json(), + self.cache_db_to_ui.type: self.cache_db_to_ui.to_json()} + class Emitable(ABC): @@ -106,10 +103,7 @@ def msg(self): return self.__msg def to_json(self): - return { - 'type': self.type, - 'msg': str(self.msg) - } + return str(self.msg) def emit(self, status: str): self.__msg = status @@ -128,10 +122,7 @@ def msg(self): return self.__msg def to_json(self): - return { - 'type': self.type, - 'msg': self.msg - } + return str(self.msg) def emit(self, status: float): self.__msg = status @@ -150,10 +141,7 @@ def msg(self): return self.__msg def to_json(self): - return { - 'type': self.type, - 'msg': str(self.msg) - } + return str(self.msg) def emit(self, exception: BaseException): self.__msg = exception @@ -213,10 +201,8 @@ def msg(self): return self.__msg def to_json(self): - return { - 'type': self.type, - 'msg': self.__msg.to_serializable() - } + return self.__msg.to_serializable() + def emit(self, statistic: Statistics): self.__msg = statistic @@ -232,10 +218,8 @@ def msg(self): return self.__msg def to_json(self): - return { - 'type': self.type, - 'msg': json.dumps(self.msg) - } + return json.dumps(self.msg) + def emit(self, status): self.__msg = status diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index 6a8329dc..742e2eae 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -64,21 +64,21 @@ def task_callback_fn(state: str, meta: TaskObject): api = WannaDB_WebAPI(1, task_object, base_name, organisation_id) - task_object.update(state=State.PENDING, msg="api created") + task_object.update(state=State.PENDING) try: """ decoding """ if not isinstance(attributes[0], Attribute): - task_object.update(State.FAILURE, "Invalid attributes") + task_object.update(State.FAILURE) raise Exception("Invalid attributes") if not isinstance(statistics, Statistics): - task_object.update(State.FAILURE, "Invalid statistics") + task_object.update(State.FAILURE) raise Exception("Invalid statistics") docs = getDocuments(document_ids, user_id) - task_object.update(State.PENDING, "Creating document base") + task_object.update(State.PENDING) documents = [] if docs: for doc in docs: @@ -91,7 +91,7 @@ def task_callback_fn(state: str, meta: TaskObject): api.create_document_base(documents, attributes, statistics) if task_object.signals.error.msg: - task_object.update(State.FAILURE, api.signals) + task_object.update(State.FAILURE) """ saving document base @@ -104,10 +104,8 @@ def task_callback_fn(state: str, meta: TaskObject): """ if task_object.signals.finished.msg is None: - task_object.update(State.ERROR, "task_object signals not set?") - else: - - task_object.update(State.SUCCESS) + task_object.update(State.ERROR) + raise Exception("task_object 
signals not set?") task_object.update(State.SUCCESS) return task_object.to_dump() @@ -118,7 +116,7 @@ def task_callback_fn(state: str, meta: TaskObject): @current_app.task(bind=True) -def update_document_base(self, user_id, attributes_dump: Optional[bytes], statistics_dump: bytes, base_name: str, +def update_document_base(self, user_id:int, attributes_dump: Optional[bytes], statistics_dump: bytes, base_name: str, organisation_id: int): """ define values @@ -142,26 +140,27 @@ def task_callback_fn(state: str, meta: TaskObject): api = WannaDB_WebAPI(1, task_object, base_name, organisation_id) if task_object.signals.error.msg: - task_object.update(State.FAILURE, api.signals) + task_object.update(State.FAILURE) raise task_object.signals.error.msg - task_object.update(state=State.PENDING, msg="api created") + task_object.update(state=State.PENDING) api.load_document_base_from_bson() if task_object.signals.error.msg: - task_object.update(State.FAILURE, api.signals) + task_object.update(State.FAILURE) raise task_object.signals.error.msg - task_object.update(state=State.PENDING, msg="document base loaded") + task_object.update(state=State.PENDING) if attributes_dump is not None: attributes: list[Attribute] = pickle.loads(attributes_dump) api.add_attributes(attributes) if task_object.signals.error.msg: - task_object.update(State.FAILURE, api.signals) + task_object.update(State.FAILURE) raise task_object.signals.error.msg - task_object.update(state=State.PENDING, msg="attributes added") + task_object.update(state=State.PENDING) api.add_attributes(attributes) - ## todo: further manipulations here + +## todo: further manipulations here @current_app.task(bind=True) @@ -190,8 +189,8 @@ def task_callback_fn(state: str, meta: TaskObject): random.choice(adjective), random.choice(noun)) time.sleep(1) - task_object.update(state=State.PENDING, msg=data) - task_object.update(state=State.SUCCESS, msg='Task completed!') + task_object.update(state=State.PENDING) + task_object.update(state=State.SUCCESS) return data except Exception as e: self.update_state(state=State.FAILURE.value, meta={'exception': str(e)}) diff --git a/wannadb_web/worker/util.py b/wannadb_web/worker/util.py index dc88540a..8e8178bb 100644 --- a/wannadb_web/worker/util.py +++ b/wannadb_web/worker/util.py @@ -38,7 +38,6 @@ class TaskObject: task_update_fn: Optional[TaskUpdate] __signals: Signals = field(default_factory=Signals) __state: State = State.STARTED - msg: str = "" @property def status_callback(self): @@ -83,25 +82,21 @@ def signals(self) -> Signals: def signals(self, signals: Signals): self.__signals = signals - def update(self, state: Optional[State], msg=""): + def update(self, state: Optional[State]): if self.task_update_fn is None: raise Exception("update error task_update_fn is None do you want to update here?") if isinstance(state, State) and state is not None: self.state = state - if msg is not None: - self.msg = msg self.task_update_fn(self.state.value, self) def to_dump(self): _state = self.state _signals = self.signals - _msg = self.msg - return pickle.dumps((_state, _signals, _msg)) + return pickle.dumps((_state, _signals)) @staticmethod def from_dump(dump: bytes): - state, signals, msg = pickle.loads(dump) + state, signals = pickle.loads(dump) to = TaskObject(None,state) to.signals = signals - to.msg = msg return to From 69064ea471657c2fd7c9749be49cb31c41aa0fc5 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 17 Jan 2024 15:34:38 +0100 Subject: [PATCH 152/254] update rout to restfull api formate --- 
wannadb_web/routing/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wannadb_web/routing/core.py b/wannadb_web/routing/core.py index 26cff126..46c712e2 100644 --- a/wannadb_web/routing/core.py +++ b/wannadb_web/routing/core.py @@ -99,7 +99,7 @@ def create_document(): return make_response({'task_id': task.id}, 202) -@core_routes.route('/update/document_base', methods=['POST']) +@core_routes.route('/document_base/attributes', methods=['UPDATE']) def create_document(): """ Endpoint for update a document base. From e330ae7ccee23f4872f8ec6d5e1445748da7561c Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 17 Jan 2024 15:34:38 +0100 Subject: [PATCH 153/254] update rout to restfull api formate --- wannadb_web/routing/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wannadb_web/routing/core.py b/wannadb_web/routing/core.py index 26cff126..0a2db716 100644 --- a/wannadb_web/routing/core.py +++ b/wannadb_web/routing/core.py @@ -99,8 +99,8 @@ def create_document(): return make_response({'task_id': task.id}, 202) -@core_routes.route('/update/document_base', methods=['POST']) -def create_document(): +@core_routes.route('/document_base/attributes', methods=['UPDATE']) +def document_base(): """ Endpoint for update a document base. From 2075d91abaf8c05ef3ad591783caa98d1dea0441 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 17 Jan 2024 15:59:18 +0100 Subject: [PATCH 154/254] fix bug tokenDecode dos not exist --- wannadb/data/data.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/wannadb/data/data.py b/wannadb/data/data.py index fcb166d4..92a8a2aa 100644 --- a/wannadb/data/data.py +++ b/wannadb/data/data.py @@ -599,7 +599,8 @@ def to_bson(self) -> bytes: serializable_base["documents"].append(serializable_document) logger.info("Convert to BSON bytes.") - bson_bytes: bytes = bson.tokenEncode(serializable_base) + bson_bytes: bytes = bson.encode(serializable_base) + #bson_bytes: bytes = bson.tokenEncode(serializable_base) tack: float = time.time() logger.info(f"Serialized document base in {tack - tick} seconds.") @@ -619,7 +620,8 @@ def from_bson(cls, bson_bytes: bytes) -> "DocumentBase": tick: float = time.time() logger.info("Convert from BSON bytes.") - serialized_base: Dict[str, Any] = bson.tokenDecode(bson_bytes) + serialized_base: Dict[str, Any] = bson.decode(bson_bytes) + #serialized_base: Dict[str, Any] = bson.tokenDecode(bson_bytes) # deserialize the document base document_base: "DocumentBase" = cls([], []) From c4884101bb8921fd8e1639e2055001c635a14816 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 17 Jan 2024 17:34:20 +0100 Subject: [PATCH 155/254] fix bug when adding bytes to db --- wannadb_web/postgres/transactions.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/wannadb_web/postgres/transactions.py b/wannadb_web/postgres/transactions.py index bb863f81..2e56ff95 100644 --- a/wannadb_web/postgres/transactions.py +++ b/wannadb_web/postgres/transactions.py @@ -356,17 +356,23 @@ def adjUserAuthorisation(organisationName: str, sessionToken: str, userToAdjust: def addDocument(name: str, content: Union[str, bytes], organisationId: int, userid: int): try: + if isinstance(content, str): - insert_data_query = sql.SQL("INSERT INTO documents (name,content,organisationid,userid) " - "VALUES (%s, %s,%s, %s) returning id;") - else: - insert_data_query = sql.SQL("INSERT INTO documents (name,content_byte,organisationid,userid) " - "VALUES (%s, %s,%s, %s) returning id;") - 
data_to_insert = (name, content, organisationId, userid) - response = execute_transaction(insert_data_query, data_to_insert, commit=True) - return int(response[0][0]) - except IntegrityError: + insert_data_query = sql.SQL("INSERT INTO documents (name, content, organisationid, userid) " + "VALUES (%s, %s, %s, %s) returning id;") + string_data_to_insert = (name, content, organisationId, userid) + response = execute_transaction(insert_data_query, string_data_to_insert, commit=True) + return int(response[0][0]) + elif isinstance(content, bytes): + insert_data_query = sql.SQL("INSERT INTO documents (name, content_byte, organisationid, userid) " + "VALUES (%s, %s, %s, %s) returning id;") + byte_data_to_insert = (name, content, organisationId, userid) + response = execute_transaction(insert_data_query, byte_data_to_insert, commit=True) + return int(response[0][0]) + + except IntegrityError as i: + logger.error(str(i)) return -1 except Exception as e: - print("addDocument failed because: \n", e) + logger.error(str(e)) From 96bef498b3e6ab9c2a599a6aacb48b8137e11d09 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 17 Jan 2024 17:34:52 +0100 Subject: [PATCH 156/254] fix bug when adding bytes to db --- wannadb_web/worker/Web_API.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/wannadb_web/worker/Web_API.py b/wannadb_web/worker/Web_API.py index 34f6ccab..a8b83bcd 100644 --- a/wannadb_web/worker/Web_API.py +++ b/wannadb_web/worker/Web_API.py @@ -96,10 +96,11 @@ def load_document_base_from_bson(self): self.sqLiteCacheDBWrapper.reset_cache_db() document_id, document = getDocument_by_name(self.document_base_name, self.organisation_id, self.user_id) - if isinstance(document, str): + if not isinstance(document, bytes): logger.error("document is not a DocumentBase!") self.task_object.signals.error.emit(Exception("document is not a DocumentBase!")) return + document_base = DocumentBase.from_bson(document) if not document_base.validate_consistency(): @@ -113,7 +114,6 @@ def load_document_base_from_bson(self): logger.info(f"Document base loaded from BSON with id {document_id}.") self.document_base = document_base - except Exception as e: logger.error(str(e)) self.task_object.signals.error.emit(e) @@ -131,11 +131,13 @@ def save_document_base_to_bson(self): if document_id is None: logger.error("Document base could not be saved to BSON!") elif document_id == -1: - logger.error("Document base could not be saved to BSON! Document name already exists!") + logger.error(f"Document base could not be saved to BSON! Document {self.document_base_name} already exists!") self.task_object.signals.error.emit( - Exception("Document base could not be saved to BSON! Document name already exists!")) - logger.info(f"Document base saved to BSON with ID {document_id}.") - self.task_object.signals.status.emit(f"Document base saved to BSON with ID {document_id}.") + Exception(f"Document base could not be saved to BSON! 
Document {self.document_base_name} already exists!")) + elif document_id > 0: + logger.info(f"Document base saved to BSON with ID {document_id}.") + self.task_object.signals.status.emit(f"Document base saved to BSON with ID {document_id}.") + return except Exception as e: logger.error(str(e)) self.task_object.signals.error.emit(e) From b869548ef5158902bf85b59544968b28c1a22523 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 17 Jan 2024 17:42:44 +0100 Subject: [PATCH 157/254] add todo --- wannadb_web/worker/Web_API.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/wannadb_web/worker/Web_API.py b/wannadb_web/worker/Web_API.py index a8b83bcd..8eb452f9 100644 --- a/wannadb_web/worker/Web_API.py +++ b/wannadb_web/worker/Web_API.py @@ -143,6 +143,8 @@ def save_document_base_to_bson(self): self.task_object.signals.error.emit(e) raise e + +# todo: below not implemented yet def save_table_to_csv(self): logger.debug("Called function 'save_table_to_csv'.") if self.document_base is None: From a167fafbc1421bb36e3611b025277b3bc30fadc1 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 17 Jan 2024 17:43:31 +0100 Subject: [PATCH 158/254] feat(add_attributes): add --- wannadb_web/worker/tasks.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index 742e2eae..e1f729d2 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -62,7 +62,7 @@ def task_callback_fn(state: str, meta: TaskObject): init api """ - api = WannaDB_WebAPI(1, task_object, base_name, organisation_id) + api = WannaDB_WebAPI(user_id, task_object, base_name, organisation_id) task_object.update(state=State.PENDING) try: @@ -116,12 +116,10 @@ def task_callback_fn(state: str, meta: TaskObject): @current_app.task(bind=True) -def update_document_base(self, user_id:int, attributes_dump: Optional[bytes], statistics_dump: bytes, base_name: str, - organisation_id: int): +def add_attributes(self, user_id:int, attributes_dump: Optional[bytes], base_name: str, organisation_id: int): """ define values """ - statistics: Statistics = pickle.loads(statistics_dump) def task_callback_fn(state: str, meta: TaskObject): if isinstance(state, str) and state is not None and len(state) > 0: @@ -138,7 +136,7 @@ def task_callback_fn(state: str, meta: TaskObject): init api """ - api = WannaDB_WebAPI(1, task_object, base_name, organisation_id) + api = WannaDB_WebAPI(user_id, task_object, base_name, organisation_id) if task_object.signals.error.msg: task_object.update(State.FAILURE) raise task_object.signals.error.msg @@ -156,11 +154,7 @@ def task_callback_fn(state: str, meta: TaskObject): if task_object.signals.error.msg: task_object.update(State.FAILURE) raise task_object.signals.error.msg - task_object.update(state=State.PENDING) - api.add_attributes(attributes) - - -## todo: further manipulations here + task_object.update(state=State.SUCCESS) @current_app.task(bind=True) From abf8bbe86ac22c5a902586eb19c3280bc597c092 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 17 Jan 2024 18:15:11 +0100 Subject: [PATCH 159/254] feat: add remove_attributes, forget_matches_for_attribute,forget_matches --- wannadb_web/worker/Web_API.py | 39 ++++----- wannadb_web/worker/tasks.py | 152 +++++++++++++++++++++++++++------- wannadb_web/worker/util.py | 6 ++ 3 files changed, 149 insertions(+), 48 deletions(-) diff --git a/wannadb_web/worker/Web_API.py b/wannadb_web/worker/Web_API.py index 8eb452f9..24416d3f 100644 --- a/wannadb_web/worker/Web_API.py +++ 
b/wannadb_web/worker/Web_API.py @@ -114,6 +114,7 @@ def load_document_base_from_bson(self): logger.info(f"Document base loaded from BSON with id {document_id}.") self.document_base = document_base + except Exception as e: logger.error(str(e)) self.task_object.signals.error.emit(e) @@ -244,50 +245,50 @@ def remove_attributes(self, attributes: list[Attribute]): self.task_object.signals.error.emit(Exception("Attribute name does not exist!")) self.task_object.update(None) -## todo: below not implemented yet - def forget_matches_for_attribute(self, attributes: list[Attribute]): + def forget_matches_for_attribute(self, attribute: Attribute): logger.debug("Called function 'forget_matches_for_attribute'.") if self.document_base is None: logger.error("Document base not loaded!") self.task_object.signals.error.emit(Exception("Document base not loaded!")) return + self.sqLiteCacheDBWrapper.cache_db.delete_table(attribute.name) try: - for attribute in attributes: - if attribute in self.document_base.attributes: - for document in self.document_base.documents: - if attribute.name in document.attribute_mappings.keys(): - del document.attribute_mappings[attribute.name] - self.task_object.signals.status.emit(f"Matches for attribute '{attribute.name}' forgotten.") - else: - logger.error("Attribute name does not exist!") - self.task_object.signals.error.emit(Exception("Attribute name does not exist!")) + if attribute in self.document_base.attributes: + for document in self.document_base.documents: + if attribute.name in document.attribute_mappings.keys(): + del document.attribute_mappings[attribute.name] + self.task_object.signals.status.emit(f"Matches for attribute '{attribute.name}' forgotten.") + self.task_object.signals.document_base_to_ui.emit(self.document_base) + else: + logger.error("Attribute name does not exist!") + self.task_object.signals.error.emit(Exception("Attribute name does not exist!")) except Exception as e: logger.error(str(e)) self.task_object.signals.error.emit(e) raise e - def forget_matches(self, name: str): + def forget_matches(self): logger.debug("Called function 'forget_matches'.") if self.document_base is None: logger.error("Document base not loaded!") self.task_object.signals.error.emit(Exception("Document base not loaded!")) return + for attribute in self.document_base.attributes: + self.sqLiteCacheDBWrapper.cache_db.delete_table(attribute.name) + self.sqLiteCacheDBWrapper.cache_db.create_table_by_name(attribute.name) try: - - cache_db = self.sqLiteCacheDBWrapper.cache_db - for attribute in self.document_base.attributes: - cache_db.delete_table(attribute.name) - cache_db.create_table_by_name(attribute.name) for document in self.document_base.documents: document.attribute_mappings.clear() - logger.debug(f"Matche: {name} forgotten.") - self.task_object.signals.status.emit(f"Matche: {name} forgotten.") + self.task_object.signals.document_base_to_ui.emit(self.document_base) + self.task_object.signals.finished.emit(1) except Exception as e: logger.error(str(e)) self.task_object.signals.error.emit(e) raise e + ## todo: below not implemented yet + def save_statistics_to_json(self): logger.debug("Called function 'save_statistics_to_json'.") try: diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index e1f729d2..50499626 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -40,10 +40,6 @@ @current_app.task(bind=True) def create_document_base_task(self, user_id, document_ids: list[int], attributes_dump: bytes, statistics_dump: bytes, 
base_name: str, organisation_id: int): - """ - define values - """ - attributes: list[Attribute] = pickle.loads(attributes_dump) statistics: Statistics = pickle.loads(statistics_dump) @@ -63,11 +59,10 @@ def task_callback_fn(state: str, meta: TaskObject): """ api = WannaDB_WebAPI(user_id, task_object, base_name, organisation_id) - - task_object.update(state=State.PENDING) + task_object.check() try: """ - decoding + Creating document base """ if not isinstance(attributes[0], Attribute): task_object.update(State.FAILURE) @@ -85,24 +80,12 @@ def task_callback_fn(state: str, meta: TaskObject): documents.append(Document(doc[0], doc[1])) else: print("No documents found") - """ - Creating document base - """ api.create_document_base(documents, attributes, statistics) - if task_object.signals.error.msg: - task_object.update(State.FAILURE) - - """ - saving document base - """ + task_object.check() api.save_document_base_to_bson() - """ - response - """ - if task_object.signals.finished.msg is None: task_object.update(State.ERROR) raise Exception("task_object signals not set?") @@ -116,7 +99,7 @@ def task_callback_fn(state: str, meta: TaskObject): @current_app.task(bind=True) -def add_attributes(self, user_id:int, attributes_dump: Optional[bytes], base_name: str, organisation_id: int): +def add_attributes(self, user_id: int, attributes_dump: Optional[bytes], base_name: str, organisation_id: int): """ define values """ @@ -137,16 +120,10 @@ def task_callback_fn(state: str, meta: TaskObject): """ api = WannaDB_WebAPI(user_id, task_object, base_name, organisation_id) - if task_object.signals.error.msg: - task_object.update(State.FAILURE) - raise task_object.signals.error.msg - task_object.update(state=State.PENDING) + task_object.check() api.load_document_base_from_bson() - if task_object.signals.error.msg: - task_object.update(State.FAILURE) - raise task_object.signals.error.msg - task_object.update(state=State.PENDING) + task_object.check() if attributes_dump is not None: attributes: list[Attribute] = pickle.loads(attributes_dump) @@ -154,6 +131,123 @@ def task_callback_fn(state: str, meta: TaskObject): if task_object.signals.error.msg: task_object.update(State.FAILURE) raise task_object.signals.error.msg + + api.save_document_base_to_bson() + task_object.check() + task_object.update(state=State.SUCCESS) + + +@current_app.task(bind=True) +def remove_attributes(self, user_id: int, attributes_dump: Optional[bytes], base_name: str, organisation_id: int): + """ + define values + """ + + def task_callback_fn(state: str, meta: TaskObject): + if isinstance(state, str) and state is not None and len(state) > 0: + meta_dump = meta.to_dump() + self.update_state(state=state, meta=meta_dump) + else: + raise Exception("task_callback_fn error Invalid state") + + task_callback = TaskUpdate(task_callback_fn) + + task_object = TaskObject(task_callback) + + """ + init api + """ + + api = WannaDB_WebAPI(user_id, task_object, base_name, organisation_id) + task_object.check() + + api.load_document_base_from_bson() + task_object.check() + + if attributes_dump is not None: + attributes: list[Attribute] = pickle.loads(attributes_dump) + api.remove_attributes(attributes) + if task_object.signals.error.msg: + task_object.update(State.FAILURE) + raise task_object.signals.error.msg + + api.save_document_base_to_bson() + task_object.check() + task_object.update(state=State.SUCCESS) + + +@current_app.task(bind=True) +def forget_matches_for_attribute(self, user_id: int, attribute_dump: Optional[bytes], base_name: str, + 
organisation_id: int): + """ + define values + """ + + def task_callback_fn(state: str, meta: TaskObject): + if isinstance(state, str) and state is not None and len(state) > 0: + meta_dump = meta.to_dump() + self.update_state(state=state, meta=meta_dump) + else: + raise Exception("task_callback_fn error Invalid state") + + task_callback = TaskUpdate(task_callback_fn) + + task_object = TaskObject(task_callback) + + """ + init api + """ + + api = WannaDB_WebAPI(user_id, task_object, base_name, organisation_id) + task_object.check() + + api.load_document_base_from_bson() + task_object.check() + + if attribute_dump is not None: + attribute: Attribute = pickle.loads(attribute_dump) + api.forget_matches_for_attribute(attribute) + if task_object.signals.error.msg: + task_object.update(State.FAILURE) + raise task_object.signals.error.msg + + api.save_document_base_to_bson() + task_object.check() + task_object.update(state=State.SUCCESS) + + +@current_app.task(bind=True) +def forget_matches(self, user_id: int, attributes_dump: Optional[bytes], base_name: str, organisation_id: int): + """ + define values + """ + + def task_callback_fn(state: str, meta: TaskObject): + if isinstance(state, str) and state is not None and len(state) > 0: + meta_dump = meta.to_dump() + self.update_state(state=state, meta=meta_dump) + else: + raise Exception("task_callback_fn error Invalid state") + + task_callback = TaskUpdate(task_callback_fn) + + task_object = TaskObject(task_callback) + + """ + init api + """ + + api = WannaDB_WebAPI(user_id, task_object, base_name, organisation_id) + task_object.check() + + api.load_document_base_from_bson() + task_object.check() + + api.forget_matches() + task_object.check() + + api.save_document_base_to_bson() + task_object.check() task_object.update(state=State.SUCCESS) diff --git a/wannadb_web/worker/util.py b/wannadb_web/worker/util.py index 8e8178bb..c4b571d6 100644 --- a/wannadb_web/worker/util.py +++ b/wannadb_web/worker/util.py @@ -100,3 +100,9 @@ def from_dump(dump: bytes): to = TaskObject(None,state) to.signals = signals return to + + def check(self): + self.update(None) + if self.signals.error.msg: + self.update(State.FAILURE) + raise self.signals.error.msg From c9c2c2f56a4b528858a1a38a3e62094a0a7f498a Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Thu, 18 Jan 2024 12:29:27 +0100 Subject: [PATCH 160/254] fix(forget_matches): unnecessary attribute removed --- wannadb_web/worker/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index 50499626..46f5fa48 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -217,7 +217,7 @@ def task_callback_fn(state: str, meta: TaskObject): @current_app.task(bind=True) -def forget_matches(self, user_id: int, attributes_dump: Optional[bytes], base_name: str, organisation_id: int): +def forget_matches(self, user_id: int, base_name: str, organisation_id: int): """ define values """ From 305959ec05d025a94875af310e02ac5e6c524e13 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 23 Jan 2024 17:48:25 +0100 Subject: [PATCH 161/254] giant refactor --- Dockerfile | 2 + app.py | 22 -- celery_app.py | 17 + docker-compose.yaml | 4 +- wannadb_web/Redis/RedisCache.py | 20 +- wannadb_web/Redis/util.py | 38 +- wannadb_web/routing/core.py | 61 +-- wannadb_web/util.py | 1 + wannadb_web/worker/Web_API.py | 139 +++---- wannadb_web/worker/data.py | 134 ++----- wannadb_web/worker/tasks.py | 634 ++++++++++++++++++-------------- wannadb_web/worker/util.py | 8 
+- 12 files changed, 555 insertions(+), 525 deletions(-) create mode 100644 celery_app.py diff --git a/Dockerfile b/Dockerfile index 6f5badb2..51d05a1a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -43,6 +43,8 @@ FROM build as dev #CMD [ "python", "app.py" ] +CMD ["mypy","--install-types", "--non-interactive"] + CMD ["flask", "--app", "app", "--debug", "run","--host","0.0.0.0", "--port", "8000" ] diff --git a/app.py b/app.py index de8a5d05..cfface70 100644 --- a/app.py +++ b/app.py @@ -1,13 +1,9 @@ import logging import os -from celery import Celery, Task from flask import Flask, make_response, render_template_string from flask_cors import CORS from flask_debugtoolbar import DebugToolbarExtension - -from wannadb.resources import ResourceManager -from wannadb_web.Redis.util import RedisConnection from wannadb_web.routing.core import core_routes from wannadb_web.routing.dev import dev_routes from wannadb_web.routing.user import user_management @@ -18,23 +14,6 @@ app = Flask(__name__) -def celery_init_app(_app: Flask) -> Celery: - _app.app_context() - RedisConnection() - ResourceManager() - - class FlaskTask(Task): - - def __call__(self, *args: object, **kwargs: object) -> object: - return self.run(*args, **kwargs) - - celery_app = Celery(_app.name, task_cls=FlaskTask) - celery_app.config_from_object(_app.config) # Use the app's entire configuration - celery_app.set_default() - _app.extensions["celery"] = celery_app - return celery_app - - # Combine Flask and Celery configs app.config.from_mapping( SECRET_KEY='secret!', @@ -52,7 +31,6 @@ def __call__(self, *args: object, **kwargs: object) -> object: toolbar = DebugToolbarExtension(app) -celery = celery_init_app(app) # Register the blueprints app.register_blueprint(main_routes) diff --git a/celery_app.py b/celery_app.py new file mode 100644 index 00000000..742d517e --- /dev/null +++ b/celery_app.py @@ -0,0 +1,17 @@ +import logging +import os + +from celery import Celery + +from wannadb_web.worker.tasks import BaseTask, TestTask, InitManager, CreateDocumentBase + +logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") + +app = Celery(__name__) + +app.conf.broker_url = os.environ.get("CELERY_BROKER_URL") + +app.register_task(BaseTask) +app.register_task(TestTask) +app.register_task(InitManager) +app.register_task(CreateDocumentBase) diff --git a/docker-compose.yaml b/docker-compose.yaml index d14364a3..a43b9ab6 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -25,7 +25,7 @@ services: dockerfile: Dockerfile target: worker tty: true - command: ['celery', '-A', 'app.celery', 'worker', '-l', 'info'] + command: ['celery', '-A', 'celery_app', 'worker', '-l', 'info'] env_file: - wannadb_web/.env/.dev volumes: @@ -42,7 +42,7 @@ services: dockerfile: Dockerfile target: worker tty: true - command: ['celery', '-A', 'app.celery', 'flower'] + command: ['celery', '-A', 'celery_app', 'flower'] env_file: - wannadb_web/.env/.dev volumes: diff --git a/wannadb_web/Redis/RedisCache.py b/wannadb_web/Redis/RedisCache.py index 5a8170d3..a7555430 100644 --- a/wannadb_web/Redis/RedisCache.py +++ b/wannadb_web/Redis/RedisCache.py @@ -1,4 +1,4 @@ -from typing import Optional +from typing import Optional, Union import logging from wannadb_web.Redis import util @@ -9,20 +9,26 @@ class RedisCache: def __init__(self, user_id: int) -> None: """Initialize the RedisCache instance for a specific user.""" - self.redis_client = util.Redis_Connection.redis_client + self.redis_client = util.connectRedis() self.user_space_key 
= f"user:{str(user_id)}" - def set(self, key: str, value: str) -> None: + def set(self, key: str, value: Union[str, bytes, int, float]) -> None: """Set a key-value pair in the user-specific space.""" user_key = f"{self.user_space_key}:{key}" - self.redis_client.set(user_key, value) + self.redis_client.set(name=user_key, value=value) - def get(self, key: str) -> Optional[str]: + def get(self, key: str) -> Optional[Union[str, bytes, int, float]]: """Get the value associated with a key in the user-specific space.""" user_key = f"{self.user_space_key}:{key}" return self.redis_client.get(user_key) + def delete(self, key: str) -> None: + """Delete the key-value pair associated with a key in the user-specific space.""" + user_key = f"{self.user_space_key}:{key}" + self.redis_client.delete(user_key) + + def close(self) -> None: """Close the Redis connection for the user-specific space.""" - self - pass + self.redis_client.close() + self.redis_client = None diff --git a/wannadb_web/Redis/util.py b/wannadb_web/Redis/util.py index 0a20b799..f2bd5d3d 100644 --- a/wannadb_web/Redis/util.py +++ b/wannadb_web/Redis/util.py @@ -1,17 +1,16 @@ -import os -from typing import Optional import logging +import os import redis CACHE_HOST = os.environ.get("CACHE_HOST", "127.0.0.1") CACHE_PORT = int(os.environ.get("CACHE_PORT", 6379)) CACHE_DB = int(os.environ.get("CACHE_DB", 0)) -CACHE_PASSWORD = os.environ.get("CACHE_PASSWORD") +CACHE_PASSWORD = int(os.environ.get("CACHE_PASSWORD", 0)) -logger = logging.getLogger(__name__) +print(CACHE_HOST, CACHE_PORT, CACHE_DB, CACHE_PASSWORD) -Redis_Connection: Optional["RedisConnection"] = None +logger = logging.getLogger(__name__) def connectRedis(): @@ -25,32 +24,3 @@ def connectRedis(): return redis_client except Exception as e: raise Exception("Redis connection failed because:", e) - - -class RedisConnection: - def __init__(self) -> None: - """Initialize the Redis_Connection manager.""" - global Redis_Connection - if Redis_Connection is not None: - logger.error("There can only be one Redis_Connection!") - raise RuntimeError("There can only be one Redis_Connection!") - else: - Redis_Connection = self - self.redis_client = connectRedis() - logger.info("Initialized the Redis_Connection.") - - def __enter__(self) -> "RedisConnection": - """Enter the Redis_Connection context.""" - logger.info("Entered the Redis_Connection.") - return self - - def __exit__(self, exc_type, exc_val, exc_tb) -> None: - """Exit the Redis_Connection context.""" - logger.info("Kill all Redis connections") - global Redis_Connection - if Redis_Connection is None: - logger.error("Redis_Connection is None!") - raise RuntimeError("Redis_Connection is None!") - Redis_Connection.redis_client.close() - Redis_Connection = None - logger.info("Exited the resource manager.") diff --git a/wannadb_web/routing/core.py b/wannadb_web/routing/core.py index 0a2db716..171b7336 100644 --- a/wannadb_web/routing/core.py +++ b/wannadb_web/routing/core.py @@ -29,16 +29,16 @@ """ import logging.config import pickle +from typing import Optional from celery.result import AsyncResult -from flask import Blueprint, make_response, jsonify, url_for, request +from flask import Blueprint, make_response, request from wannadb.data.data import Attribute from wannadb.statistics import Statistics +from wannadb_web.Redis.RedisCache import RedisCache from wannadb_web.util import tokenDecode -from wannadb_web.worker.data import nugget_to_json -from wannadb_web.worker.tasks import create_document_base_task, long_task, update_document_base 
-from wannadb_web.worker.util import TaskObject +from wannadb_web.worker.tasks import CreateDocumentBase core_routes = Blueprint('core_routes', __name__, url_prefix='/core') @@ -72,9 +72,10 @@ def create_document(): form = request.form # authorization = request.headers.get("authorization") authorization = form.get("authorization") - organisation_id = form.get("organisationId") + organisation_id: Optional[int] = form.get("organisationId") base_name = form.get("baseName") - document_ids = form.get("document_ids") + document_ids: Optional[list[int]] = form.get("document_ids") + document_ids = [2, 3] attributes_string = form.get("attributes") if (organisation_id is None or base_name is None or document_ids is None or attributes_string is None or authorization is None): @@ -94,11 +95,12 @@ def create_document(): attributesDump = pickle.dumps(attributes) statisticsDump = pickle.dumps(statistics) - task = create_document_base_task.apply_async(args=(user_id, document_ids, attributesDump, statisticsDump, + task = CreateDocumentBase(user_id).apply_async(args=(document_ids, attributesDump, statisticsDump, base_name, organisation_id)) return make_response({'task_id': task.id}, 202) + @core_routes.route('/document_base/attributes', methods=['UPDATE']) def document_base(): """ @@ -144,30 +146,29 @@ def document_base(): attributesDump = pickle.dumps(attributes) statisticsDump = pickle.dumps(statistics) - task = update_document_base.apply_async(args=(user_id, attributesDump, statisticsDump, - base_name, organisation_id)) - - return make_response({'task_id': task.id}, 202) - -@core_routes.route('/longtask', methods=['POST']) -def longtask(): - task = long_task.apply_async() - return jsonify(str(task.id)), 202, {'Location': url_for('core_routes.task_status', - task_id=task.id)} +# @core_routes.route('/longtask', methods=['POST']) +# def longtask(): +# task = long_task.apply_async() +# return jsonify(str(task.id)), 202, {'Location': url_for('core_routes.task_status', +# task_id=task.id)} -@core_routes.route('/status/') -def task_status(task_id): +@core_routes.route('/status/', methods=['GET']) +def task_status(task_id: str): task: AsyncResult = AsyncResult(task_id) - meta = task.info - if meta is None: - return make_response({"error": "task not found"}, 404) - if task.status == "FAILURE": - return make_response( - {"state": "FAILURE", "meta": str(meta)}, 500) - if not isinstance(meta, bytes): - return make_response({"error": "task not correct"}, 404) - taskObject = TaskObject.from_dump(meta) - return make_response({"state": taskObject.state.value, "meta": taskObject.signals.to_json()}, - 200) + status = task.status + print(task.info) + if status == "FAILURE": + return make_response({"state": "FAILURE", "meta": str(task.result)}, 500) + if status == "SUCCESS": + return make_response({"state": "SUCCESS", "meta": str(task.result)}, 200) + if status is None: + return make_response({"error": "task not found"}, 500) + return make_response({"state": task.status, "meta": str(task.result)}, 202) + + +@core_routes.route('/status/', methods=['POST']) +def task_update(task_id: str): + redis_client = RedisCache(int(task_id)).redis_client + redis_client.set("input", "test") diff --git a/wannadb_web/util.py b/wannadb_web/util.py index 92b857de..305ca3f8 100644 --- a/wannadb_web/util.py +++ b/wannadb_web/util.py @@ -19,6 +19,7 @@ class Authorisation(Enum): def tokenEncode(obj: dict[str, Any]): + obj["exp"] = datetime.datetime.now() + datetime.timedelta(hours=1) return jwt.encode(obj, _jwtkey, algorithm="HS256") diff --git 
a/wannadb_web/worker/Web_API.py b/wannadb_web/worker/Web_API.py index 24416d3f..d59fab3b 100644 --- a/wannadb_web/worker/Web_API.py +++ b/wannadb_web/worker/Web_API.py @@ -4,10 +4,11 @@ import logging from typing import Optional +import wannadb from wannadb import resources from wannadb.configuration import Pipeline from wannadb.data.data import Attribute, Document, DocumentBase -from wannadb.interaction import EmptyInteractionCallback +from wannadb.interaction import EmptyInteractionCallback, BaseInteractionCallback from wannadb.matching.distance import SignalsMeanDistance from wannadb.matching.matching import RankingBasedMatcher from wannadb.preprocessing.embedding import BERTContextSentenceEmbedder, RelativePositionEmbedder, \ @@ -18,35 +19,54 @@ from wannadb.preprocessing.normalization import CopyNormalizer from wannadb.preprocessing.other_processing import ContextSentenceCacher from wannadb.statistics import Statistics +from wannadb.status import BaseStatusCallback, StatusCallback from wannadb_web.SQLite.Cache_DB import SQLiteCacheDBWrapper from wannadb_web.postgres.queries import getDocument_by_name from wannadb_web.postgres.transactions import addDocument -from wannadb_web.worker.util import TaskObject +from wannadb_web.worker.data import Signals logger = logging.getLogger(__name__) class WannaDB_WebAPI: - def __init__(self, user_id: int, task_object: TaskObject, document_base_name: str, organisation_id: int): + def __init__(self, user_id: int, + interaction_callback: BaseInteractionCallback, document_base_name: str, organisation_id: int): + self._document_base: Optional[DocumentBase] = None self.user_id = user_id + self.interaction_callback = interaction_callback + self.signals = Signals(self.user_id) self.sqLiteCacheDBWrapper = SQLiteCacheDBWrapper(user_id, db_file=":memory:") - self.task_object = task_object self.document_base_name = document_base_name - self.document_base: Optional[DocumentBase] = None self.organisation_id = organisation_id - if resources.MANAGER is None: - self.task_object.signals.error.emit(Exception("Resource Manager not initialized!")) + def status_callback_fn(message, progress): + self.signals.status.emit(str(message) + " " + str(progress)) + + self.status_callback = StatusCallback(status_callback_fn) + + if wannadb.resources.MANAGER is None: + self.signals.error.emit(Exception("Resource Manager not initialized!")) raise Exception("Resource Manager not initialized!") if self.sqLiteCacheDBWrapper.cache_db.conn is None: - self.task_object.signals.error.emit(Exception("Cache db could not be initialized!")) + self.signals.error.emit(Exception("Cache db could not be initialized!")) raise Exception("Cache db could not be initialized!") logger.info("WannaDB_WebAPI initialized") + @property + def document_base(self): + return self._document_base + + @document_base.setter + def document_base(self, value: DocumentBase): + if not isinstance(value, DocumentBase): + raise TypeError("Document base must be of type DocumentBase!") + self._document_base = value + self.signals.document_base_to_ui.emit(value) + def create_document_base(self, documents: list[Document], attributes: list[Attribute], statistics: Statistics): logger.debug("Called slot 'create_document_base'.") - self.task_object.signals.status.emit("create_document_base") + self.signals.status.emit("create_document_base") try: self.sqLiteCacheDBWrapper.reset_cache_db() @@ -55,10 +75,10 @@ def create_document_base(self, documents: list[Document], attributes: list[Attri if not document_base.validate_consistency(): 
logger.error("Document base is inconsistent!") - self.task_object.signals.error.emit(Exception("Document base is inconsistent!")) + self.signals.error.emit(Exception("Document base is inconsistent!")) # load default preprocessing phase - self.task_object.signals.status.emit("Loading preprocessing phase...") + self.signals.status.emit("Loading preprocessing phase...") # noinspection PyTypeChecker preprocessing_phase = Pipeline([ @@ -74,20 +94,18 @@ def create_document_base(self, documents: list[Document], attributes: list[Attri RelativePositionEmbedder() ]) - preprocessing_phase(document_base, EmptyInteractionCallback(), self.task_object.status_callback, statistics) + preprocessing_phase(document_base, EmptyInteractionCallback(), self.status_callback, statistics) self.document_base = document_base - self.task_object.signals.document_base_to_ui.emit(document_base) - self.task_object.signals.statistics.emit(statistics) - self.task_object.signals.finished.emit(1) - self.task_object.signals.status.emit("Finished!") - self.task_object.update(None) + self.signals.statistics.emit(statistics) + self.signals.finished.emit(1) + self.signals.status.emit("Finished!") except Exception as e: logger.error(str(e)) - self.task_object.signals.error.emit(e) + self.signals.error.emit(e) raise e def load_document_base_from_bson(self): @@ -98,14 +116,14 @@ def load_document_base_from_bson(self): document_id, document = getDocument_by_name(self.document_base_name, self.organisation_id, self.user_id) if not isinstance(document, bytes): logger.error("document is not a DocumentBase!") - self.task_object.signals.error.emit(Exception("document is not a DocumentBase!")) + self.signals.error.emit(Exception("document is not a DocumentBase!")) return document_base = DocumentBase.from_bson(document) if not document_base.validate_consistency(): logger.error("Document base is inconsistent!") - self.task_object.signals.error.emit(Exception("Document base is inconsistent!")) + self.signals.error.emit(Exception("Document base is inconsistent!")) return for attribute in document_base.attributes: @@ -117,14 +135,14 @@ def load_document_base_from_bson(self): except Exception as e: logger.error(str(e)) - self.task_object.signals.error.emit(e) + self.signals.error.emit(e) raise e def save_document_base_to_bson(self): logger.debug("Called function 'save_document_base_to_bson'.") if self.document_base is None: logger.error("Document base not loaded!") - self.task_object.signals.error.emit(Exception("Document base not loaded!")) + self.signals.error.emit(Exception("Document base not loaded!")) return try: document_id = addDocument(self.document_base_name, self.document_base.to_bson(), self.organisation_id, @@ -132,25 +150,26 @@ def save_document_base_to_bson(self): if document_id is None: logger.error("Document base could not be saved to BSON!") elif document_id == -1: - logger.error(f"Document base could not be saved to BSON! Document {self.document_base_name} already exists!") - self.task_object.signals.error.emit( - Exception(f"Document base could not be saved to BSON! Document {self.document_base_name} already exists!")) + logger.error( + f"Document base could not be saved to BSON! Document {self.document_base_name} already exists!") + self.signals.error.emit( + Exception( + f"Document base could not be saved to BSON! 
Document {self.document_base_name} already exists!")) elif document_id > 0: logger.info(f"Document base saved to BSON with ID {document_id}.") - self.task_object.signals.status.emit(f"Document base saved to BSON with ID {document_id}.") + self.signals.status.emit(f"Document base saved to BSON with ID {document_id}.") return except Exception as e: logger.error(str(e)) - self.task_object.signals.error.emit(e) + self.signals.error.emit(e) raise e - -# todo: below not implemented yet + # todo: below not implemented yet def save_table_to_csv(self): logger.debug("Called function 'save_table_to_csv'.") if self.document_base is None: logger.error("Document base not loaded!") - self.task_object.signals.error.emit(Exception("Document base not loaded!")) + self.signals.error.emit(Exception("Document base not loaded!")) return try: buffer = io.StringIO() @@ -160,7 +179,7 @@ def save_table_to_csv(self): for document in self.document_base.documents: if attribute.name not in document.attribute_mappings.keys(): logger.error("Cannot save a table with unpopulated attributes!") - self.task_object.signals.error.emit( + self.signals.error.emit( Exception("Cannot save a table with unpopulated attributes!")) # TODO: currently stores the text of the first matching nugget (if there is one) @@ -182,30 +201,28 @@ def save_table_to_csv(self): writer.writerows(rows) except Exception as e: logger.error(str(e)) - self.task_object.signals.error.emit(e) + self.signals.error.emit(e) raise e def add_attribute(self, attribute: Attribute): logger.debug("Called function 'add_attribute'.") if self.document_base is None: logger.error("Document base not loaded!") - self.task_object.signals.error.emit(Exception("Document base not loaded!")) + self.signals.error.emit(Exception("Document base not loaded!")) elif attribute in self.document_base.attributes: logger.error("Attribute name already exists!") - self.task_object.signals.error.emit(Exception("Attribute name already exists!")) + self.signals.error.emit(Exception("Attribute name already exists!")) else: self.document_base.attributes.append(attribute) logger.debug(f"Attribute '{attribute.name}' added.") - self.task_object.signals.status.emit(f"Attribute '{attribute.name}' added.") + self.signals.status.emit(f"Attribute '{attribute.name}' added.") self.sqLiteCacheDBWrapper.cache_db.create_table_by_name(attribute.name) - self.task_object.update(None) def add_attributes(self, attributes: list[Attribute]): logger.debug("Called function 'add_attributes'.") if self.document_base is None: logger.error("Document base not loaded!") - self.task_object.signals.error.emit(Exception("Document base not loaded!")) - self.task_object.update(None) + self.signals.error.emit(Exception("Document base not loaded!")) return already_existing_names = [] @@ -219,15 +236,13 @@ def add_attributes(self, attributes: list[Attribute]): self.document_base.attributes.append(attribute) self.sqLiteCacheDBWrapper.cache_db.create_table_by_name(attribute.name) logger.debug(f"Attribute '{attribute.name}' added.") - self.task_object.update(None) return already_existing_names def remove_attributes(self, attributes: list[Attribute]): logger.debug("Called function 'remove_attribute'.") if self.document_base is None: logger.error("Document base not loaded!") - self.task_object.signals.error.emit(Exception("Document base not loaded!")) - self.task_object.update(None) + self.signals.error.emit(Exception("Document base not loaded!")) return for attribute in attributes: if attribute in self.document_base.attributes: @@ 
-239,18 +254,16 @@ def remove_attributes(self, attributes: list[Attribute]): if old_attribute == attribute: self.document_base.attributes.remove(attribute) break - self.task_object.signals.status.emit(f"Attribute '{attribute.name}' removed.") + self.signals.status.emit(f"Attribute '{attribute.name}' removed.") else: logger.error("Attribute name does not exist!") - self.task_object.signals.error.emit(Exception("Attribute name does not exist!")) - self.task_object.update(None) - + self.signals.error.emit(Exception("Attribute name does not exist!")) def forget_matches_for_attribute(self, attribute: Attribute): logger.debug("Called function 'forget_matches_for_attribute'.") if self.document_base is None: logger.error("Document base not loaded!") - self.task_object.signals.error.emit(Exception("Document base not loaded!")) + self.signals.error.emit(Exception("Document base not loaded!")) return self.sqLiteCacheDBWrapper.cache_db.delete_table(attribute.name) try: @@ -258,21 +271,21 @@ def forget_matches_for_attribute(self, attribute: Attribute): for document in self.document_base.documents: if attribute.name in document.attribute_mappings.keys(): del document.attribute_mappings[attribute.name] - self.task_object.signals.status.emit(f"Matches for attribute '{attribute.name}' forgotten.") - self.task_object.signals.document_base_to_ui.emit(self.document_base) + self.signals.status.emit(f"Matches for attribute '{attribute.name}' forgotten.") + self.signals.document_base_to_ui.emit(self.document_base) else: logger.error("Attribute name does not exist!") - self.task_object.signals.error.emit(Exception("Attribute name does not exist!")) + self.signals.error.emit(Exception("Attribute name does not exist!")) except Exception as e: logger.error(str(e)) - self.task_object.signals.error.emit(e) + self.signals.error.emit(e) raise e def forget_matches(self): logger.debug("Called function 'forget_matches'.") if self.document_base is None: logger.error("Document base not loaded!") - self.task_object.signals.error.emit(Exception("Document base not loaded!")) + self.signals.error.emit(Exception("Document base not loaded!")) return for attribute in self.document_base.attributes: self.sqLiteCacheDBWrapper.cache_db.delete_table(attribute.name) @@ -280,11 +293,11 @@ def forget_matches(self): try: for document in self.document_base.documents: document.attribute_mappings.clear() - self.task_object.signals.document_base_to_ui.emit(self.document_base) - self.task_object.signals.finished.emit(1) + self.signals.document_base_to_ui.emit(self.document_base) + self.signals.finished.emit(1) except Exception as e: logger.error(str(e)) - self.task_object.signals.error.emit(e) + self.signals.error.emit(e) raise e ## todo: below not implemented yet @@ -292,10 +305,10 @@ def forget_matches(self): def save_statistics_to_json(self): logger.debug("Called function 'save_statistics_to_json'.") try: - return json.dumps(self.task_object.signals.statistics.to_json(), indent=2) + return json.dumps(self.signals.statistics.to_json(), indent=2) except Exception as e: logger.error(str(e)) - self.task_object.signals.error.emit(e) + self.signals.error.emit(e) raise e def interactive_table_population(self): @@ -304,11 +317,11 @@ def interactive_table_population(self): try: if self.document_base is None: logger.error("Document base not loaded!") - self.task_object.signals.error.emit(Exception("Document base not loaded!")) + self.signals.error.emit(Exception("Document base not loaded!")) return # load default matching phase - 
self.task_object.signals.status.emit("Loading matching phase...") + self.signals.status.emit("Loading matching phase...") # TODO: this should not be implemented here! def find_additional_nuggets(nugget, documents): @@ -363,11 +376,11 @@ def find_additional_nuggets(nugget, documents): ] ) - matching_phase(self.document_base, self.task_object.interaction_callback, self.task_object.status_callback, - self.task_object.signals.statistics.msg) - self.task_object.signals.document_base_to_ui.emit(self.document_base) - self.task_object.signals.finished.emit(1) + matching_phase(self.document_base, self.interaction_callback, self.status_callback, + self.signals.statistics.msg()) + self.signals.document_base_to_ui.emit(self.document_base) + self.signals.finished.emit(1) except Exception as e: logger.error(str(e)) - self.task_object.signals.error.emit(e) + self.signals.error.emit(e) raise e diff --git a/wannadb_web/worker/data.py b/wannadb_web/worker/data.py index 86eb1e4d..83a91354 100644 --- a/wannadb_web/worker/data.py +++ b/wannadb_web/worker/data.py @@ -1,3 +1,4 @@ +import abc import json from abc import ABC, abstractmethod from dataclasses import dataclass @@ -6,6 +7,7 @@ from wannadb.data.data import DocumentBase, InformationNugget, Document, Attribute from wannadb.data.signals import BaseSignal from wannadb.statistics import Statistics +from wannadb_web.Redis.RedisCache import RedisCache def signal_to_json(signal: BaseSignal): @@ -51,15 +53,16 @@ def document_base_to_json(document_base: DocumentBase): class Signals: - def __init__(self): - self.feedback = _Signal("feedback") - self.status = _State("status") - self.finished = _Signal("finished") - self.error = _Error("error") - self.document_base_to_ui = _DocumentBase("document_base_to_ui") - self.statistics = _Statistics("statistics_to_ui") - self.feedback_request_to_ui = _Dump("feedback_request_to_ui") - self.cache_db_to_ui = _Dump("cache_db_to_ui") + def __init__(self, user_id: int): + self.pipeline = _State("pipeline", user_id) + self.feedback = _Signal("feedback", user_id) + self.status = _State("status", user_id) + self.finished = _Signal("finished", user_id) + self.error = _Error("error", user_id) + self.document_base_to_ui = _DocumentBase("document_base_to_ui", user_id) + self.statistics = _Statistics("statistics_to_ui", user_id) + self.feedback_request_to_ui = _Dump("feedback_request_to_ui", user_id) + self.cache_db_to_ui = _Dump("cache_db_to_ui", user_id) def to_json(self) -> dict[str, str]: return {self.feedback.type: self.feedback.to_json(), @@ -72,14 +75,15 @@ def to_json(self) -> dict[str, str]: self.cache_db_to_ui.type: self.cache_db_to_ui.to_json()} +class Emitable(abc.ABC): -class Emitable(ABC): - __msg: Optional[Any] - - @abstractmethod - def __init__(self, emitable_type: str): + def __init__(self, emitable_type: str, user_id: int): self.type = emitable_type - self.__msg = None + self.redis = RedisCache(user_id) + + @property + def msg(self): + return self.redis.get(self.type) @abstractmethod def to_json(self): @@ -90,136 +94,76 @@ def emit(self, status: Any): raise NotImplementedError -@dataclass class _State(Emitable): - __msg: Optional[str] - - def __init__(self, state_type: str): - super().__init__(state_type) - self.__msg = "" - - @property - def msg(self): - return self.__msg def to_json(self): return str(self.msg) def emit(self, status: str): - self.__msg = status + self.redis.set(self.type, status) -@dataclass class _Signal(Emitable): - __msg: Optional[float] - - def __init__(self, signal_type: str): - 
super().__init__(signal_type) - self.__msg = None - - @property - def msg(self): - return self.__msg def to_json(self): return str(self.msg) def emit(self, status: float): - self.__msg = status + self.redis.set(self.type, str(status)) -@dataclass class _Error(Emitable): - __msg: Optional[BaseException] - - def __init__(self, error_type: str): - super().__init__(error_type) - self.__msg = None - - @property - def msg(self): - return self.__msg def to_json(self): return str(self.msg) def emit(self, exception: BaseException): - self.__msg = exception + self.redis.set(self.type, str(exception)) -@dataclass class _Nugget(Emitable): - __msg: Optional[InformationNugget] - - def __init__(self, nugget_type: str): - super().__init__(nugget_type) - self.__msg = None - - @property - def msg(self): - return self.__msg def to_json(self): if self.msg is None: return {} - return nugget_to_json(self.msg) + if not isinstance(self.msg, str): + raise TypeError("_Nugget msg must be of type str") + return json.loads(self.msg) - def emit(self, status): - self.__msg = status + def emit(self, status: InformationNugget): + self.redis.set(self.type, json.dumps(nugget_to_json(status))) -@dataclass class _DocumentBase(Emitable): - __msg: Optional[DocumentBase] - - def __init__(self, document_type: str): - super().__init__(document_type) - self.__msg = None - - @property - def msg(self): - return self.__msg def to_json(self): if self.msg is None: return {} - return document_base_to_json(self.msg) + if not isinstance(self.msg, str): + self.redis.delete(self.type) + raise TypeError("_DocumentBase msg must be of type str, type is: " + str(type(self.msg))) + return json.loads(self.msg) - def emit(self, status): - self.__msg = status + def emit(self, status: DocumentBase): + self.redis.set(self.type, json.dumps(document_base_to_json(status))) class _Statistics(Emitable): - __msg: Statistics - def __init__(self, statistics_type: str): - super().__init__(statistics_type) - self.__msg = Statistics(False) - - @property - def msg(self): - return self.__msg + def msg(self) -> "Statistics": + return Statistics(False) def to_json(self): - return self.__msg.to_serializable() - + return Statistics(False).to_serializable() def emit(self, statistic: Statistics): - self.__msg = statistic + pass class _Dump(Emitable): - def __init__(self, dump_type: str): - super().__init__(dump_type) - self.__msg = None - - @property - def msg(self): - return self.__msg def to_json(self): - return json.dumps(self.msg) - + return self.msg def emit(self, status): - self.__msg = status + self.redis.set(self.type, json.dumps(status)) diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index 46f5fa48..a61ad62f 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -1,285 +1,383 @@ import logging import pickle -import random import time -from typing import Optional +from typing import Optional, Any -from celery import current_app +from celery import Task +from redis import Redis +import wannadb.resources from wannadb.data.data import Document, Attribute +from wannadb.interaction import EmptyInteractionCallback +from wannadb.resources import ResourceManager from wannadb.statistics import Statistics +from wannadb_web.Redis.RedisCache import RedisCache from wannadb_web.postgres.queries import getDocuments from wannadb_web.worker.Web_API import WannaDB_WebAPI -from wannadb_web.worker.util import State, TaskUpdate -from wannadb_web.worker.util import TaskObject - -# class U: -# def update_state(*args, **kwargs): -# 
print('update_state called with args: ', args, ' and kwargs: ', kwargs) -# print("meta: ", TaskObject.from_dump(kwargs.get("meta")).signals.to_json()) - +from wannadb_web.worker.data import Signals +from wannadb_web.worker.util import State logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") -# RedisConnection() -# ResourceManager() -# authorization = ( -# "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VyIjoibGVvbiIsImlkIjoxfQ.YM9gwcXeFSku-bz4RUKkymYvA6Af13sxH-BRlnjCCEA") -# _token = tokenDecode(authorization) -# _base_name = "base_name" -# document_ids = [2, 3] -# attribute = Attribute("a") -# statistics = Statistics(False) -# user_id = 1 -# attributesDump = pickle.dumps([attribute]) -# statisticsDump = pickle.dumps(statistics) -# uuuuuuuu = U() - - -@current_app.task(bind=True) -def create_document_base_task(self, user_id, document_ids: list[int], attributes_dump: bytes, statistics_dump: bytes, - base_name: str, organisation_id: int): - attributes: list[Attribute] = pickle.loads(attributes_dump) - statistics: Statistics = pickle.loads(statistics_dump) - - def task_callback_fn(state: str, meta: TaskObject): - if isinstance(state, str) and state is not None and len(state) > 0: - meta_dump = meta.to_dump() - self.update_state(state=state, meta=meta_dump) +class InitManager(Task): + name = "InitManager" + + def run(self, *args, **kwargs): + ResourceManager() + if wannadb.resources.MANAGER is None: + raise RuntimeError("Resource_Manager is None!") + manager = pickle.dumps(wannadb.resources.MANAGER) + RedisCache(0).set("manager", manager) + + +class BaseTask(Task): + name = "BaseTask" + + def __init__(self, user_id: int = 0): + self._user_id = user_id + self._signals = Signals(user_id) + self._redis_client = RedisCache(user_id) + super().__init__() + + def run(self, *args, **kwargs): + raise NotImplementedError("BaseTask is abstract") + + @staticmethod + def load(): + manager = RedisCache(0).get("manager") + if not isinstance(manager, bytes): + raise RuntimeError("manager is not bytes!") + if manager is None and wannadb.resources.MANAGER is None: + wannadb.resources.ResourceManager() + BaseTask.load() + _MANAGER: Optional["ResourceManager"] = pickle.loads(manager) + wannadb.resources.MANAGER = _MANAGER + + @staticmethod + def save(): + manager = pickle.dumps(wannadb.resources.MANAGER) + RedisCache(0).set("manager", manager) + + def update(self, + state: Optional[State] = None, + meta: Optional[dict[str, Any]] = None, + ) -> None: + if meta: + super().update_state(meta=meta) else: - raise Exception("task_callback_fn error Invalid state") - - task_callback = TaskUpdate(task_callback_fn) - - task_object = TaskObject(task_callback) - - """ - init api - """ + super().update_state(state=str(state.value if state else None), + meta=self._signals.to_json()) + + def update_state(self, + task_id: Optional[str] = None, + state: Optional[str] = None, + meta: Any = None, + **kwargs: Any + ) -> None: + raise NotImplementedError("user update() instead") + + def get_new_input(self): + _input = self._redis_client.get("input") + if _input is not None: + pass + + return _input + + +class TestTask(BaseTask): + name = "TestTask" + + def run(self, *args, **kwargs): + super().run() + self.update(state=State.PENDING) + while True: + _input = self.get_new_input() + if _input is not None: + print(_input) + self.update(state=State.SUCCESS, meta={"msg": _input}) + time.sleep(2) + self.update(state=State.WAITING, meta={"msg": "waiting"}) + time.sleep(2) + + +class 
CreateDocumentBase(BaseTask): + name = "CreateDocumentBase" + + def run(self, document_ids: list[int], attributes_dump: bytes, statistics_dump: bytes, + base_name: str, organisation_id: int): + self.load() + attributes: list[Attribute] = pickle.loads(attributes_dump) + statistics: Statistics = pickle.loads(statistics_dump) - api = WannaDB_WebAPI(user_id, task_object, base_name, organisation_id) - task_object.check() - try: """ - Creating document base + init api """ - if not isinstance(attributes[0], Attribute): - task_object.update(State.FAILURE) - raise Exception("Invalid attributes") - - if not isinstance(statistics, Statistics): - task_object.update(State.FAILURE) - raise Exception("Invalid statistics") - - docs = getDocuments(document_ids, user_id) - task_object.update(State.PENDING) - documents = [] - if docs: - for doc in docs: - documents.append(Document(doc[0], doc[1])) - else: - print("No documents found") - - api.create_document_base(documents, attributes, statistics) - task_object.check() - - api.save_document_base_to_bson() - - if task_object.signals.finished.msg is None: - task_object.update(State.ERROR) - raise Exception("task_object signals not set?") - - task_object.update(State.SUCCESS) - return task_object.to_dump() - - except Exception as e: - # task_object.update(State.FAILURE, str(e)) - raise e - - -@current_app.task(bind=True) -def add_attributes(self, user_id: int, attributes_dump: Optional[bytes], base_name: str, organisation_id: int): - """ - define values - """ - - def task_callback_fn(state: str, meta: TaskObject): - if isinstance(state, str) and state is not None and len(state) > 0: - meta_dump = meta.to_dump() - self.update_state(state=state, meta=meta_dump) - else: - raise Exception("task_callback_fn error Invalid state") - - task_callback = TaskUpdate(task_callback_fn) - - task_object = TaskObject(task_callback) - - """ - init api - """ - - api = WannaDB_WebAPI(user_id, task_object, base_name, organisation_id) - task_object.check() - - api.load_document_base_from_bson() - task_object.check() - - if attributes_dump is not None: - attributes: list[Attribute] = pickle.loads(attributes_dump) - api.add_attributes(attributes) - if task_object.signals.error.msg: - task_object.update(State.FAILURE) - raise task_object.signals.error.msg - - api.save_document_base_to_bson() - task_object.check() - task_object.update(state=State.SUCCESS) - - -@current_app.task(bind=True) -def remove_attributes(self, user_id: int, attributes_dump: Optional[bytes], base_name: str, organisation_id: int): - """ - define values - """ - - def task_callback_fn(state: str, meta: TaskObject): - if isinstance(state, str) and state is not None and len(state) > 0: - meta_dump = meta.to_dump() - self.update_state(state=state, meta=meta_dump) - else: - raise Exception("task_callback_fn error Invalid state") - - task_callback = TaskUpdate(task_callback_fn) - - task_object = TaskObject(task_callback) - - """ - init api - """ - - api = WannaDB_WebAPI(user_id, task_object, base_name, organisation_id) - task_object.check() - - api.load_document_base_from_bson() - task_object.check() - - if attributes_dump is not None: - attributes: list[Attribute] = pickle.loads(attributes_dump) - api.remove_attributes(attributes) - if task_object.signals.error.msg: - task_object.update(State.FAILURE) - raise task_object.signals.error.msg - - api.save_document_base_to_bson() - task_object.check() - task_object.update(state=State.SUCCESS) - - -@current_app.task(bind=True) -def forget_matches_for_attribute(self, user_id: 
int, attribute_dump: Optional[bytes], base_name: str, - organisation_id: int): - """ - define values - """ - - def task_callback_fn(state: str, meta: TaskObject): - if isinstance(state, str) and state is not None and len(state) > 0: - meta_dump = meta.to_dump() - self.update_state(state=state, meta=meta_dump) - else: - raise Exception("task_callback_fn error Invalid state") - - task_callback = TaskUpdate(task_callback_fn) - - task_object = TaskObject(task_callback) - - """ - init api - """ - - api = WannaDB_WebAPI(user_id, task_object, base_name, organisation_id) - task_object.check() - - api.load_document_base_from_bson() - task_object.check() - - if attribute_dump is not None: - attribute: Attribute = pickle.loads(attribute_dump) - api.forget_matches_for_attribute(attribute) - if task_object.signals.error.msg: - task_object.update(State.FAILURE) - raise task_object.signals.error.msg - - api.save_document_base_to_bson() - task_object.check() - task_object.update(state=State.SUCCESS) - - -@current_app.task(bind=True) -def forget_matches(self, user_id: int, base_name: str, organisation_id: int): - """ - define values - """ - - def task_callback_fn(state: str, meta: TaskObject): - if isinstance(state, str) and state is not None and len(state) > 0: - meta_dump = meta.to_dump() - self.update_state(state=state, meta=meta_dump) - else: - raise Exception("task_callback_fn error Invalid state") - - task_callback = TaskUpdate(task_callback_fn) - - task_object = TaskObject(task_callback) - - """ - init api - """ - - api = WannaDB_WebAPI(user_id, task_object, base_name, organisation_id) - task_object.check() - - api.load_document_base_from_bson() - task_object.check() - - api.forget_matches() - task_object.check() - - api.save_document_base_to_bson() - task_object.check() - task_object.update(state=State.SUCCESS) - - -@current_app.task(bind=True) -def long_task(self): - try: - """Background task that runs a long function with progress reports.""" - verb = ['Starting up', 'Booting', 'Repairing', 'Loading', 'Checking'] - adjective = ['master', 'radiant', 'silent', 'harmonic', 'fast'] - noun = ['solar array', 'particle reshaper', 'cosmic ray', 'orbiter', 'bit'] - data = '' - total = random.randint(10, 50) - - def task_callback_fn(state: str, meta: TaskObject): - if not isinstance(state, str): - raise Exception("task_callback_fn error Invalid state") - meta_dump = meta.to_dump() - self.update_state(state=state, meta=meta_dump) - - task_callback = TaskUpdate(task_callback_fn) - - task_object = TaskObject(task_callback) - - for i in range(total): - if not data or random.random() < 0.25: - data = '{0} {1} {2}...'.format(random.choice(verb), - random.choice(adjective), - random.choice(noun)) - time.sleep(1) - task_object.update(state=State.PENDING) - task_object.update(state=State.SUCCESS) - return data - except Exception as e: - self.update_state(state=State.FAILURE.value, meta={'exception': str(e)}) - raise + api = WannaDB_WebAPI(self._user_id, EmptyInteractionCallback(), base_name, organisation_id) + try: + """ + Creating document base + """ + if not isinstance(attributes[0], Attribute): + self.update(State.ERROR) + raise Exception("Invalid attributes") + + if not isinstance(statistics, Statistics): + self.update(State.ERROR) + raise Exception("Invalid statistics") + + docs = getDocuments(document_ids, self._user_id) + self.update(State.PENDING) + documents = [] + if docs: + for doc in docs: + documents.append(Document(doc[0], doc[1])) + else: + print("No documents found") + + 
api.create_document_base(documents, attributes, statistics) + + api.save_document_base_to_bson() + + self.update(State.SUCCESS) + return self + finally: + self.save() + +# +# +# @app.task(bind=True) +# def add_attributes(self, user_id: int, attributes_dump: Optional[bytes], base_name: str, organisation_id: int): +# """ +# define values +# """ +# +# def task_callback_fn(state: str, meta: TaskObject): +# if isinstance(state, str) and state is not None and len(state) > 0: +# meta_dump = meta.to_dump() +# self.update_state(state=state, meta=meta_dump) +# else: +# raise Exception("task_callback_fn error Invalid state") +# +# task_callback = TaskCallback(task_callback_fn) +# +# task_object = TaskObject(task_callback) +# +# """ +# init api +# """ +# +# api = WannaDB_WebAPI(user_id, task_object, base_name, organisation_id) +# task_object.check() +# +# api.load_document_base_from_bson() +# task_object.check() +# +# if attributes_dump is not None: +# attributes: list[Attribute] = pickle.loads(attributes_dump) +# api.add_attributes(attributes) +# if task_object.signals.error.msg: +# task_object.update(State.FAILURE) +# raise task_object.signals.error.msg +# +# api.save_document_base_to_bson() +# task_object.check() +# task_object.update(state=State.SUCCESS) +# +# +# @app.task(bind=True) +# def remove_attributes(self, user_id: int, attributes_dump: Optional[bytes], base_name: str, organisation_id: int): +# """ +# define values +# """ +# +# def task_callback_fn(state: str, meta: TaskObject): +# if isinstance(state, str) and state is not None and len(state) > 0: +# meta_dump = meta.to_dump() +# self.update_state(state=state, meta=meta_dump) +# else: +# raise Exception("task_callback_fn error Invalid state") +# +# task_callback = TaskCallback(task_callback_fn) +# +# task_object = TaskObject(task_callback) +# +# """ +# init api +# """ +# +# api = WannaDB_WebAPI(user_id, task_object, base_name, organisation_id) +# task_object.check() +# +# api.load_document_base_from_bson() +# task_object.check() +# +# if attributes_dump is not None: +# attributes: list[Attribute] = pickle.loads(attributes_dump) +# api.remove_attributes(attributes) +# if task_object.signals.error.msg: +# task_object.update(State.FAILURE) +# raise task_object.signals.error.msg +# +# api.save_document_base_to_bson() +# task_object.check() +# task_object.update(state=State.SUCCESS) +# +# +# @app.task(bind=True) +# def forget_matches_for_attribute(self, user_id: int, attribute_dump: Optional[bytes], base_name: str, +# organisation_id: int): +# """ +# define values +# """ +# +# def task_callback_fn(state: str, meta: TaskObject): +# if isinstance(state, str) and state is not None and len(state) > 0: +# meta_dump = meta.to_dump() +# self.update_state(state=state, meta=meta_dump) +# else: +# raise Exception("task_callback_fn error Invalid state") +# +# task_callback = TaskCallback(task_callback_fn) +# +# task_object = TaskObject(task_callback) +# +# """ +# init api +# """ +# +# api = WannaDB_WebAPI(user_id, task_object, base_name, organisation_id) +# task_object.check() +# +# api.load_document_base_from_bson() +# task_object.check() +# +# if attribute_dump is not None: +# attribute: Attribute = pickle.loads(attribute_dump) +# api.forget_matches_for_attribute(attribute) +# if task_object.signals.error.msg: +# task_object.update(State.FAILURE) +# raise task_object.signals.error.msg +# +# api.save_document_base_to_bson() +# task_object.check() +# task_object.update(state=State.SUCCESS) +# +# +# @app.task(bind=True) +# def forget_matches(self, 
user_id: int, base_name: str, organisation_id: int): +# """ +# define values +# """ +# +# def task_callback_fn(state: str, meta: TaskObject): +# if isinstance(state, str) and state is not None and len(state) > 0: +# meta_dump = meta.to_dump() +# self.update_state(state=state, meta=meta_dump) +# else: +# raise Exception("task_callback_fn error Invalid state") +# +# task_callback = TaskCallback(task_callback_fn) +# +# task_object = TaskObject(task_callback) +# +# """ +# init api +# """ +# +# api = WannaDB_WebAPI(user_id, task_object, base_name, organisation_id) +# task_object.check() +# +# api.load_document_base_from_bson() +# task_object.check() +# +# api.forget_matches() +# task_object.check() +# +# api.save_document_base_to_bson() +# task_object.check() +# task_object.update(state=State.SUCCESS) +# +# +# @app.task(bind=True) +# def interactive_table_population(self, user_id: int, attributes_dump: Optional[bytes], base_name: str, +# organisation_id: int): +# """ +# define values +# """ +# +# def task_callback_fn(state: str, meta: TaskObject): +# if isinstance(state, str) and state is not None and len(state) > 0: +# meta_dump = meta.update().to_dump() +# self.update_state(state=state, meta=meta_dump) +# else: +# raise Exception("task_callback_fn error Invalid state") +# +# task_callback = TaskCallback(task_callback_fn) +# +# def interaction_callback_fn(pipeline_element_identifier, feedback_request): +# feedback_request["identifier"] = pipeline_element_identifier +# self.feedback_request_to_ui.emit(feedback_request) +# +# self.feedback_mutex.lock() +# try: +# self.feedback_cond.wait(self.feedback_mutex) +# finally: +# self.feedback_mutex.unlock() +# +# return self.feedback +# +# interaction_callback = InteractionCallback(interaction_callback_fn) +# +# task_object = TaskObject(task_callback) +# +# """ +# init api +# """ +# +# api = WannaDB_WebAPI(user_id, task_object, base_name, organisation_id) +# task_object.check() +# +# api.load_document_base_from_bson() +# task_object.check() +# +# api.forget_matches() +# task_object.check() +# +# api.save_document_base_to_bson() +# task_object.check() +# task_object.update(state=State.SUCCESS) +# +# +# @app.task(bind=True) +# def long_task(self): +# try: +# """Background task that runs a long function with progress reports.""" +# verb = ['Starting up', 'Booting', 'Repairing', 'Loading', 'Checking'] +# adjective = ['master', 'radiant', 'silent', 'harmonic', 'fast'] +# noun = ['solar array', 'particle reshaper', 'cosmic ray', 'orbiter', 'bit'] +# data = '' +# total = random.randint(10, 50) +# +# def task_callback_fn(state: str, meta: TaskObject): +# if not isinstance(state, str): +# raise Exception("task_callback_fn error Invalid state") +# meta_dump = meta.to_dump() +# self.update_state(state=state, meta=meta_dump) +# +# task_callback = TaskCallback(task_callback_fn) +# +# task_object = TaskObject(task_callback) +# +# for i in range(total): +# if not data or random.random() < 0.25: +# data = '{0} {1} {2}...'.format(random.choice(verb), +# random.choice(adjective), +# random.choice(noun)) +# time.sleep(1) +# task_object.update(state=State.PENDING) +# task_object.update(state=State.SUCCESS) +# return data +# except Exception as e: +# self.update_state(state=State.FAILURE.value, meta={'exception': str(e)}) +# raise diff --git a/wannadb_web/worker/util.py b/wannadb_web/worker/util.py index c4b571d6..da11ca83 100644 --- a/wannadb_web/worker/util.py +++ b/wannadb_web/worker/util.py @@ -8,7 +8,7 @@ from wannadb_web.worker.data import Signals -class TaskUpdate: 
+class TaskCallback: """Task callback that is initialized with a callback function.""" def __init__(self, callback_fn: Callable[[str, Any], None]): @@ -25,9 +25,9 @@ def __call__(self, state: str, context: Any) -> None: class State(enum.Enum): STARTED = 'STARTED' + WAITING = 'WAITING' PENDING = 'PENDING' SUCCESS = 'SUCCESS' - FAILURE = 'FAILURE' ERROR = 'ERROR' @@ -35,7 +35,7 @@ class State(enum.Enum): class TaskObject: """Class for representing the response of a task.""" - task_update_fn: Optional[TaskUpdate] + task_update_fn: Optional[TaskCallback] __signals: Signals = field(default_factory=Signals) __state: State = State.STARTED @@ -104,5 +104,5 @@ def from_dump(dump: bytes): def check(self): self.update(None) if self.signals.error.msg: - self.update(State.FAILURE) + self.update(State.ERROR) raise self.signals.error.msg From dc6c73e7a128137a3191d081e22a8cad41fa5931 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 23 Jan 2024 18:33:08 +0100 Subject: [PATCH 162/254] add user id bag to run --- wannadb_web/routing/core.py | 5 ++--- wannadb_web/worker/tasks.py | 21 +++++++++++++-------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/wannadb_web/routing/core.py b/wannadb_web/routing/core.py index 171b7336..da3ea14c 100644 --- a/wannadb_web/routing/core.py +++ b/wannadb_web/routing/core.py @@ -94,9 +94,8 @@ def create_document(): attributesDump = pickle.dumps(attributes) statisticsDump = pickle.dumps(statistics) - - task = CreateDocumentBase(user_id).apply_async(args=(document_ids, attributesDump, statisticsDump, - base_name, organisation_id)) + task = CreateDocumentBase().apply_async(args=(user_id, document_ids, attributesDump, statisticsDump, + base_name, organisation_id)) return make_response({'task_id': task.id}, 202) diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index a61ad62f..16bb5fdc 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -4,7 +4,6 @@ from typing import Optional, Any from celery import Task -from redis import Redis import wannadb.resources from wannadb.data.data import Document, Attribute @@ -33,11 +32,10 @@ def run(self, *args, **kwargs): class BaseTask(Task): name = "BaseTask" + _signals: Optional[Signals] = None + _redis_client: Optional[RedisCache] = None - def __init__(self, user_id: int = 0): - self._user_id = user_id - self._signals = Signals(user_id) - self._redis_client = RedisCache(user_id) + def __init__(self): super().__init__() def run(self, *args, **kwargs): @@ -65,6 +63,8 @@ def update(self, ) -> None: if meta: super().update_state(meta=meta) + if self._signals is None: + raise RuntimeError("self._signals is None!") else: super().update_state(state=str(state.value if state else None), meta=self._signals.to_json()) @@ -78,6 +78,8 @@ def update_state(self, raise NotImplementedError("user update() instead") def get_new_input(self): + if self._redis_client is None: + raise RuntimeError("self._redis_client is None!") _input = self._redis_client.get("input") if _input is not None: pass @@ -104,16 +106,19 @@ def run(self, *args, **kwargs): class CreateDocumentBase(BaseTask): name = "CreateDocumentBase" - def run(self, document_ids: list[int], attributes_dump: bytes, statistics_dump: bytes, + def run(self, user_id: int, document_ids: list[int], attributes_dump: bytes, statistics_dump: bytes, base_name: str, organisation_id: int): + self._signals = Signals(user_id) + self._redis_client = RedisCache(user_id) self.load() attributes: list[Attribute] = pickle.loads(attributes_dump) statistics: 
Statistics = pickle.loads(statistics_dump) + print(user_id) """ init api """ - api = WannaDB_WebAPI(self._user_id, EmptyInteractionCallback(), base_name, organisation_id) + api = WannaDB_WebAPI(user_id, EmptyInteractionCallback(), base_name, organisation_id) try: """ Creating document base @@ -126,7 +131,7 @@ def run(self, document_ids: list[int], attributes_dump: bytes, statistics_dump: self.update(State.ERROR) raise Exception("Invalid statistics") - docs = getDocuments(document_ids, self._user_id) + docs = getDocuments(document_ids, user_id) self.update(State.PENDING) documents = [] if docs: From 61fe07a394f54473c5c159a06b50b45ba4f59d59 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 23 Jan 2024 18:36:48 +0100 Subject: [PATCH 163/254] rm old data management --- wannadb_web/worker/util.py | 98 -------------------------------------- 1 file changed, 98 deletions(-) diff --git a/wannadb_web/worker/util.py b/wannadb_web/worker/util.py index da11ca83..48278c6a 100644 --- a/wannadb_web/worker/util.py +++ b/wannadb_web/worker/util.py @@ -1,26 +1,4 @@ import enum -import pickle -from dataclasses import dataclass, field -from typing import Callable, Any, Optional - -from wannadb.interaction import InteractionCallback -from wannadb.status import StatusCallback -from wannadb_web.worker.data import Signals - - -class TaskCallback: - """Task callback that is initialized with a callback function.""" - - def __init__(self, callback_fn: Callable[[str, Any], None]): - """ - Initialize the Task callback. - - :param callback_fn: callback function that is called whenever the interaction callback is called - """ - self._callback_fn: Callable[[str, Any], None] = callback_fn - - def __call__(self, state: str, context: Any) -> None: - return self._callback_fn(state, context) class State(enum.Enum): @@ -30,79 +8,3 @@ class State(enum.Enum): SUCCESS = 'SUCCESS' ERROR = 'ERROR' - -@dataclass -class TaskObject: - """Class for representing the response of a task.""" - - task_update_fn: Optional[TaskCallback] - __signals: Signals = field(default_factory=Signals) - __state: State = State.STARTED - - @property - def status_callback(self): - def status_callback_fn(message, progress) -> None: - m = str(message) - p = str(progress) - - self.signals.status.emit(m + ":" + p) - self.update(State.PENDING) - - return StatusCallback(status_callback_fn) - - @property - def interaction_callback(self): - def interaction_callback_fn(pipeline_element_identifier, feedback_request): - feedback_request["identifier"] = pipeline_element_identifier - self.signals.feedback_request_to_ui.emit(feedback_request) - self.update(State.PENDING) - return self.signals.feedback - - return InteractionCallback(interaction_callback_fn) - - @property - def state(self) -> State: - return self.__state - - @state.setter - def state(self, state: State): - if not isinstance(state, State): - print("update error Invalid state", state) - raise Exception("update error Invalid state") - if state is None: - print("update error State is none", state) - raise Exception("update error State is none") - self.__state = state - - @property - def signals(self) -> Signals: - return self.__signals - - @signals.setter - def signals(self, signals: Signals): - self.__signals = signals - - def update(self, state: Optional[State]): - if self.task_update_fn is None: - raise Exception("update error task_update_fn is None do you want to update here?") - if isinstance(state, State) and state is not None: - self.state = state - self.task_update_fn(self.state.value, self) - - def 
to_dump(self): - _state = self.state - _signals = self.signals - return pickle.dumps((_state, _signals)) - - @staticmethod - def from_dump(dump: bytes): - state, signals = pickle.loads(dump) - to = TaskObject(None,state) - to.signals = signals - return to - - def check(self): - self.update(None) - if self.signals.error.msg: - self.update(State.ERROR) - raise self.signals.error.msg From a02cb0368d2456e8fb85c18db1271bccc0309dfc Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 23 Jan 2024 18:37:05 +0100 Subject: [PATCH 164/254] rm print --- wannadb_web/worker/tasks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index 16bb5fdc..adb8a05b 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -113,7 +113,6 @@ def run(self, user_id: int, document_ids: list[int], attributes_dump: bytes, sta self.load() attributes: list[Attribute] = pickle.loads(attributes_dump) statistics: Statistics = pickle.loads(statistics_dump) - print(user_id) """ init api From 66f8316db386e9a094c9939f73f14ef82cac2cf8 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 24 Jan 2024 00:08:56 +0100 Subject: [PATCH 165/254] rm save and load --- wannadb_web/Redis/RedisCache.py | 10 +++++ wannadb_web/routing/core.py | 4 +- wannadb_web/worker/tasks.py | 69 +++++++++++++-------------------- 3 files changed, 38 insertions(+), 45 deletions(-) diff --git a/wannadb_web/Redis/RedisCache.py b/wannadb_web/Redis/RedisCache.py index a7555430..0817fb7b 100644 --- a/wannadb_web/Redis/RedisCache.py +++ b/wannadb_web/Redis/RedisCache.py @@ -17,6 +17,16 @@ def set(self, key: str, value: Union[str, bytes, int, float]) -> None: user_key = f"{self.user_space_key}:{key}" self.redis_client.set(name=user_key, value=value) + def sadd(self, key: str, *values: Union[str, bytes, int, float]) -> None: + """Set a key-value pair in the user-specific space.""" + user_key = f"{self.user_space_key}:{key}" + self.redis_client.sadd(name=user_key, value=values) + + def spop(self, key: str) -> Optional[set]: + """Set a key-value pair in the user-specific space.""" + user_key = f"{self.user_space_key}:{key}" + return self.redis_client.smembers(name=user_key) + def get(self, key: str) -> Optional[Union[str, bytes, int, float]]: """Get the value associated with a key in the user-specific space.""" user_key = f"{self.user_space_key}:{key}" diff --git a/wannadb_web/routing/core.py b/wannadb_web/routing/core.py index da3ea14c..bc85ccc3 100644 --- a/wannadb_web/routing/core.py +++ b/wannadb_web/routing/core.py @@ -38,7 +38,7 @@ from wannadb.statistics import Statistics from wannadb_web.Redis.RedisCache import RedisCache from wannadb_web.util import tokenDecode -from wannadb_web.worker.tasks import CreateDocumentBase +from wannadb_web.worker.tasks import CreateDocumentBase, BaseTask core_routes = Blueprint('core_routes', __name__, url_prefix='/core') @@ -155,7 +155,7 @@ def document_base(): @core_routes.route('/status/', methods=['GET']) def task_status(task_id: str): - task: AsyncResult = AsyncResult(task_id) + task: AsyncResult = BaseTask().AsyncResult(task_id=task_id) status = task.status print(task.info) if status == "FAILURE": diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index adb8a05b..6ddc6cec 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -43,31 +43,17 @@ def run(self, *args, **kwargs): @staticmethod def load(): - manager = RedisCache(0).get("manager") - if not isinstance(manager, bytes): - raise RuntimeError("manager is not 
bytes!") - if manager is None and wannadb.resources.MANAGER is None: + if wannadb.resources.MANAGER is None: wannadb.resources.ResourceManager() BaseTask.load() - _MANAGER: Optional["ResourceManager"] = pickle.loads(manager) - wannadb.resources.MANAGER = _MANAGER - - @staticmethod - def save(): - manager = pickle.dumps(wannadb.resources.MANAGER) - RedisCache(0).set("manager", manager) + return + logging.info("loaded") def update(self, - state: Optional[State] = None, + state: State, meta: Optional[dict[str, Any]] = None, ) -> None: - if meta: - super().update_state(meta=meta) - if self._signals is None: - raise RuntimeError("self._signals is None!") - else: - super().update_state(state=str(state.value if state else None), - meta=self._signals.to_json()) + super().update_state(state=state.value, meta=meta) def update_state(self, task_id: Optional[str] = None, @@ -118,35 +104,32 @@ def run(self, user_id: int, document_ids: list[int], attributes_dump: bytes, sta init api """ api = WannaDB_WebAPI(user_id, EmptyInteractionCallback(), base_name, organisation_id) - try: - """ - Creating document base - """ - if not isinstance(attributes[0], Attribute): - self.update(State.ERROR) - raise Exception("Invalid attributes") - if not isinstance(statistics, Statistics): - self.update(State.ERROR) - raise Exception("Invalid statistics") + """ + Creating document base + """ + if not isinstance(attributes[0], Attribute): + self.update(State.ERROR) + raise Exception("Invalid attributes") - docs = getDocuments(document_ids, user_id) - self.update(State.PENDING) - documents = [] - if docs: - for doc in docs: - documents.append(Document(doc[0], doc[1])) - else: - print("No documents found") + if not isinstance(statistics, Statistics): + self.update(State.ERROR) + raise Exception("Invalid statistics") - api.create_document_base(documents, attributes, statistics) + docs = getDocuments(document_ids, user_id) + self.update(State.PENDING) + documents = [] + if docs: + for doc in docs: + documents.append(Document(doc[0], doc[1])) + else: + print("No documents found") - api.save_document_base_to_bson() + api.create_document_base(documents, attributes, statistics) - self.update(State.SUCCESS) - return self - finally: - self.save() + api.save_document_base_to_bson() + self.update(State.SUCCESS) + return self # # From 1d6285a858e1c0b9943fea300532f7a0ba92ce48 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 24 Jan 2024 01:06:08 +0100 Subject: [PATCH 166/254] adj task_status --- wannadb_web/Redis/RedisCache.py | 4 ++-- wannadb_web/routing/core.py | 8 ++++---- wannadb_web/worker/data.py | 29 +++++++++++++++-------------- wannadb_web/worker/tasks.py | 6 +++--- 4 files changed, 24 insertions(+), 23 deletions(-) diff --git a/wannadb_web/Redis/RedisCache.py b/wannadb_web/Redis/RedisCache.py index 0817fb7b..b4764984 100644 --- a/wannadb_web/Redis/RedisCache.py +++ b/wannadb_web/Redis/RedisCache.py @@ -7,10 +7,10 @@ class RedisCache: - def __init__(self, user_id: int) -> None: + def __init__(self, user_id: str) -> None: """Initialize the RedisCache instance for a specific user.""" self.redis_client = util.connectRedis() - self.user_space_key = f"user:{str(user_id)}" + self.user_space_key = f"user:{user_id}" def set(self, key: str, value: Union[str, bytes, int, float]) -> None: """Set a key-value pair in the user-specific space.""" diff --git a/wannadb_web/routing/core.py b/wannadb_web/routing/core.py index bc85ccc3..ada312ef 100644 --- a/wannadb_web/routing/core.py +++ b/wannadb_web/routing/core.py @@ -38,6 +38,7 @@ from 
wannadb.statistics import Statistics from wannadb_web.Redis.RedisCache import RedisCache from wannadb_web.util import tokenDecode +from wannadb_web.worker.data import Signals from wannadb_web.worker.tasks import CreateDocumentBase, BaseTask core_routes = Blueprint('core_routes', __name__, url_prefix='/core') @@ -157,14 +158,13 @@ def document_base(): def task_status(task_id: str): task: AsyncResult = BaseTask().AsyncResult(task_id=task_id) status = task.status - print(task.info) if status == "FAILURE": - return make_response({"state": "FAILURE", "meta": str(task.result)}, 500) + return make_response({"state": "FAILURE", "meta": Signals(task_id)}, 500) if status == "SUCCESS": - return make_response({"state": "SUCCESS", "meta": str(task.result)}, 200) + return make_response({"state": "SUCCESS", "meta": Signals(task_id)}, 200) if status is None: return make_response({"error": "task not found"}, 500) - return make_response({"state": task.status, "meta": str(task.result)}, 202) + return make_response({"state": task.status, "meta": Signals(task_id)}, 202) @core_routes.route('/status/', methods=['POST']) diff --git a/wannadb_web/worker/data.py b/wannadb_web/worker/data.py index 83a91354..54aa30fd 100644 --- a/wannadb_web/worker/data.py +++ b/wannadb_web/worker/data.py @@ -1,8 +1,7 @@ import abc import json -from abc import ABC, abstractmethod -from dataclasses import dataclass -from typing import Optional, Any +from abc import abstractmethod +from typing import Any from wannadb.data.data import DocumentBase, InformationNugget, Document, Attribute from wannadb.data.signals import BaseSignal @@ -53,7 +52,7 @@ def document_base_to_json(document_base: DocumentBase): class Signals: - def __init__(self, user_id: int): + def __init__(self, user_id: str): self.pipeline = _State("pipeline", user_id) self.feedback = _Signal("feedback", user_id) self.status = _State("status", user_id) @@ -77,13 +76,16 @@ def to_json(self) -> dict[str, str]: class Emitable(abc.ABC): - def __init__(self, emitable_type: str, user_id: int): + def __init__(self, emitable_type: str, user_id: str): self.type = emitable_type self.redis = RedisCache(user_id) @property def msg(self): - return self.redis.get(self.type) + msg = self.redis.get(self.type) + if msg is None: + return None + return msg @abstractmethod def to_json(self): @@ -97,7 +99,7 @@ def emit(self, status: Any): class _State(Emitable): def to_json(self): - return str(self.msg) + return self.msg.decode("utf-8") def emit(self, status: str): self.redis.set(self.type, status) @@ -115,7 +117,7 @@ def emit(self, status: float): class _Error(Emitable): def to_json(self): - return str(self.msg) + return self.msg.decode("utf-8") def emit(self, exception: BaseException): self.redis.set(self.type, str(exception)) @@ -128,7 +130,7 @@ def to_json(self): return {} if not isinstance(self.msg, str): raise TypeError("_Nugget msg must be of type str") - return json.loads(self.msg) + return self.msg def emit(self, status: InformationNugget): self.redis.set(self.type, json.dumps(nugget_to_json(status))) @@ -139,9 +141,6 @@ class _DocumentBase(Emitable): def to_json(self): if self.msg is None: return {} - if not isinstance(self.msg, str): - self.redis.delete(self.type) - raise TypeError("_DocumentBase msg must be of type str, type is: " + str(type(self.msg))) return json.loads(self.msg) def emit(self, status: DocumentBase): @@ -150,8 +149,10 @@ def emit(self, status: DocumentBase): class _Statistics(Emitable): - def msg(self) -> "Statistics": - return Statistics(False) + + @property + def 
msg(self): + return "not implemented" def to_json(self): return Statistics(False).to_serializable() diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index 6ddc6cec..894a7cbc 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -27,7 +27,7 @@ def run(self, *args, **kwargs): if wannadb.resources.MANAGER is None: raise RuntimeError("Resource_Manager is None!") manager = pickle.dumps(wannadb.resources.MANAGER) - RedisCache(0).set("manager", manager) + RedisCache("0").set("manager", manager) class BaseTask(Task): @@ -94,8 +94,8 @@ class CreateDocumentBase(BaseTask): def run(self, user_id: int, document_ids: list[int], attributes_dump: bytes, statistics_dump: bytes, base_name: str, organisation_id: int): - self._signals = Signals(user_id) - self._redis_client = RedisCache(user_id) + self._signals = Signals(str(self.request.id)) + self._redis_client = RedisCache(str(self.request.id)) self.load() attributes: list[Attribute] = pickle.loads(attributes_dump) statistics: Statistics = pickle.loads(statistics_dump) From 40c90620098868121ce70d5aa3fbd25b14bd7988 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 24 Jan 2024 12:52:32 +0100 Subject: [PATCH 167/254] adj(create_document): change types --- wannadb_web/routing/core.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/wannadb_web/routing/core.py b/wannadb_web/routing/core.py index ada312ef..70b4fe7b 100644 --- a/wannadb_web/routing/core.py +++ b/wannadb_web/routing/core.py @@ -76,9 +76,8 @@ def create_document(): organisation_id: Optional[int] = form.get("organisationId") base_name = form.get("baseName") document_ids: Optional[list[int]] = form.get("document_ids") - document_ids = [2, 3] - attributes_string = form.get("attributes") - if (organisation_id is None or base_name is None or document_ids is None or attributes_string is None + attributes_strings = form.get("attributes") + if (organisation_id is None or base_name is None or document_ids is None or attributes_strings is None or authorization is None): return make_response({"error": "missing parameters"}, 400) _token = tokenDecode(authorization) @@ -86,16 +85,11 @@ def create_document(): if _token is False: return make_response({"error": "invalid token"}, 401) - attributes = [] - for att in attributes_string: - attributes.append(Attribute(att)) - statistics = Statistics(False) user_id = _token.id - attributesDump = pickle.dumps(attributes) statisticsDump = pickle.dumps(statistics) - task = CreateDocumentBase().apply_async(args=(user_id, document_ids, attributesDump, statisticsDump, + task = CreateDocumentBase().apply_async(args=(user_id, document_ids, attributes_strings, statisticsDump, base_name, organisation_id)) return make_response({'task_id': task.id}, 202) @@ -169,5 +163,5 @@ def task_status(task_id: str): @core_routes.route('/status/', methods=['POST']) def task_update(task_id: str): - redis_client = RedisCache(int(task_id)).redis_client + redis_client = RedisCache(task_id).redis_client redis_client.set("input", "test") From 1c924f03e8c6e17d5965109f93accc6e1daba9f9 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 24 Jan 2024 12:53:18 +0100 Subject: [PATCH 168/254] fix: potential bug prevented when accessing --- wannadb_web/postgres/queries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wannadb_web/postgres/queries.py b/wannadb_web/postgres/queries.py index 2fcd17dc..b4c43441 100644 --- a/wannadb_web/postgres/queries.py +++ b/wannadb_web/postgres/queries.py @@ -270,7 +270,7 @@ def 
getDocuments(document_ids: list[int], user_id: int): content = document[2] b_documents.append((str(name), bytes(content))) return b_documents - return [] + return [(None,None)] def getDocument_ids(organisation_id: int, user_id: int): From 9196d8e57c3213ecca35ba1970e9fa7ca155cd65 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 24 Jan 2024 12:53:58 +0100 Subject: [PATCH 169/254] add(run): check for attr consistency --- wannadb_web/worker/tasks.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index 894a7cbc..cc4c5142 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -17,6 +17,7 @@ from wannadb_web.worker.util import State logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") +logger = logging.getLogger() class InitManager(Task): @@ -92,14 +93,23 @@ def run(self, *args, **kwargs): class CreateDocumentBase(BaseTask): name = "CreateDocumentBase" - def run(self, user_id: int, document_ids: list[int], attributes_dump: bytes, statistics_dump: bytes, + def run(self, user_id: int, document_ids: list[int], attributes_strings: list[str], statistics_dump: bytes, base_name: str, organisation_id: int): self._signals = Signals(str(self.request.id)) self._redis_client = RedisCache(str(self.request.id)) self.load() - attributes: list[Attribute] = pickle.loads(attributes_dump) + attributes: list[Attribute] = [] statistics: Statistics = pickle.loads(statistics_dump) + for attribute_string in attributes_strings: + if attribute_string == "": + logger.error("Attribute names cannot be empty!") + raise Exception("Attribute names cannot be empty!") + if attribute_string in [attribute.name for attribute in attributes]: + logger.error("Attribute names must be unique!") + raise Exception("Attribute names must be unique!") + attributes.append(Attribute(attribute_string)) + """ init api """ @@ -118,12 +128,13 @@ def run(self, user_id: int, document_ids: list[int], attributes_dump: bytes, sta docs = getDocuments(document_ids, user_id) self.update(State.PENDING) - documents = [] + documents: list[Document] = [] if docs: for doc in docs: documents.append(Document(doc[0], doc[1])) else: - print("No documents found") + self.update(State.ERROR) + raise Exception("No documents found") api.create_document_base(documents, attributes, statistics) From 481dbbdcf52f4d10bcd89554989fbaaad37a54e6 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 24 Jan 2024 12:54:39 +0100 Subject: [PATCH 170/254] fix: type adj --- wannadb_web/worker/Web_API.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wannadb_web/worker/Web_API.py b/wannadb_web/worker/Web_API.py index d59fab3b..9ff3d50c 100644 --- a/wannadb_web/worker/Web_API.py +++ b/wannadb_web/worker/Web_API.py @@ -35,7 +35,7 @@ def __init__(self, user_id: int, self._document_base: Optional[DocumentBase] = None self.user_id = user_id self.interaction_callback = interaction_callback - self.signals = Signals(self.user_id) + self.signals = Signals(str(self.user_id)) self.sqLiteCacheDBWrapper = SQLiteCacheDBWrapper(user_id, db_file=":memory:") self.document_base_name = document_base_name self.organisation_id = organisation_id From 1545484c390de9ba29a57785f0cb93f5df8f3ec0 Mon Sep 17 00:00:00 2001 From: cophilot Date: Wed, 24 Jan 2024 13:40:06 +0100 Subject: [PATCH 171/254] bug fixes --- wannadb_web/routing/core.py | 11 +++++------ wannadb_web/worker/tasks.py | 12 ++++++++---- 2 files changed, 13 
insertions(+), 10 deletions(-) diff --git a/wannadb_web/routing/core.py b/wannadb_web/routing/core.py index 70b4fe7b..7f5386e8 100644 --- a/wannadb_web/routing/core.py +++ b/wannadb_web/routing/core.py @@ -62,12 +62,8 @@ def create_document(): { "organisationId": "your_organisation_id", "baseName": "your_document_base_name", - "document_ids": [ - 1, 2, 3 - ], - "attributes": [ - "plane","car","bike" - ] + "document_ids": "1, 2, 3", + "attributes": "plane,car,bike" } """ form = request.form @@ -85,6 +81,9 @@ def create_document(): if _token is False: return make_response({"error": "invalid token"}, 401) + attributes_strings = attributes_strings.split(",") + document_ids = document_ids.split(",") + statistics = Statistics(False) user_id = _token.id diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index cc4c5142..2b004781 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -100,14 +100,18 @@ def run(self, user_id: int, document_ids: list[int], attributes_strings: list[st self.load() attributes: list[Attribute] = [] statistics: Statistics = pickle.loads(statistics_dump) - + print("attributes_strings:", attributes_strings) + # convert attributes_strings to an array + attributes_strings = list[str](attributes_strings) for attribute_string in attributes_strings: + print("attribute_string:", attribute_string) if attribute_string == "": logger.error("Attribute names cannot be empty!") raise Exception("Attribute names cannot be empty!") - if attribute_string in [attribute.name for attribute in attributes]: - logger.error("Attribute names must be unique!") - raise Exception("Attribute names must be unique!") + for attribute in attributes: + if attribute_string == attribute.name: + logger.error("Attribute names must be unique!") + raise Exception("Attribute names must be unique: " + attribute_string) attributes.append(Attribute(attribute_string)) """ From 69efbdb54c86a9b3be2a6e1c1799069e47313c73 Mon Sep 17 00:00:00 2001 From: cophilot Date: Wed, 24 Jan 2024 13:50:47 +0100 Subject: [PATCH 172/254] removed CONSTRAINT --- wannadb_web/postgres/transactions.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/wannadb_web/postgres/transactions.py b/wannadb_web/postgres/transactions.py index 2e56ff95..97524a65 100644 --- a/wannadb_web/postgres/transactions.py +++ b/wannadb_web/postgres/transactions.py @@ -76,8 +76,6 @@ def createDocumentsTable(schema): ON UPDATE CASCADE ON DELETE CASCADE NOT VALID, - CONSTRAINT check_only_one_filled - check (((content IS NOT NULL) AND (content_byte IS NULL)) OR ((content IS NOT NULL) AND (content_byte IS NULL))) ) TABLESPACE pg_default;""") @@ -372,6 +370,7 @@ def addDocument(name: str, content: Union[str, bytes], organisationId: int, user except IntegrityError as i: logger.error(str(i)) + print("addDocument failed because: \n", i) return -1 except Exception as e: From 81757ff5a1b02febc0bd18b3e43973a2da832ee7 Mon Sep 17 00:00:00 2001 From: cophilot Date: Wed, 24 Jan 2024 14:00:47 +0100 Subject: [PATCH 173/254] bug fix --- wannadb_web/postgres/transactions.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/wannadb_web/postgres/transactions.py b/wannadb_web/postgres/transactions.py index 97524a65..69a6bb41 100644 --- a/wannadb_web/postgres/transactions.py +++ b/wannadb_web/postgres/transactions.py @@ -75,7 +75,7 @@ def createDocumentsTable(schema): REFERENCES {schema}.users (id) MATCH SIMPLE ON UPDATE CASCADE ON DELETE CASCADE - NOT VALID, + NOT VALID ) TABLESPACE pg_default;""") @@ -370,7 
+370,6 @@ def addDocument(name: str, content: Union[str, bytes], organisationId: int, user except IntegrityError as i: logger.error(str(i)) - print("addDocument failed because: \n", i) return -1 except Exception as e: From d8c768714d3d673417ae7b6428e5f9bedf671757 Mon Sep 17 00:00:00 2001 From: cophilot Date: Wed, 24 Jan 2024 14:26:25 +0100 Subject: [PATCH 174/254] bug fixes --- wannadb_web/postgres/queries.py | 2 -- wannadb_web/routing/core.py | 6 +++--- wannadb_web/worker/data.py | 4 ++++ 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/wannadb_web/postgres/queries.py b/wannadb_web/postgres/queries.py index b4c43441..ee90badc 100644 --- a/wannadb_web/postgres/queries.py +++ b/wannadb_web/postgres/queries.py @@ -203,8 +203,6 @@ def getDocumentsForOrganization(organisation_id: int): content = '' if document[2]: content = document[2] - elif document[3]: - content = document[3] doc_array.append({ "id": id, "name": name, diff --git a/wannadb_web/routing/core.py b/wannadb_web/routing/core.py index 7f5386e8..f31b1a16 100644 --- a/wannadb_web/routing/core.py +++ b/wannadb_web/routing/core.py @@ -152,12 +152,12 @@ def task_status(task_id: str): task: AsyncResult = BaseTask().AsyncResult(task_id=task_id) status = task.status if status == "FAILURE": - return make_response({"state": "FAILURE", "meta": Signals(task_id)}, 500) + return make_response({"state": "FAILURE", "meta": Signals(task_id).to_json()}, 500) if status == "SUCCESS": - return make_response({"state": "SUCCESS", "meta": Signals(task_id)}, 200) + return make_response({"state": "SUCCESS", "meta": Signals(task_id).to_json()}, 200) if status is None: return make_response({"error": "task not found"}, 500) - return make_response({"state": task.status, "meta": Signals(task_id)}, 202) + return make_response({"state": task.status, "meta": Signals(task_id).to_json()}, 202) @core_routes.route('/status/', methods=['POST']) diff --git a/wannadb_web/worker/data.py b/wannadb_web/worker/data.py index 54aa30fd..d5367b7b 100644 --- a/wannadb_web/worker/data.py +++ b/wannadb_web/worker/data.py @@ -99,6 +99,8 @@ def emit(self, status: Any): class _State(Emitable): def to_json(self): + if self.msg is None: + return "" return self.msg.decode("utf-8") def emit(self, status: str): @@ -117,6 +119,8 @@ def emit(self, status: float): class _Error(Emitable): def to_json(self): + if self.msg is None: + return "" return self.msg.decode("utf-8") def emit(self, exception: BaseException): From 9841e643721b53180b32030c3c6d63b36782556c Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 24 Jan 2024 15:02:43 +0100 Subject: [PATCH 175/254] add tasks --- wannadb_web/worker/tasks.py | 349 +++++++++++------------------------- 1 file changed, 101 insertions(+), 248 deletions(-) diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index 2b004781..a857ad59 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -95,29 +95,22 @@ class CreateDocumentBase(BaseTask): def run(self, user_id: int, document_ids: list[int], attributes_strings: list[str], statistics_dump: bytes, base_name: str, organisation_id: int): - self._signals = Signals(str(self.request.id)) - self._redis_client = RedisCache(str(self.request.id)) self.load() attributes: list[Attribute] = [] statistics: Statistics = pickle.loads(statistics_dump) - print("attributes_strings:", attributes_strings) - # convert attributes_strings to an array - attributes_strings = list[str](attributes_strings) for attribute_string in attributes_strings: - print("attribute_string:", 
attribute_string) if attribute_string == "": logger.error("Attribute names cannot be empty!") raise Exception("Attribute names cannot be empty!") - for attribute in attributes: - if attribute_string == attribute.name: - logger.error("Attribute names must be unique!") - raise Exception("Attribute names must be unique: " + attribute_string) + if attribute_string in [attribute.name for attribute in attributes]: + logger.error("Attribute names must be unique!") + raise Exception("Attribute names must be unique!") attributes.append(Attribute(attribute_string)) """ init api """ - api = WannaDB_WebAPI(user_id, EmptyInteractionCallback(), base_name, organisation_id) + api = WannaDB_WebAPI(user_id, base_name, organisation_id) """ Creating document base @@ -146,240 +139,100 @@ def run(self, user_id: int, document_ids: list[int], attributes_strings: list[st self.update(State.SUCCESS) return self -# -# -# @app.task(bind=True) -# def add_attributes(self, user_id: int, attributes_dump: Optional[bytes], base_name: str, organisation_id: int): -# """ -# define values -# """ -# -# def task_callback_fn(state: str, meta: TaskObject): -# if isinstance(state, str) and state is not None and len(state) > 0: -# meta_dump = meta.to_dump() -# self.update_state(state=state, meta=meta_dump) -# else: -# raise Exception("task_callback_fn error Invalid state") -# -# task_callback = TaskCallback(task_callback_fn) -# -# task_object = TaskObject(task_callback) -# -# """ -# init api -# """ -# -# api = WannaDB_WebAPI(user_id, task_object, base_name, organisation_id) -# task_object.check() -# -# api.load_document_base_from_bson() -# task_object.check() -# -# if attributes_dump is not None: -# attributes: list[Attribute] = pickle.loads(attributes_dump) -# api.add_attributes(attributes) -# if task_object.signals.error.msg: -# task_object.update(State.FAILURE) -# raise task_object.signals.error.msg -# -# api.save_document_base_to_bson() -# task_object.check() -# task_object.update(state=State.SUCCESS) -# -# -# @app.task(bind=True) -# def remove_attributes(self, user_id: int, attributes_dump: Optional[bytes], base_name: str, organisation_id: int): -# """ -# define values -# """ -# -# def task_callback_fn(state: str, meta: TaskObject): -# if isinstance(state, str) and state is not None and len(state) > 0: -# meta_dump = meta.to_dump() -# self.update_state(state=state, meta=meta_dump) -# else: -# raise Exception("task_callback_fn error Invalid state") -# -# task_callback = TaskCallback(task_callback_fn) -# -# task_object = TaskObject(task_callback) -# -# """ -# init api -# """ -# -# api = WannaDB_WebAPI(user_id, task_object, base_name, organisation_id) -# task_object.check() -# -# api.load_document_base_from_bson() -# task_object.check() -# -# if attributes_dump is not None: -# attributes: list[Attribute] = pickle.loads(attributes_dump) -# api.remove_attributes(attributes) -# if task_object.signals.error.msg: -# task_object.update(State.FAILURE) -# raise task_object.signals.error.msg -# -# api.save_document_base_to_bson() -# task_object.check() -# task_object.update(state=State.SUCCESS) -# -# -# @app.task(bind=True) -# def forget_matches_for_attribute(self, user_id: int, attribute_dump: Optional[bytes], base_name: str, -# organisation_id: int): -# """ -# define values -# """ -# -# def task_callback_fn(state: str, meta: TaskObject): -# if isinstance(state, str) and state is not None and len(state) > 0: -# meta_dump = meta.to_dump() -# self.update_state(state=state, meta=meta_dump) -# else: -# raise Exception("task_callback_fn error 
Invalid state") -# -# task_callback = TaskCallback(task_callback_fn) -# -# task_object = TaskObject(task_callback) -# -# """ -# init api -# """ -# -# api = WannaDB_WebAPI(user_id, task_object, base_name, organisation_id) -# task_object.check() -# -# api.load_document_base_from_bson() -# task_object.check() -# -# if attribute_dump is not None: -# attribute: Attribute = pickle.loads(attribute_dump) -# api.forget_matches_for_attribute(attribute) -# if task_object.signals.error.msg: -# task_object.update(State.FAILURE) -# raise task_object.signals.error.msg -# -# api.save_document_base_to_bson() -# task_object.check() -# task_object.update(state=State.SUCCESS) -# -# -# @app.task(bind=True) -# def forget_matches(self, user_id: int, base_name: str, organisation_id: int): -# """ -# define values -# """ -# -# def task_callback_fn(state: str, meta: TaskObject): -# if isinstance(state, str) and state is not None and len(state) > 0: -# meta_dump = meta.to_dump() -# self.update_state(state=state, meta=meta_dump) -# else: -# raise Exception("task_callback_fn error Invalid state") -# -# task_callback = TaskCallback(task_callback_fn) -# -# task_object = TaskObject(task_callback) -# -# """ -# init api -# """ -# -# api = WannaDB_WebAPI(user_id, task_object, base_name, organisation_id) -# task_object.check() -# -# api.load_document_base_from_bson() -# task_object.check() -# -# api.forget_matches() -# task_object.check() -# -# api.save_document_base_to_bson() -# task_object.check() -# task_object.update(state=State.SUCCESS) -# -# -# @app.task(bind=True) -# def interactive_table_population(self, user_id: int, attributes_dump: Optional[bytes], base_name: str, -# organisation_id: int): -# """ -# define values -# """ -# -# def task_callback_fn(state: str, meta: TaskObject): -# if isinstance(state, str) and state is not None and len(state) > 0: -# meta_dump = meta.update().to_dump() -# self.update_state(state=state, meta=meta_dump) -# else: -# raise Exception("task_callback_fn error Invalid state") -# -# task_callback = TaskCallback(task_callback_fn) -# -# def interaction_callback_fn(pipeline_element_identifier, feedback_request): -# feedback_request["identifier"] = pipeline_element_identifier -# self.feedback_request_to_ui.emit(feedback_request) -# -# self.feedback_mutex.lock() -# try: -# self.feedback_cond.wait(self.feedback_mutex) -# finally: -# self.feedback_mutex.unlock() -# -# return self.feedback -# -# interaction_callback = InteractionCallback(interaction_callback_fn) -# -# task_object = TaskObject(task_callback) -# -# """ -# init api -# """ -# -# api = WannaDB_WebAPI(user_id, task_object, base_name, organisation_id) -# task_object.check() -# -# api.load_document_base_from_bson() -# task_object.check() -# -# api.forget_matches() -# task_object.check() -# -# api.save_document_base_to_bson() -# task_object.check() -# task_object.update(state=State.SUCCESS) -# -# -# @app.task(bind=True) -# def long_task(self): -# try: -# """Background task that runs a long function with progress reports.""" -# verb = ['Starting up', 'Booting', 'Repairing', 'Loading', 'Checking'] -# adjective = ['master', 'radiant', 'silent', 'harmonic', 'fast'] -# noun = ['solar array', 'particle reshaper', 'cosmic ray', 'orbiter', 'bit'] -# data = '' -# total = random.randint(10, 50) -# -# def task_callback_fn(state: str, meta: TaskObject): -# if not isinstance(state, str): -# raise Exception("task_callback_fn error Invalid state") -# meta_dump = meta.to_dump() -# self.update_state(state=state, meta=meta_dump) -# -# task_callback = 
TaskCallback(task_callback_fn) -# -# task_object = TaskObject(task_callback) -# -# for i in range(total): -# if not data or random.random() < 0.25: -# data = '{0} {1} {2}...'.format(random.choice(verb), -# random.choice(adjective), -# random.choice(noun)) -# time.sleep(1) -# task_object.update(state=State.PENDING) -# task_object.update(state=State.SUCCESS) -# return data -# except Exception as e: -# self.update_state(state=State.FAILURE.value, meta={'exception': str(e)}) -# raise + +class DocumentBaseAddAttributes(BaseTask): + name = "DocumentBaseAddAttributes" + + def run(self, user_id: int, attributes_strings: list[str], base_name: str, organisation_id: int): + self.load() + attributes: list[Attribute] = [] + + for attribute_string in attributes_strings: + if attribute_string == "": + logger.error("Attribute names cannot be empty!") + raise Exception("Attribute names cannot be empty!") + if attribute_string in [attribute.name for attribute in attributes]: + logger.error("Attribute names must be unique!") + raise Exception("Attribute names must be unique!") + attributes.append(Attribute(attribute_string)) + + api = WannaDB_WebAPI(user_id, base_name, organisation_id) + api.load_document_base_from_bson() + api.add_attributes(attributes) + api.update_document_base_to_bson() + + +class DocumentBaseRemoveAttributes(BaseTask): + name = "DocumentBaseRemoveAttributes" + + def run(self, user_id: int, attributes_strings: list[str], base_name: str, organisation_id: int): + self.load() + attributes: list[Attribute] = [] + + for attribute_string in attributes_strings: + if attribute_string == "": + logger.error("Attribute names cannot be empty!") + raise Exception("Attribute names cannot be empty!") + if attribute_string in [attribute.name for attribute in attributes]: + logger.error("Attribute names must be unique!") + raise Exception("Attribute names must be unique!") + attributes.append(Attribute(attribute_string)) + + api = WannaDB_WebAPI(user_id, base_name, organisation_id) + api.load_document_base_from_bson() + api.remove_attributes(attributes) + if api.signals.error.msg is None: + api.update_document_base_to_bson() + + +class DocumentBaseForgetMatches(BaseTask): + name = "DocumentBaseForgetMatches" + + def run(self, user_id: int, attributes_strings: list[str], base_name: str, organisation_id: int): + self.load() + attributes: list[Attribute] = [] + + for attribute_string in attributes_strings: + if attribute_string == "": + logger.error("Attribute names cannot be empty!") + raise Exception("Attribute names cannot be empty!") + if attribute_string in [attribute.name for attribute in attributes]: + logger.error("Attribute names must be unique!") + raise Exception("Attribute names must be unique!") + attributes.append(Attribute(attribute_string)) + + api = WannaDB_WebAPI(user_id, base_name, organisation_id) + api.load_document_base_from_bson() + api.forget_matches() + if api.signals.error.msg is None: + api.update_document_base_to_bson() + + +class DocumentBaseForgetMatchesForAttribute(BaseTask): + name = "DocumentBaseForgetMatches" + + def run(self, user_id: int, attribute_string: str, base_name: str, organisation_id: int): + self.load() + + attribute = (Attribute(attribute_string)) + + api = WannaDB_WebAPI(user_id, base_name, organisation_id) + api.load_document_base_from_bson() + api.forget_matches_for_attribute(attribute) + if api.signals.error.msg is None: + api.update_document_base_to_bson() + + +class DocumentBaseInteractiveTablePopulation(BaseTask): + name = 
"DocumentBaseInteractiveTablePopulation" + + def run(self, user_id: int, base_name: str, organisation_id: int): + self._signals = Signals(str(self.request.id)) + self._redis_client = RedisCache(str(self.request.id)) + self.load() + + api = WannaDB_WebAPI(user_id, base_name, organisation_id) + api.load_document_base_from_bson() + api.interactive_table_population() + if api.signals.error.msg is None: + api.update_document_base_to_bson() From 2af6a593218a5f6e17fc4a6f90a500cb4ffdabb4 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 24 Jan 2024 15:03:04 +0100 Subject: [PATCH 176/254] add interaction_callback_fn --- wannadb_web/worker/Web_API.py | 89 ++++++++++++++++++++++------------- 1 file changed, 55 insertions(+), 34 deletions(-) diff --git a/wannadb_web/worker/Web_API.py b/wannadb_web/worker/Web_API.py index 9ff3d50c..77d00b56 100644 --- a/wannadb_web/worker/Web_API.py +++ b/wannadb_web/worker/Web_API.py @@ -2,13 +2,14 @@ import io import json import logging +import time from typing import Optional import wannadb from wannadb import resources from wannadb.configuration import Pipeline from wannadb.data.data import Attribute, Document, DocumentBase -from wannadb.interaction import EmptyInteractionCallback, BaseInteractionCallback +from wannadb.interaction import EmptyInteractionCallback, BaseInteractionCallback, InteractionCallback from wannadb.matching.distance import SignalsMeanDistance from wannadb.matching.matching import RankingBasedMatcher from wannadb.preprocessing.embedding import BERTContextSentenceEmbedder, RelativePositionEmbedder, \ @@ -21,7 +22,7 @@ from wannadb.statistics import Statistics from wannadb.status import BaseStatusCallback, StatusCallback from wannadb_web.SQLite.Cache_DB import SQLiteCacheDBWrapper -from wannadb_web.postgres.queries import getDocument_by_name +from wannadb_web.postgres.queries import getDocument_by_name, updateDocumentContent from wannadb_web.postgres.transactions import addDocument from wannadb_web.worker.data import Signals @@ -30,12 +31,13 @@ class WannaDB_WebAPI: - def __init__(self, user_id: int, - interaction_callback: BaseInteractionCallback, document_base_name: str, organisation_id: int): + def __init__(self, user_id: int, document_base_name: str, organisation_id: int): + self._document_id: Optional[int] = None self._document_base: Optional[DocumentBase] = None self.user_id = user_id - self.interaction_callback = interaction_callback + self.signals = Signals(str(self.user_id)) + self.signals.reset() self.sqLiteCacheDBWrapper = SQLiteCacheDBWrapper(user_id, db_file=":memory:") self.document_base_name = document_base_name self.organisation_id = organisation_id @@ -45,6 +47,14 @@ def status_callback_fn(message, progress): self.status_callback = StatusCallback(status_callback_fn) + def interaction_callback_fn(pipeline_element_identifier, feedback_request): + feedback_request["identifier"] = pipeline_element_identifier + self.signals.feedback_request_to_ui.emit(feedback_request) + logger.info("Waiting for feedback...") + time.sleep(2) + + self.interaction_callback = InteractionCallback(interaction_callback_fn) + if wannadb.resources.MANAGER is None: self.signals.error.emit(Exception("Resource Manager not initialized!")) raise Exception("Resource Manager not initialized!") @@ -53,8 +63,20 @@ def status_callback_fn(message, progress): raise Exception("Cache db could not be initialized!") logger.info("WannaDB_WebAPI initialized") + @property + def document_id(self): + if self._document_id is None: + raise Exception("Document ID not set!") + 
return self._document_id + + @document_id.setter + def document_id(self, value: int): + self._document_id = value + @property def document_base(self): + if self._document_base is None: + raise Exception("Document base not loaded!") return self._document_base @document_base.setter @@ -112,6 +134,7 @@ def load_document_base_from_bson(self): logger.debug("Called function 'load_document_base_from_bson'.") try: self.sqLiteCacheDBWrapper.reset_cache_db() + self.signals.reset() document_id, document = getDocument_by_name(self.document_base_name, self.organisation_id, self.user_id) if not isinstance(document, bytes): @@ -140,13 +163,11 @@ def load_document_base_from_bson(self): def save_document_base_to_bson(self): logger.debug("Called function 'save_document_base_to_bson'.") - if self.document_base is None: - logger.error("Document base not loaded!") - self.signals.error.emit(Exception("Document base not loaded!")) - return + try: document_id = addDocument(self.document_base_name, self.document_base.to_bson(), self.organisation_id, self.user_id) + if document_id is None: logger.error("Document base could not be saved to BSON!") elif document_id == -1: @@ -158,6 +179,28 @@ def save_document_base_to_bson(self): elif document_id > 0: logger.info(f"Document base saved to BSON with ID {document_id}.") self.signals.status.emit(f"Document base saved to BSON with ID {document_id}.") + self.document_id = document_id + return + except Exception as e: + logger.error(str(e)) + self.signals.error.emit(e) + raise e + + def update_document_base_to_bson(self): + logger.debug("Called function 'save_document_base_to_bson'.") + + if self.document_id is None: + logger.error("Document ID not set!") + self.signals.error.emit(Exception("Document ID not set!")) + return + try: + status = updateDocumentContent(self.document_id, self.document_base.to_bson()) + if status is False: + logger.error(f"Document base could not be saved to BSON! 
Document {self.document_id} does not exist!") + elif status is True: + logger.info(f"Document base saved to BSON with ID {self.document_id}.") + self.signals.status.emit(f"Document base saved to BSON with ID {self.document_id}.") + logger.error("Document base could not be saved to BSON!") return except Exception as e: logger.error(str(e)) @@ -167,10 +210,7 @@ def save_document_base_to_bson(self): # todo: below not implemented yet def save_table_to_csv(self): logger.debug("Called function 'save_table_to_csv'.") - if self.document_base is None: - logger.error("Document base not loaded!") - self.signals.error.emit(Exception("Document base not loaded!")) - return + try: buffer = io.StringIO() @@ -206,10 +246,7 @@ def save_table_to_csv(self): def add_attribute(self, attribute: Attribute): logger.debug("Called function 'add_attribute'.") - if self.document_base is None: - logger.error("Document base not loaded!") - self.signals.error.emit(Exception("Document base not loaded!")) - elif attribute in self.document_base.attributes: + if attribute in self.document_base.attributes: logger.error("Attribute name already exists!") self.signals.error.emit(Exception("Attribute name already exists!")) else: @@ -220,11 +257,6 @@ def add_attribute(self, attribute: Attribute): def add_attributes(self, attributes: list[Attribute]): logger.debug("Called function 'add_attributes'.") - if self.document_base is None: - logger.error("Document base not loaded!") - self.signals.error.emit(Exception("Document base not loaded!")) - return - already_existing_names = [] for attribute in attributes: if attribute in self.document_base.attributes: @@ -240,10 +272,6 @@ def add_attributes(self, attributes: list[Attribute]): def remove_attributes(self, attributes: list[Attribute]): logger.debug("Called function 'remove_attribute'.") - if self.document_base is None: - logger.error("Document base not loaded!") - self.signals.error.emit(Exception("Document base not loaded!")) - return for attribute in attributes: if attribute in self.document_base.attributes: for document in self.document_base.documents: @@ -261,10 +289,7 @@ def remove_attributes(self, attributes: list[Attribute]): def forget_matches_for_attribute(self, attribute: Attribute): logger.debug("Called function 'forget_matches_for_attribute'.") - if self.document_base is None: - logger.error("Document base not loaded!") - self.signals.error.emit(Exception("Document base not loaded!")) - return + self.sqLiteCacheDBWrapper.cache_db.delete_table(attribute.name) try: if attribute in self.document_base.attributes: @@ -283,10 +308,6 @@ def forget_matches_for_attribute(self, attribute: Attribute): def forget_matches(self): logger.debug("Called function 'forget_matches'.") - if self.document_base is None: - logger.error("Document base not loaded!") - self.signals.error.emit(Exception("Document base not loaded!")) - return for attribute in self.document_base.attributes: self.sqLiteCacheDBWrapper.cache_db.delete_table(attribute.name) self.sqLiteCacheDBWrapper.cache_db.create_table_by_name(attribute.name) From 10a909119026c72b0055686102151b16176916ab Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 24 Jan 2024 15:03:23 +0100 Subject: [PATCH 177/254] add(delete_user_space) --- wannadb_web/Redis/RedisCache.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/wannadb_web/Redis/RedisCache.py b/wannadb_web/Redis/RedisCache.py index b4764984..e8c25c9f 100644 --- a/wannadb_web/Redis/RedisCache.py +++ b/wannadb_web/Redis/RedisCache.py @@ -37,6 +37,20 @@ def delete(self, 
key: str) -> None:
        user_key = f"{self.user_space_key}:{key}"
        self.redis_client.delete(user_key)
 
+    def delete_user_space(self) -> None:
+        """Delete all entries associated with the user-specific space."""
+        user_space_pattern = f"{self.user_space_key}:*"
+
+        # Use SCAN to get all keys matching the pattern
+        keys_to_delete = []
+        cursor = '0'
+        while cursor != 0:
+            cursor, keys = self.redis_client.scan(cursor=cursor, match=user_space_pattern)
+            keys_to_delete.extend(keys)
+
+        # Delete all keys found
+        if keys_to_delete:
+            self.redis_client.delete(*keys_to_delete)
 
     def close(self) -> None:
         """Close the Redis connection for the user-specific space."""

From 1812bc2e0728d865a5d8bde85be42b6f538be083 Mon Sep 17 00:00:00 2001
From: lw86ruwo
Date: Wed, 24 Jan 2024 15:04:05 +0100
Subject: [PATCH 178/254] feat(add): reset for resetting the signals

---
 wannadb_web/worker/data.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/wannadb_web/worker/data.py b/wannadb_web/worker/data.py
index d5367b7b..f1c2e022 100644
--- a/wannadb_web/worker/data.py
+++ b/wannadb_web/worker/data.py
@@ -53,6 +53,7 @@ def document_base_to_json(document_base: DocumentBase):
 class Signals:
     def __init__(self, user_id: str):
+        self.__user_id = user_id
         self.pipeline = _State("pipeline", user_id)
         self.feedback = _Signal("feedback", user_id)
         self.status = _State("status", user_id)
@@ -60,7 +61,7 @@ def __init__(self, user_id: str):
         self.error = _Error("error", user_id)
         self.document_base_to_ui = _DocumentBase("document_base_to_ui", user_id)
         self.statistics = _Statistics("statistics_to_ui", user_id)
-        self.feedback_request_to_ui = _Dump("feedback_request_to_ui", user_id)
+        self.feedback_request_to_ui = _Feedback("feedback_request_to_ui", user_id)
         self.cache_db_to_ui = _Dump("cache_db_to_ui", user_id)
 
     def to_json(self) -> dict[str, str]:
@@ -73,6 +74,9 @@ def to_json(self) -> dict[str, str]:
                 self.feedback_request_to_ui.type: self.feedback_request_to_ui.to_json(),
                 self.cache_db_to_ui.type: self.cache_db_to_ui.to_json()}
 
+    def reset(self):
+        RedisCache(self.__user_id).delete_user_space()
+
 
 class Emitable(abc.ABC):
 
@@ -153,7 +157,6 @@ def emit(self, status: DocumentBase):
 
 class _Statistics(Emitable):
-
     @property
     def msg(self):
         return "not implemented"
@@ -165,6 +168,17 @@ def emit(self, statistic: Statistics):
         pass
 
 
+class _Feedback(Emitable):
+
+    def to_json(self):
+        if self.msg is None:
+            return {}
+        return json.loads(self.msg)
+
+    def emit(self, status: dict[str, Any]):
+        self.redis.set(self.type, json.dumps(status))
+
+
 class _Dump(Emitable):
 
     def to_json(self):

From 1a34e2211cc0b7639d9b4393703aee162877928a Mon Sep 17 00:00:00 2001
From: lw86ruwo
Date: Wed, 24 Jan 2024 15:31:21 +0100
Subject: [PATCH 179/254] add fix + todo

---
 wannadb_web/routing/core.py | 15 +++++++++++++--
 wannadb_web/worker/data.py  |  4 +++-
 wannadb_web/worker/tasks.py |  1 +
 3 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/wannadb_web/routing/core.py b/wannadb_web/routing/core.py
index f31b1a16..5d359d99 100644
--- a/wannadb_web/routing/core.py
+++ b/wannadb_web/routing/core.py
@@ -149,6 +149,13 @@ def document_base():
 
 @core_routes.route('/status/', methods=['GET'])
 def task_status(task_id: str):
+    ## todo: to get the right feedback, either the task_id or the user_id has to be passed in
+    ## Signals(task_id).to_json()
+    ## or
+    ## Signals(user_id).to_json()
+    ## the id has to be the same as the one in
+    ## wannadb_web/worker/Web_API.py
+    ## so it has to be
    task: AsyncResult = BaseTask().AsyncResult(task_id=task_id)
     status = task.status
     if status == "FAILURE":
@@ -162,5 +169,9 @@ def task_status(task_id: str):
 
 @core_routes.route('/status/', methods=['POST'])
 def task_update(task_id: str):
-    redis_client = RedisCache(task_id).redis_client
-    redis_client.set("input", "test")
+    signals = Signals(task_id)
+
+    ## todo: feedback has to be emitted here in the format:
+    ## { ------------------ }
+
+    signals.feedback_request_from_ui.emit(request.json.get("feedback"))
diff --git a/wannadb_web/worker/data.py b/wannadb_web/worker/data.py
index f1c2e022..386b458e 100644
--- a/wannadb_web/worker/data.py
+++ b/wannadb_web/worker/data.py
@@ -62,10 +62,12 @@ def __init__(self, user_id: str):
         self.document_base_to_ui = _DocumentBase("document_base_to_ui", user_id)
         self.statistics = _Statistics("statistics_to_ui", user_id)
         self.feedback_request_to_ui = _Feedback("feedback_request_to_ui", user_id)
+        self.feedback_request_from_ui = _Feedback("feedback_request_from_ui", user_id)
         self.cache_db_to_ui = _Dump("cache_db_to_ui", user_id)
 
     def to_json(self) -> dict[str, str]:
-        return {self.feedback.type: self.feedback.to_json(),
+        return {"user_id": self.__user_id,
+                self.feedback.type: self.feedback.to_json(),
                 self.error.type: self.error.to_json(),
                 self.status.type: self.status.to_json(),
                 self.finished.type: self.finished.to_json(),
diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py
index a857ad59..d0e43769 100644
--- a/wannadb_web/worker/tasks.py
+++ b/wannadb_web/worker/tasks.py
@@ -110,6 +110,7 @@ def run(self, user_id: int, document_ids: list[int], attributes_strings: list[st
         """
         init api
         """
+        ## todo self.request.id has to be passed through here and stored in signals(request_id)
         api = WannaDB_WebAPI(user_id, base_name, organisation_id)
 
         """
From f89b4d6fda7fd9106799608491bc9790a8ec5ef2 Mon Sep 17 00:00:00 2001
From: lw86ruwo
Date: Wed, 24 Jan 2024 15:33:45 +0100
Subject: [PATCH 180/254] fix setter return type

---
 wannadb_web/worker/Web_API.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/wannadb_web/worker/Web_API.py b/wannadb_web/worker/Web_API.py
index 77d00b56..6a028eae 100644
--- a/wannadb_web/worker/Web_API.py
+++ b/wannadb_web/worker/Web_API.py
@@ -85,6 +85,7 @@ def document_base(self, value: DocumentBase):
             raise TypeError("Document base must be of type DocumentBase!")
         self._document_base = value
         self.signals.document_base_to_ui.emit(value)
+        return
 
     def create_document_base(self, documents: list[Document], attributes: list[Attribute], statistics: Statistics):
         logger.debug("Called slot 'create_document_base'.")
From ba670c0326c0655b5c5cbc5ec4404bcd942ea678 Mon Sep 17 00:00:00 2001
From: cophilot
Date: Wed, 24 Jan 2024 16:32:37 +0100
Subject: [PATCH 181/254] added get document base end point

---
 wannadb_web/postgres/queries.py | 30 +++++++++++++++++++++++++++---
 wannadb_web/routing/files.py    | 16 +++++++++++++++-
 2 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/wannadb_web/postgres/queries.py b/wannadb_web/postgres/queries.py
index ee90badc..299e5c20 100644
--- a/wannadb_web/postgres/queries.py
+++ b/wannadb_web/postgres/queries.py
@@ -200,9 +200,9 @@ def getDocumentsForOrganization(organisation_id: int):
     for document in result:
         id = document[0]
         name = document[1]
-        content = ''
-        if document[2]:
-            content = document[2]
+        if document[2] == None:
+            continue
+        content = document[2]
         doc_array.append({
             "id": id,
             "name": name,
             "content": content
         })
     return doc_array
 
+def 
getDocumentBaseForOrganization(organisation_id: int): + select_query = sql.SQL("""SELECT id, name,content,content_byte + FROM documents + WHERE organisationid = (%s) + """) + result = execute_query(select_query, (organisation_id,)) + + if result is None or len(result) == 0: + return [] + + doc_array = [] + + for document in result: + id = document[0] + name = document[1] + if document[3] == None: + continue + content = document[3] + doc_array.append({ + "id": id, + "name": name, + }) + return doc_array + def updateDocumentContent(doc_id: int, new_content): try: diff --git a/wannadb_web/routing/files.py b/wannadb_web/routing/files.py index 69701d3c..d9e14191 100644 --- a/wannadb_web/routing/files.py +++ b/wannadb_web/routing/files.py @@ -1,6 +1,6 @@ from flask import Blueprint, request, make_response -from wannadb_web.postgres.queries import deleteDocumentContent, getDocument, getDocumentsForOrganization, updateDocumentContent +from wannadb_web.postgres.queries import deleteDocumentContent, getDocument, getDocumentBaseForOrganization, getDocumentsForOrganization, updateDocumentContent from wannadb_web.util import tokenDecode from wannadb_web.postgres.transactions import addDocument @@ -55,6 +55,20 @@ def get_files_for_organization(_id): return make_response(documents, 200) +@main_routes.route('/organization/get/documentbase/<_id>', methods=['GET']) +def get_documentbase_for_organization(_id): + authorization = request.headers.get("authorization") + org_id = int(_id) + + token = tokenDecode(authorization) + if token is None: + return make_response({'error': 'no authorization'}, 401) + + + document_base = getDocumentBaseForOrganization(org_id) + + return make_response(document_base, 200) + @main_routes.route('/update/file/content', methods=['POST']) def update_file_content(): authorization = request.headers.get("authorization") From 611bbe0d8680a316ff9eb18661fd259d6eaba5ed Mon Sep 17 00:00:00 2001 From: cophilot Date: Wed, 24 Jan 2024 16:32:51 +0100 Subject: [PATCH 182/254] fixed status endpoint --- wannadb_web/routing/core.py | 38 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/wannadb_web/routing/core.py b/wannadb_web/routing/core.py index 5d359d99..b3f5e7b6 100644 --- a/wannadb_web/routing/core.py +++ b/wannadb_web/routing/core.py @@ -101,18 +101,12 @@ def document_base(): This endpoint is used to update a document base from a list of attributes. 
- Example Header: - { - "Authorization": "your_authorization_token" - } - - Example JSON Payload: + Example Form Payload: { + "authorization": "your_authorization_token" "organisationId": "your_organisation_id", "baseName": "your_document_base_name", - "attributes": [ - "plane","car","bike" - ] + "attributes": "plane,car,bike" } """ form = request.form @@ -129,6 +123,8 @@ def document_base(): if _token is False: return make_response({"error": "invalid token"}, 401) + attributes_strings = attributes_strings.split(",") + attributes = [] for att in attributes_string: attributes.append(Attribute(att)) @@ -147,24 +143,24 @@ def document_base(): # task_id=task.id)} -@core_routes.route('/status/', methods=['GET']) -def task_status(task_id: str): - ## todo: um des richtige feedback zu bekommen muss entweder die task_id oder der user_id mitgegeben werden - ## Signals(task_id).to_json() - ## oder - ## Signals(user_id).to_json() - ## die id muss die gleiche sein wie in - ## wannadb_web/worker/Web_API.py - ## also muss +@core_routes.route('/status//', methods=['GET']) +def task_status(token: str,task_id: str): + + _token = tokenDecode(token) + + if _token is False: + return make_response({"error": "invalid token"}, 401) + user_id = _token.id + task: AsyncResult = BaseTask().AsyncResult(task_id=task_id) status = task.status if status == "FAILURE": - return make_response({"state": "FAILURE", "meta": Signals(task_id).to_json()}, 500) + return make_response({"state": "FAILURE", "meta": Signals(user_id).to_json()}, 500) if status == "SUCCESS": - return make_response({"state": "SUCCESS", "meta": Signals(task_id).to_json()}, 200) + return make_response({"state": "SUCCESS", "meta": Signals(user_id).to_json()}, 200) if status is None: return make_response({"error": "task not found"}, 500) - return make_response({"state": task.status, "meta": Signals(task_id).to_json()}, 202) + return make_response({"state": task.status, "meta": Signals(user_id).to_json()}, 202) @core_routes.route('/status/', methods=['POST']) From 816498c8164d79c144f035bb3ea76c5a6a1b7536 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 24 Jan 2024 17:08:11 +0100 Subject: [PATCH 183/254] fix prod build --- docker-compose-prod.yaml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/docker-compose-prod.yaml b/docker-compose-prod.yaml index c6abd035..27a93333 100644 --- a/docker-compose-prod.yaml +++ b/docker-compose-prod.yaml @@ -23,11 +23,9 @@ services: dockerfile: Dockerfile target: worker tty: true - command: ['celery', '-A', 'app.celery', 'worker', '-l', 'info'] + command: ['celery', '-A', 'celery_app', 'worker', '-l', 'info'] env_file: - wannadb_web/.env/.dev - volumes: - - ./:/home/wannadb networks: - mynetwork depends_on: @@ -40,11 +38,9 @@ services: dockerfile: Dockerfile target: worker tty: true - command: ['celery', '-A', 'app.celery', 'flower'] + command: ['celery', '-A', 'celery_app', 'flower'] env_file: - wannadb_web/.env/.dev - volumes: - - ./:/home/wannadb networks: - mynetwork ports: From 5f40c7355fdd6641b4f8279a38848b21bb72d1c4 Mon Sep 17 00:00:00 2001 From: cophilot Date: Wed, 24 Jan 2024 17:17:15 +0100 Subject: [PATCH 184/254] added /document_base/attributes --- wannadb_web/routing/core.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/wannadb_web/routing/core.py b/wannadb_web/routing/core.py index b3f5e7b6..9a0b9755 100644 --- a/wannadb_web/routing/core.py +++ b/wannadb_web/routing/core.py @@ -39,7 +39,7 @@ from wannadb_web.Redis.RedisCache import RedisCache from 
wannadb_web.util import tokenDecode from wannadb_web.worker.data import Signals -from wannadb_web.worker.tasks import CreateDocumentBase, BaseTask +from wannadb_web.worker.tasks import CreateDocumentBase, BaseTask, DocumentBaseAddAttributes core_routes = Blueprint('core_routes', __name__, url_prefix='/core') @@ -67,7 +67,6 @@ def create_document(): } """ form = request.form - # authorization = request.headers.get("authorization") authorization = form.get("authorization") organisation_id: Optional[int] = form.get("organisationId") base_name = form.get("baseName") @@ -110,7 +109,6 @@ def document_base(): } """ form = request.form - # authorization = request.headers.get("authorization") authorization = form.get("authorization") organisation_id = form.get("organisationId") base_name = form.get("baseName") @@ -132,8 +130,12 @@ def document_base(): statistics = Statistics(False) user_id = _token.id - attributesDump = pickle.dumps(attributes) - statisticsDump = pickle.dumps(statistics) + #attributesDump = pickle.dumps(attributes) + #statisticsDump = pickle.dumps(statistics) + task = DocumentBaseAddAttributes().apply_async(args=(user_id, attributes_strings, + base_name, organisation_id)) + + return make_response({'task_id': task.id}, 202) # @core_routes.route('/longtask', methods=['POST']) From 546b5edab73d46cb1f5e87692a15491873da4af1 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 24 Jan 2024 17:18:06 +0100 Subject: [PATCH 185/254] fix prod build --- Dockerfile | 8 +++++--- docker-compose-prod.yaml | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index 51d05a1a..ca10acec 100644 --- a/Dockerfile +++ b/Dockerfile @@ -37,6 +37,10 @@ RUN pip install --use-pep517 pytest FROM build as worker +FROM build as worker-prod + +#copy the rest +COPY . . FROM build as dev @@ -48,10 +52,8 @@ CMD ["mypy","--install-types", "--non-interactive"] CMD ["flask", "--app", "app", "--debug", "run","--host","0.0.0.0", "--port", "8000" ] -FROM build as prod +FROM worker-prod as prod -#copy the rest -COPY . . RUN chmod +x wannadb_web/entrypoint.sh diff --git a/docker-compose-prod.yaml b/docker-compose-prod.yaml index 27a93333..dbe6270f 100644 --- a/docker-compose-prod.yaml +++ b/docker-compose-prod.yaml @@ -21,7 +21,7 @@ services: build: context: . dockerfile: Dockerfile - target: worker + target: worker-prod tty: true command: ['celery', '-A', 'celery_app', 'worker', '-l', 'info'] env_file: @@ -36,7 +36,7 @@ services: build: context: . 
dockerfile: Dockerfile - target: worker + target: worker-prod tty: true command: ['celery', '-A', 'celery_app', 'flower'] env_file: From 2984a80f7e694e94c885efc3b1c30d01d675af4e Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 24 Jan 2024 17:21:08 +0100 Subject: [PATCH 186/254] add DocumentBaseLoad --- wannadb_web/worker/tasks.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index d0e43769..7bd81925 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -140,6 +140,14 @@ def run(self, user_id: int, document_ids: list[int], attributes_strings: list[st self.update(State.SUCCESS) return self +class DocumentBaseLoad(BaseTask): + name = "DocumentBaseLoad" + + def run(self, user_id: int, base_name: str, organisation_id: int): + self.load() + api = WannaDB_WebAPI(user_id, base_name, organisation_id) + api.load_document_base_from_bson() + return self class DocumentBaseAddAttributes(BaseTask): name = "DocumentBaseAddAttributes" From 5ba38bae5db99279c42cae5e51fba31f39374850 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 24 Jan 2024 17:24:02 +0100 Subject: [PATCH 187/254] adj task return --- wannadb_web/worker/tasks.py | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index 7bd81925..d4979d49 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -137,9 +137,14 @@ def run(self, user_id: int, document_ids: list[int], attributes_strings: list[st api.create_document_base(documents, attributes, statistics) api.save_document_base_to_bson() - self.update(State.SUCCESS) + if api.signals.error.msg is None: + api.update_document_base_to_bson() + self.update(State.SUCCESS) + return self + self.update(State.ERROR) return self + class DocumentBaseLoad(BaseTask): name = "DocumentBaseLoad" @@ -147,8 +152,13 @@ def run(self, user_id: int, base_name: str, organisation_id: int): self.load() api = WannaDB_WebAPI(user_id, base_name, organisation_id) api.load_document_base_from_bson() + if api.signals.error.msg is None: + self.update(State.SUCCESS) + return self + self.update(State.ERROR) return self + class DocumentBaseAddAttributes(BaseTask): name = "DocumentBaseAddAttributes" @@ -168,7 +178,12 @@ def run(self, user_id: int, attributes_strings: list[str], base_name: str, organ api = WannaDB_WebAPI(user_id, base_name, organisation_id) api.load_document_base_from_bson() api.add_attributes(attributes) - api.update_document_base_to_bson() + if api.signals.error.msg is None: + api.update_document_base_to_bson() + self.update(State.SUCCESS) + return self + self.update(State.ERROR) + return self class DocumentBaseRemoveAttributes(BaseTask): @@ -192,6 +207,10 @@ def run(self, user_id: int, attributes_strings: list[str], base_name: str, organ api.remove_attributes(attributes) if api.signals.error.msg is None: api.update_document_base_to_bson() + self.update(State.SUCCESS) + return self + self.update(State.ERROR) + return self class DocumentBaseForgetMatches(BaseTask): @@ -215,6 +234,10 @@ def run(self, user_id: int, attributes_strings: list[str], base_name: str, organ api.forget_matches() if api.signals.error.msg is None: api.update_document_base_to_bson() + self.update(State.SUCCESS) + return self + self.update(State.ERROR) + return self class DocumentBaseForgetMatchesForAttribute(BaseTask): @@ -230,6 +253,10 @@ def run(self, user_id: int, attribute_string: str, base_name: str, organisation_ 
api.forget_matches_for_attribute(attribute) if api.signals.error.msg is None: api.update_document_base_to_bson() + self.update(State.SUCCESS) + return self + self.update(State.ERROR) + return self class DocumentBaseInteractiveTablePopulation(BaseTask): @@ -245,3 +272,7 @@ def run(self, user_id: int, base_name: str, organisation_id: int): api.interactive_table_population() if api.signals.error.msg is None: api.update_document_base_to_bson() + self.update(State.SUCCESS) + return self + self.update(State.ERROR) + return self From 3bc844c156c005a2c4dad9ad9348511f89a13342 Mon Sep 17 00:00:00 2001 From: cophilot Date: Thu, 25 Jan 2024 16:50:12 +0100 Subject: [PATCH 188/254] added load document base endpoint --- celery_app.py | 3 ++- wannadb_web/routing/core.py | 48 +++++++++++++++++++++++++++++-------- wannadb_web/worker/data.py | 2 +- wannadb_web/worker/tasks.py | 2 ++ 4 files changed, 43 insertions(+), 12 deletions(-) diff --git a/celery_app.py b/celery_app.py index 742d517e..a242d508 100644 --- a/celery_app.py +++ b/celery_app.py @@ -3,7 +3,7 @@ from celery import Celery -from wannadb_web.worker.tasks import BaseTask, TestTask, InitManager, CreateDocumentBase +from wannadb_web.worker.tasks import BaseTask, DocumentBaseLoad, TestTask, InitManager, CreateDocumentBase logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") @@ -15,3 +15,4 @@ app.register_task(TestTask) app.register_task(InitManager) app.register_task(CreateDocumentBase) +app.register_task(DocumentBaseLoad) diff --git a/wannadb_web/routing/core.py b/wannadb_web/routing/core.py index 9a0b9755..5f7b22e3 100644 --- a/wannadb_web/routing/core.py +++ b/wannadb_web/routing/core.py @@ -39,7 +39,7 @@ from wannadb_web.Redis.RedisCache import RedisCache from wannadb_web.util import tokenDecode from wannadb_web.worker.data import Signals -from wannadb_web.worker.tasks import CreateDocumentBase, BaseTask, DocumentBaseAddAttributes +from wannadb_web.worker.tasks import CreateDocumentBase, BaseTask, DocumentBaseAddAttributes, DocumentBaseLoad core_routes = Blueprint('core_routes', __name__, url_prefix='/core') @@ -47,19 +47,15 @@ @core_routes.route('/document_base', methods=['POST']) -def create_document(): +def create_document_base(): """ Endpoint for creating a document base. This endpoint is used to create a document base from a list of document ids and a list of attributes. - Example Header: - { - "Authorization": "your_authorization_token" - } - - Example JSON Payload: + Example Form Payload: { + "authorization": "your_authorization_token" "organisationId": "your_organisation_id", "baseName": "your_document_base_name", "document_ids": "1, 2, 3", @@ -92,9 +88,41 @@ def create_document(): return make_response({'task_id': task.id}, 202) +@core_routes.route('/document_base/load', methods=['POST']) +def load_document_base(): + """ + Endpoint for loading a document base. + + This endpoint is used to load a document base from a name and an organisation id. 
+ + Example Form Payload: + { + "authorization": "your_authorization_token" + "organisationId": "your_organisation_id", + "baseName": "your_document_base_name", + } + """ + form = request.form + authorization = form.get("authorization") + organisation_id: Optional[int] = form.get("organisationId") + base_name = form.get("baseName") + if (organisation_id is None or base_name is None + or authorization is None): + return make_response({"error": "missing parameters"}, 400) + _token = tokenDecode(authorization) + + if _token is False: + return make_response({"error": "invalid token"}, 401) + + user_id = _token.id + + task = DocumentBaseLoad().apply_async(args=(user_id, base_name, organisation_id)) + + return make_response({'task_id': task.id}, 202) + -@core_routes.route('/document_base/attributes', methods=['UPDATE']) -def document_base(): +@core_routes.route('/document_base/attributes/add', methods=['UPDATE']) +def document_base_attribute_add(): """ Endpoint for update a document base. diff --git a/wannadb_web/worker/data.py b/wannadb_web/worker/data.py index 386b458e..5dbaaba5 100644 --- a/wannadb_web/worker/data.py +++ b/wannadb_web/worker/data.py @@ -45,7 +45,7 @@ def attribute_to_json(attribute: Attribute): def document_base_to_json(document_base: DocumentBase): return { - 'msg': {"attributes ": [attribute.name for attribute in document_base.attributes], + 'msg': {"attributes": [attribute.name for attribute in document_base.attributes], "nuggets": [nugget_to_json(nugget) for nugget in document_base.nuggets] } } diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index d4979d49..e6bd84d2 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -152,6 +152,8 @@ def run(self, user_id: int, base_name: str, organisation_id: int): self.load() api = WannaDB_WebAPI(user_id, base_name, organisation_id) api.load_document_base_from_bson() + #self.update(State.SUCCESS) + #return self if api.signals.error.msg is None: self.update(State.SUCCESS) return self From 7d7363b4d21ea62a6ae7d33a1d9c2cfbe2833fb3 Mon Sep 17 00:00:00 2001 From: cophilot Date: Thu, 25 Jan 2024 21:59:28 +0100 Subject: [PATCH 189/254] added todo --- wannadb_web/worker/Web_API.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/wannadb_web/worker/Web_API.py b/wannadb_web/worker/Web_API.py index 6a028eae..c414df04 100644 --- a/wannadb_web/worker/Web_API.py +++ b/wannadb_web/worker/Web_API.py @@ -165,6 +165,8 @@ def load_document_base_from_bson(self): def save_document_base_to_bson(self): logger.debug("Called function 'save_document_base_to_bson'.") + # TODO does not work in prod!!! 
+ try: document_id = addDocument(self.document_base_name, self.document_base.to_bson(), self.organisation_id, self.user_id) From ba22f73bea6aef212d95dd648395c3d20a9819ae Mon Sep 17 00:00:00 2001 From: cophilot Date: Sat, 27 Jan 2024 17:08:42 +0100 Subject: [PATCH 190/254] added new-stable-worklfow --- .github/workflows/new_stable_version.yml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 .github/workflows/new_stable_version.yml diff --git a/.github/workflows/new_stable_version.yml b/.github/workflows/new_stable_version.yml new file mode 100644 index 00000000..452828e9 --- /dev/null +++ b/.github/workflows/new_stable_version.yml @@ -0,0 +1,21 @@ +name: Create New Stable Version + +on: + workflow_dispatch: + +jobs: + create_pull_request: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v2 + + - name: Create Pull Request + uses: peter-evans/create-pull-request@v3 + with: + token: ${{ secrets.GITHUB_TOKEN }} + branch: stable + base: main + title: Automated New Stable Version + body: This pull request was automatically created by the workflow and contains the latest stable version of the repository. From 7b1bbc36b9e05dfe8632b9a8ba91325769338c90 Mon Sep 17 00:00:00 2001 From: cophilot Date: Sat, 27 Jan 2024 17:09:00 +0100 Subject: [PATCH 191/254] added update docbase --- celery_app.py | 8 ++++- wannadb_web/postgres/queries.py | 8 ++--- wannadb_web/routing/core.py | 58 +++++++++++++++++++++++++++++---- wannadb_web/util.py | 5 +-- wannadb_web/worker/Web_API.py | 21 ++++++++++-- wannadb_web/worker/tasks.py | 25 ++++++++++++++ 6 files changed, 110 insertions(+), 15 deletions(-) diff --git a/celery_app.py b/celery_app.py index a242d508..e783583b 100644 --- a/celery_app.py +++ b/celery_app.py @@ -3,7 +3,7 @@ from celery import Celery -from wannadb_web.worker.tasks import BaseTask, DocumentBaseLoad, TestTask, InitManager, CreateDocumentBase +from wannadb_web.worker.tasks import BaseTask, DocumentBaseAddAttributes, DocumentBaseForgetMatches, DocumentBaseForgetMatchesForAttribute, DocumentBaseInteractiveTablePopulation, DocumentBaseLoad, DocumentBaseRemoveAttributes, DocumentBaseUpdateAttributes, TestTask, InitManager, CreateDocumentBase logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") @@ -16,3 +16,9 @@ app.register_task(InitManager) app.register_task(CreateDocumentBase) app.register_task(DocumentBaseLoad) +app.register_task(DocumentBaseAddAttributes) +app.register_task(DocumentBaseUpdateAttributes) +app.register_task(DocumentBaseRemoveAttributes) +app.register_task(DocumentBaseForgetMatches) +app.register_task(DocumentBaseForgetMatchesForAttribute) +app.register_task(DocumentBaseInteractiveTablePopulation) diff --git a/wannadb_web/postgres/queries.py b/wannadb_web/postgres/queries.py index 299e5c20..5143d929 100644 --- a/wannadb_web/postgres/queries.py +++ b/wannadb_web/postgres/queries.py @@ -143,7 +143,7 @@ def getDocument_by_name(document_name: str, organisation_id: int, user_id: int) Exception: if multiple documents with that name are found """ - select_query = sql.SQL("""SELECT name,content,content_byte + select_query = sql.SQL("""SELECT id,content,content_byte FROM documents d JOIN membership m ON d.organisationid = m.organisationid WHERE d.name = (%s) AND m.userid = (%s) AND m.organisationid = (%s) @@ -152,13 +152,13 @@ def getDocument_by_name(document_name: str, organisation_id: int, user_id: int) result = execute_query(select_query, (document_name, user_id, 
organisation_id,)) if len(result) == 1: document = result[0] - name = document[0] + id = document[0] if document[1]: content = document[1] - return str(name), str(content) + return str(id), str(content) elif document[2]: content = document[2] - return str(name), bytes(content) + return str(id), bytes(content) elif len(result) > 1: raise Exception("Multiple documents with the same name found") raise Exception("No document with that name found") diff --git a/wannadb_web/routing/core.py b/wannadb_web/routing/core.py index 5f7b22e3..ce0bbd2f 100644 --- a/wannadb_web/routing/core.py +++ b/wannadb_web/routing/core.py @@ -39,7 +39,7 @@ from wannadb_web.Redis.RedisCache import RedisCache from wannadb_web.util import tokenDecode from wannadb_web.worker.data import Signals -from wannadb_web.worker.tasks import CreateDocumentBase, BaseTask, DocumentBaseAddAttributes, DocumentBaseLoad +from wannadb_web.worker.tasks import CreateDocumentBase, BaseTask, DocumentBaseAddAttributes, DocumentBaseLoad, DocumentBaseUpdateAttributes core_routes = Blueprint('core_routes', __name__, url_prefix='/core') @@ -121,12 +121,12 @@ def load_document_base(): return make_response({'task_id': task.id}, 202) -@core_routes.route('/document_base/attributes/add', methods=['UPDATE']) +@core_routes.route('/document_base/attributes/add', methods=['POST']) def document_base_attribute_add(): """ - Endpoint for update a document base. + Endpoint for add attributes to a document base. - This endpoint is used to update a document base from a list of attributes. + This endpoint is used to add attributes to a document base from a list of attributes. Example Form Payload: { @@ -165,6 +165,49 @@ def document_base_attribute_add(): return make_response({'task_id': task.id}, 202) +@core_routes.route('/document_base/attributes/update', methods=['POST']) +def document_base_attribute_update(): + """ + Endpoint for update the attributes of a document base. + + This endpoint is used to update the attributes of a document base from a list of attributes. 
+ + Example Form Payload: + { + "authorization": "your_authorization_token" + "organisationId": "your_organisation_id", + "baseName": "your_document_base_name", + "attributes": "plane,car,bike" + } + """ + form = request.form + authorization = form.get("authorization") + organisation_id = form.get("organisationId") + base_name = form.get("baseName") + attributes_string = form.get("attributes") + if (organisation_id is None or base_name is None or attributes_string is None + or authorization is None): + return make_response({"error": "missing parameters"}, 400) + _token = tokenDecode(authorization) + + if _token is False: + return make_response({"error": "invalid token"}, 401) + + attributes_string = attributes_string.split(",") + + #attributes = [] + #for att in attributes_string: + # attributes.append(Attribute(att)) + # + #statistics = Statistics(False) + + user_id = _token.id + + task = DocumentBaseUpdateAttributes().apply_async(args=(user_id, attributes_string, + base_name, organisation_id)) + + return make_response({'task_id': task.id}, 202) + # @core_routes.route('/longtask', methods=['POST']) # def longtask(): @@ -187,10 +230,13 @@ def task_status(token: str,task_id: str): if status == "FAILURE": return make_response({"state": "FAILURE", "meta": Signals(user_id).to_json()}, 500) if status == "SUCCESS": - return make_response({"state": "SUCCESS", "meta": Signals(user_id).to_json()}, 200) + signals = Signals(user_id).to_json() + return make_response({"state": "SUCCESS", "meta": signals}, 200) if status is None: return make_response({"error": "task not found"}, 500) - return make_response({"state": task.status, "meta": Signals(user_id).to_json()}, 202) + + signals = Signals(user_id).to_json() + return make_response({"state": task.status, "meta": signals}, 202) @core_routes.route('/status/', methods=['POST']) diff --git a/wannadb_web/util.py b/wannadb_web/util.py index 305ca3f8..569e5364 100644 --- a/wannadb_web/util.py +++ b/wannadb_web/util.py @@ -29,8 +29,9 @@ def tokenDecode(string: str): try: decoded_token = jwt.decode(string, _jwtkey, leeway=datetime.timedelta(minutes=1), algorithms="HS256", verify=True) - except jwt.ExpiredSignatureError: - return False + #except jwt.ExpiredSignatureError: + except: + return None user = decoded_token.get('user') _id = int(decoded_token.get('id')) exp = decoded_token.get('exp') diff --git a/wannadb_web/worker/Web_API.py b/wannadb_web/worker/Web_API.py index c414df04..64620180 100644 --- a/wannadb_web/worker/Web_API.py +++ b/wannadb_web/worker/Web_API.py @@ -156,6 +156,7 @@ def load_document_base_from_bson(self): logger.info(f"Document base loaded from BSON with id {document_id}.") self.document_base = document_base + self.document_id = document_id except Exception as e: logger.error(str(e)) @@ -165,8 +166,6 @@ def load_document_base_from_bson(self): def save_document_base_to_bson(self): logger.debug("Called function 'save_document_base_to_bson'.") - # TODO does not work in prod!!! - try: document_id = addDocument(self.document_base_name, self.document_base.to_bson(), self.organisation_id, self.user_id) @@ -197,6 +196,13 @@ def update_document_base_to_bson(self): self.signals.error.emit(Exception("Document ID not set!")) return try: + print("BASE") + print(self.document_base) + print("ID") + print(self.document_id) + print("ATT") + print(self.document_base.attributes) + status = updateDocumentContent(self.document_id, self.document_base.to_bson()) if status is False: logger.error(f"Document base could not be saved to BSON! 
Document {self.document_id} does not exist!") @@ -289,6 +295,17 @@ def remove_attributes(self, attributes: list[Attribute]): else: logger.error("Attribute name does not exist!") self.signals.error.emit(Exception("Attribute name does not exist!")) + + def update_attributes(self, attributes: list[Attribute]): + logger.debug("Called function 'update_attributes'.") + self.document_base.attributes.clear() + for attribute in attributes: + if attribute is None: + logger.info("Attribute name must not be empty and was thus ignored.") + else: + self.document_base.attributes.append(attribute) + self.sqLiteCacheDBWrapper.cache_db.create_table_by_name(attribute.name) + logger.debug(f"Attribute '{attribute.name}' added.") def forget_matches_for_attribute(self, attribute: Attribute): logger.debug("Called function 'forget_matches_for_attribute'.") diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index e6bd84d2..d8c72578 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -187,6 +187,31 @@ def run(self, user_id: int, attributes_strings: list[str], base_name: str, organ self.update(State.ERROR) return self +class DocumentBaseUpdateAttributes(BaseTask): + name = "DocumentBaseAddAttributes" + + def run(self, user_id: int, attributes_strings: list[str], base_name: str, organisation_id: int): + self.load() + attributes: list[Attribute] = [] + + for attribute_string in attributes_strings: + if attribute_string == "": + logger.error("Attribute names cannot be empty!") + raise Exception("Attribute names cannot be empty!") + if attribute_string in [attribute.name for attribute in attributes]: + logger.error("Attribute names must be unique!") + raise Exception("Attribute names must be unique!") + attributes.append(Attribute(attribute_string)) + + api = WannaDB_WebAPI(user_id, base_name, organisation_id) + api.load_document_base_from_bson() + api.update_attributes(attributes) + if api.signals.error.msg is None: + api.update_document_base_to_bson() + self.update(State.SUCCESS) + return self + self.update(State.ERROR) + return self class DocumentBaseRemoveAttributes(BaseTask): name = "DocumentBaseRemoveAttributes" From da21a012c88fb46da7bb01efcbfbbb24e03f6e48 Mon Sep 17 00:00:00 2001 From: Ramzes Khotambekzoda Date: Mon, 29 Jan 2024 14:10:52 +0100 Subject: [PATCH 192/254] added fix_button() => the fix_button() should make it possible, the user to edit the nuggets => the function expects the object of type InformationNugget and returns all possible nuggets the user can choose --- wannadb_web/routing/core.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/wannadb_web/routing/core.py b/wannadb_web/routing/core.py index f31b1a16..7e974377 100644 --- a/wannadb_web/routing/core.py +++ b/wannadb_web/routing/core.py @@ -40,6 +40,7 @@ from wannadb_web.util import tokenDecode from wannadb_web.worker.data import Signals from wannadb_web.worker.tasks import CreateDocumentBase, BaseTask +from wannadb.data.signals import CachedDistanceSignal core_routes = Blueprint('core_routes', __name__, url_prefix='/core') @@ -164,3 +165,17 @@ def task_status(task_id: str): def task_update(task_id: str): redis_client = RedisCache(task_id).redis_client redis_client.set("input", "test") + +@core_routes.route('/fix_button', methods=['POST']) +def fix_button(): + try: + data = request.json + # the nugget is type of InformationNugget + inf_Nugget= data.get("InformationNugget") + document = inf_Nugget.document + nuggets_sorted_by_distance = list(sorted(document.nuggets, key=lambda x: 
x[CachedDistanceSignal]))
+        fix_button_result = [nugget for nugget in nuggets_sorted_by_distance]
+        return make_response({"status": "success", "result": fix_button_result})
+    except Exception as e:
+        return make_response({"status": "error", "error_message": str(e)}), 500
+
From f097066988c0590df60ada957b48b40967655575 Mon Sep 17 00:00:00 2001
From: lw86ruwo
Date: Tue, 30 Jan 2024 17:37:04 +0100
Subject: [PATCH 193/254] feat(fix_button): add temporary route fix_button

---
 wannadb_web/routing/core.py | 52 +++++++++++++++++++++++++++----------
 1 file changed, 39 insertions(+), 13 deletions(-)

diff --git a/wannadb_web/routing/core.py b/wannadb_web/routing/core.py
index 7e974377..306f4b5b 100644
--- a/wannadb_web/routing/core.py
+++ b/wannadb_web/routing/core.py
@@ -39,8 +39,7 @@
 from wannadb_web.Redis.RedisCache import RedisCache
 from wannadb_web.util import tokenDecode
 from wannadb_web.worker.data import Signals
-from wannadb_web.worker.tasks import CreateDocumentBase, BaseTask
-from wannadb.data.signals import CachedDistanceSignal
+from wannadb_web.worker.tasks import CreateDocumentBase, BaseTask, DocumentBaseGetOrdertNuggets
 
 core_routes = Blueprint('core_routes', __name__, url_prefix='/core')
 
@@ -166,16 +165,43 @@ def task_update(task_id: str):
     redis_client = RedisCache(task_id).redis_client
     redis_client.set("input", "test")
 
+
+## todo: renaming of the endpoint
+
 @core_routes.route('/fix_button', methods=['POST'])
-def fix_button():
-    try:
-        data = request.json
-        # the nugget is type of InformationNugget
-        inf_Nugget= data.get("InformationNugget")
-        document = inf_Nugget.document
-        nuggets_sorted_by_distance = list(sorted(document.nuggets, key=lambda x: x[CachedDistanceSignal]))
-        fix_button_result = [nugget for nugget in nuggets_sorted_by_distance]
-        return make_response({"status": "success", "result": fix_button_result})
-    except Exception as e:
-        return make_response({"status": "error", "error_message": str(e)}), 500
+def sort_nuggets():
+    """
+    Endpoint for sorting the nuggets of a document.
+
+    This endpoint is used to return the nuggets of a single document, ordered by their cached distance. 
+ + Example Header: + { + "Authorization": "your_authorization_token" + } + + Example JSON Payload: + { + "organisationId": "your_organisation_id", + "baseName": "your_document_base_name", + "document_id": "1", (important: only one document id) + "attributes": "plane,car,bike" + } + """ + form = request.form + authorization = form.get("authorization") + organisation_id: Optional[int] = form.get("organisationId") + base_name = form.get("baseName") + document_id = form.get("document_ids") + if organisation_id is None or base_name is None or document_id is None or authorization is None: + return make_response({"error": "missing parameters"}, 400) + _token = tokenDecode(authorization) + + if _token is False: + return make_response({"error": "invalid token"}, 401) + user_id = _token.id + + task = DocumentBaseGetOrdertNuggets().apply_async(args=(user_id, base_name, organisation_id, document_id)) + + return make_response({'task_id': task.id}, 202) From 19782429bec142237c29a1bae09537eea98103d9 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 30 Jan 2024 17:37:45 +0100 Subject: [PATCH 194/254] feat(Emitable): add Nugget --- wannadb_web/worker/data.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/wannadb_web/worker/data.py b/wannadb_web/worker/data.py index f1c2e022..642b8acd 100644 --- a/wannadb_web/worker/data.py +++ b/wannadb_web/worker/data.py @@ -26,6 +26,12 @@ def nugget_to_json(nugget: InformationNugget): "start_char": str(nugget.start_char)} +def nuggets_to_json(nuggets: list[InformationNugget]): + return { + str(i): nugget_to_json(nugget) for i, nugget in enumerate(nuggets) + } + + def document_to_json(document: Document): return { "name": document.name, @@ -63,6 +69,7 @@ def __init__(self, user_id: str): self.statistics = _Statistics("statistics_to_ui", user_id) self.feedback_request_to_ui = _Feedback("feedback_request_to_ui", user_id) self.cache_db_to_ui = _Dump("cache_db_to_ui", user_id) + self.ordert_nuggets = _Nuggets("ordert_nuggets", user_id) def to_json(self) -> dict[str, str]: return {self.feedback.type: self.feedback.to_json(), @@ -131,7 +138,7 @@ def emit(self, exception: BaseException): self.redis.set(self.type, str(exception)) -class _Nugget(Emitable): +class _Nuggets(Emitable): def to_json(self): if self.msg is None: @@ -140,8 +147,8 @@ def to_json(self): raise TypeError("_Nugget msg must be of type str") return self.msg - def emit(self, status: InformationNugget): - self.redis.set(self.type, json.dumps(nugget_to_json(status))) + def emit(self, status: list[InformationNugget]): + self.redis.set(self.type, json.dumps(nuggets_to_json(status))) class _DocumentBase(Emitable): From bb78abf822d3d65524dc84090df2938aaa5cdf69 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 30 Jan 2024 17:38:06 +0100 Subject: [PATCH 195/254] feat(Task): add DocumentBaseGetOrdertNuggets --- wannadb_web/worker/tasks.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index a857ad59..a88fc69c 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -236,3 +236,17 @@ def run(self, user_id: int, base_name: str, organisation_id: int): api.interactive_table_population() if api.signals.error.msg is None: api.update_document_base_to_bson() + + +class DocumentBaseGetOrdertNuggets(BaseTask): + name = "DocumentBaseGetOrdertNuggets" + + def run(self, user_id: int, base_name: str, organisation_id: int, document_id: int): + self._signals = Signals(str(self.request.id)) + 
self._redis_client = RedisCache(str(self.request.id)) + self.load() + + api = WannaDB_WebAPI(user_id, base_name, organisation_id) + api.load_document_base_from_bson() + api.get_ordert_nuggets(document_id) + # no need to update the document base From 00dfc7c144bf8a852b1a5c3a9daf90cf1398c44d Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 30 Jan 2024 17:39:13 +0100 Subject: [PATCH 196/254] feat(WannaDB_WebAPI): add get_ordert_nuggets --- wannadb_web/worker/Web_API.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/wannadb_web/worker/Web_API.py b/wannadb_web/worker/Web_API.py index 77d00b56..8e2e0e0a 100644 --- a/wannadb_web/worker/Web_API.py +++ b/wannadb_web/worker/Web_API.py @@ -9,7 +9,8 @@ from wannadb import resources from wannadb.configuration import Pipeline from wannadb.data.data import Attribute, Document, DocumentBase -from wannadb.interaction import EmptyInteractionCallback, BaseInteractionCallback, InteractionCallback +from wannadb.data.signals import CachedDistanceSignal +from wannadb.interaction import EmptyInteractionCallback, InteractionCallback from wannadb.matching.distance import SignalsMeanDistance from wannadb.matching.matching import RankingBasedMatcher from wannadb.preprocessing.embedding import BERTContextSentenceEmbedder, RelativePositionEmbedder, \ @@ -20,9 +21,9 @@ from wannadb.preprocessing.normalization import CopyNormalizer from wannadb.preprocessing.other_processing import ContextSentenceCacher from wannadb.statistics import Statistics -from wannadb.status import BaseStatusCallback, StatusCallback +from wannadb.status import StatusCallback from wannadb_web.SQLite.Cache_DB import SQLiteCacheDBWrapper -from wannadb_web.postgres.queries import getDocument_by_name, updateDocumentContent +from wannadb_web.postgres.queries import getDocument_by_name, updateDocumentContent, getDocument from wannadb_web.postgres.transactions import addDocument from wannadb_web.worker.data import Signals @@ -86,6 +87,22 @@ def document_base(self, value: DocumentBase): self._document_base = value self.signals.document_base_to_ui.emit(value) + def get_ordert_nuggets(self, document_id: int): + document = getDocument(document_id, self.user_id) + if document is None: + logger.error(f"Document with id {document_id} not found!") + self.signals.error.emit(Exception(f"Document with id {document_id} not found!")) + return + document_name = document[0] + logger.debug("get_ordert_nuggets") + self.signals.status.emit("get_ordert_nuggets") + for document in self.document_base.documents: + if document.name == document_name: + self.signals.ordert_nuggets.emit(list(sorted(document.nuggets, key=lambda x: x[CachedDistanceSignal]))) + return + logger.error(f"Document \"{document_name}\" not found in document base!") + self.signals.error.emit(Exception(f"Document \"{document_name}\" not found in document base!")) + def create_document_base(self, documents: list[Document], attributes: list[Attribute], statistics: Statistics): logger.debug("Called slot 'create_document_base'.") self.signals.status.emit("create_document_base") From 7146a656496460232bee9411b105795f7fad16a8 Mon Sep 17 00:00:00 2001 From: Ramzes Khotambekzoda Date: Thu, 1 Feb 2024 20:00:18 +0100 Subject: [PATCH 197/254] added match_feedback function --- wannadb_web/worker/Web_API.py | 41 ++++++++++++++++++++++++++++++++++- wannadb_web/worker/data.py | 11 +++++++++- 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/wannadb_web/worker/Web_API.py b/wannadb_web/worker/Web_API.py index 
8e2e0e0a..65b6eb6a 100644 --- a/wannadb_web/worker/Web_API.py +++ b/wannadb_web/worker/Web_API.py @@ -8,7 +8,7 @@ import wannadb from wannadb import resources from wannadb.configuration import Pipeline -from wannadb.data.data import Attribute, Document, DocumentBase +from wannadb.data.data import Attribute, Document, DocumentBase, InformationNugget from wannadb.data.signals import CachedDistanceSignal from wannadb.interaction import EmptyInteractionCallback, InteractionCallback from wannadb.matching.distance import SignalsMeanDistance @@ -102,7 +102,46 @@ def get_ordert_nuggets(self, document_id: int): return logger.error(f"Document \"{document_name}\" not found in document base!") self.signals.error.emit(Exception(f"Document \"{document_name}\" not found in document base!")) + +#### TODO to check from here ### + def getDocument(self,document_name): + for document in self.document_base.documents: + if document.name == document_name: + return document + + def confirm_nugget(self, nugget:str, document, start_index, end_index): + try: + if document.text.rfind(nugget, start_index, end_index) >= 0: + return True + else: + self.signals.error.emit(Exception("Nugget is not in the Text")) + except Exception as e: + logger.error(str(e)) + self.signals.error.emit(e) + raise e + + def match_feedback(self, nugget, document_name, start_index = None, end_index = None): + logger.debug("match_feedback") + self.signals.status.emit("match_feedback") + if isinstance(nugget, str): + document = self.getDocument(document_name) + if self.confirm_nugget(nugget, document,start_index, end_index): + self.signals.match_feedback.emit({ + "message": "custom-match", + "document": document, + "start": start_index, + "end": end_index + }) + else: + self.signals.match_feedback.emit({ + "message": "is-match", + "nugget": nugget, + "not-a-match": None + }) + +#### END here ### + def create_document_base(self, documents: list[Document], attributes: list[Attribute], statistics: Statistics): logger.debug("Called slot 'create_document_base'.") self.signals.status.emit("create_document_base") diff --git a/wannadb_web/worker/data.py b/wannadb_web/worker/data.py index 642b8acd..e9f38c59 100644 --- a/wannadb_web/worker/data.py +++ b/wannadb_web/worker/data.py @@ -70,6 +70,7 @@ def __init__(self, user_id: str): self.feedback_request_to_ui = _Feedback("feedback_request_to_ui", user_id) self.cache_db_to_ui = _Dump("cache_db_to_ui", user_id) self.ordert_nuggets = _Nuggets("ordert_nuggets", user_id) + self.match_feedback = _MatchFeedback("match_feedback", user_id) def to_json(self) -> dict[str, str]: return {self.feedback.type: self.feedback.to_json(), @@ -106,6 +107,14 @@ def to_json(self): def emit(self, status: Any): raise NotImplementedError +class _MatchFeedback(Emitable): + def to_json(self): + if self.msg is None: + return {} + return json.loads(self.msg) + + def emit(self, status: dict[str, Any]): + self.redis.set(self.type, json.dumps(status)) class _State(Emitable): @@ -125,7 +134,7 @@ def to_json(self): def emit(self, status: float): self.redis.set(self.type, str(status)) - + class _Error(Emitable): From adcb25961b04bda057324c282014b6e70105ede1 Mon Sep 17 00:00:00 2001 From: Ramzes Khotambekzoda Date: Sun, 4 Feb 2024 14:59:38 +0100 Subject: [PATCH 198/254] Update match_button --- wannadb_web/worker/Web_API.py | 50 +++++++++-------------------------- wannadb_web/worker/tasks.py | 41 ++++++++++++++++++++++++++-- 2 files changed, 51 insertions(+), 40 deletions(-) diff --git a/wannadb_web/worker/Web_API.py 
b/wannadb_web/worker/Web_API.py index 65b6eb6a..c03614c9 100644 --- a/wannadb_web/worker/Web_API.py +++ b/wannadb_web/worker/Web_API.py @@ -36,6 +36,7 @@ def __init__(self, user_id: int, document_base_name: str, organisation_id: int): self._document_id: Optional[int] = None self._document_base: Optional[DocumentBase] = None self.user_id = user_id + self._feedback = None self.signals = Signals(str(self.user_id)) self.signals.reset() @@ -63,7 +64,18 @@ def interaction_callback_fn(pipeline_element_identifier, feedback_request): self.signals.error.emit(Exception("Cache db could not be initialized!")) raise Exception("Cache db could not be initialized!") logger.info("WannaDB_WebAPI initialized") + + @property + def feedback(self): + if self._feedback is None: + raise Exception("Feedback is not set!") + return self._feedback + + @feedback.setter + def feedback(self, value:dict): + self._feedback = value + @property def document_id(self): if self._document_id is None: @@ -103,44 +115,6 @@ def get_ordert_nuggets(self, document_id: int): logger.error(f"Document \"{document_name}\" not found in document base!") self.signals.error.emit(Exception(f"Document \"{document_name}\" not found in document base!")) - -#### TODO to check from here ### - def getDocument(self,document_name): - for document in self.document_base.documents: - if document.name == document_name: - return document - - def confirm_nugget(self, nugget:str, document, start_index, end_index): - try: - if document.text.rfind(nugget, start_index, end_index) >= 0: - return True - else: - self.signals.error.emit(Exception("Nugget is not in the Text")) - except Exception as e: - logger.error(str(e)) - self.signals.error.emit(e) - raise e - - def match_feedback(self, nugget, document_name, start_index = None, end_index = None): - logger.debug("match_feedback") - self.signals.status.emit("match_feedback") - if isinstance(nugget, str): - document = self.getDocument(document_name) - if self.confirm_nugget(nugget, document,start_index, end_index): - self.signals.match_feedback.emit({ - "message": "custom-match", - "document": document, - "start": start_index, - "end": end_index - }) - else: - self.signals.match_feedback.emit({ - "message": "is-match", - "nugget": nugget, - "not-a-match": None - }) - -#### END here ### def create_document_base(self, documents: list[Document], attributes: list[Attribute], statistics: Statistics): logger.debug("Called slot 'create_document_base'.") diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index a88fc69c..6d6b015e 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -1,12 +1,12 @@ import logging import pickle import time -from typing import Optional, Any +from typing import Optional, Any, Union from celery import Task import wannadb.resources -from wannadb.data.data import Document, Attribute +from wannadb.data.data import Document, Attribute, DocumentBase, InformationNugget from wannadb.interaction import EmptyInteractionCallback from wannadb.resources import ResourceManager from wannadb.statistics import Statistics @@ -250,3 +250,40 @@ def run(self, user_id: int, base_name: str, organisation_id: int, document_id: i api.load_document_base_from_bson() api.get_ordert_nuggets(document_id) # no need to update the document base + + +def getDocument(document_name : str, document_base : DocumentBase): + for document in document_base.documents: + if document.name == document_name: + return document + +def nugget_exist(nugget:str, document:Document, start_index:int, 
end_index:int): + if document.text.rfind(nugget, start_index, end_index) >= 0: + return True + else: + raise Exception("Nugget does not exist in the given Text") + + +def match_feedback(nugget:Union[str, InformationNugget] , document_name :str , document_base : DocumentBase, start_index :int = None, end_index:int = None): + logger.debug("match_feedback") + if isinstance(nugget, str): + document = getDocument(document_name,document_base) + if document is None: + logger.error("The document is missing in document base") + raise Exception("The document is missing in document base") + if start_index is None or end_index is None: + logger.error("Start-index or end-index are missing to find the custom nugget") + raise Exception("Start-index or end-index are missing to find the custom nugget") + elif nugget_exist(nugget, document,start_index, end_index): + return { + "message": "custom-match", + "document": document, + "start": start_index, + "end": end_index + } + else: + return { + "message": "is-match", + "nugget": nugget , + "not-a-match": None + } \ No newline at end of file From 2387b7ec3a7f8f9ee745ca885c14fbf47704a03c Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 5 Feb 2024 13:23:32 +0100 Subject: [PATCH 199/254] reformat: typo --- wannadb_web/routing/core.py | 4 ++-- wannadb_web/worker/tasks.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/wannadb_web/routing/core.py b/wannadb_web/routing/core.py index 306f4b5b..ca2bbe62 100644 --- a/wannadb_web/routing/core.py +++ b/wannadb_web/routing/core.py @@ -39,7 +39,7 @@ from wannadb_web.Redis.RedisCache import RedisCache from wannadb_web.util import tokenDecode from wannadb_web.worker.data import Signals -from wannadb_web.worker.tasks import CreateDocumentBase, BaseTask, DocumentBaseGetOrdertNuggets +from wannadb_web.worker.tasks import CreateDocumentBase, BaseTask, DocumentBaseGetOrderedNuggets core_routes = Blueprint('core_routes', __name__, url_prefix='/core') @@ -202,6 +202,6 @@ def sort_nuggets(): user_id = _token.id - task = DocumentBaseGetOrdertNuggets().apply_async(args=(user_id, base_name, organisation_id, document_id)) + task = DocumentBaseGetOrderedNuggets().apply_async(args=(user_id, base_name, organisation_id, document_id)) return make_response({'task_id': task.id}, 202) diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index 6d6b015e..8cd2995c 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -238,8 +238,8 @@ def run(self, user_id: int, base_name: str, organisation_id: int): api.update_document_base_to_bson() -class DocumentBaseGetOrdertNuggets(BaseTask): - name = "DocumentBaseGetOrdertNuggets" +class DocumentBaseGetOrderedNuggets(BaseTask): + name = "DocumentBaseGetOrderedNuggets" def run(self, user_id: int, base_name: str, organisation_id: int, document_id: int): self._signals = Signals(str(self.request.id)) From 6b12293bb75ce71225c12b4f285a7e986905bafe Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 5 Feb 2024 14:24:43 +0100 Subject: [PATCH 200/254] add(_MatchFeedback): proper typing --- wannadb_web/worker/data.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/wannadb_web/worker/data.py b/wannadb_web/worker/data.py index e9f38c59..da9b5871 100644 --- a/wannadb_web/worker/data.py +++ b/wannadb_web/worker/data.py @@ -107,12 +107,22 @@ def to_json(self): def emit(self, status: Any): raise NotImplementedError + class _MatchFeedback(Emitable): + + @property + def msg(self): + msg = self.redis.get(self.type) + if msg is 
None: + return None + m: dict[str, Any] = json.loads(msg) + return m + def to_json(self): if self.msg is None: return {} return json.loads(self.msg) - + def emit(self, status: dict[str, Any]): self.redis.set(self.type, json.dumps(status)) From 20244deafa00fe1c8c6016857838f21f72211bd6 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 5 Feb 2024 14:25:27 +0100 Subject: [PATCH 201/254] add(tasks): add updates --- wannadb_web/worker/tasks.py | 56 +++++++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 5 deletions(-) diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index 8cd2995c..e77913ce 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -160,6 +160,8 @@ def run(self, user_id: int, attributes_strings: list[str], base_name: str, organ api.load_document_base_from_bson() api.add_attributes(attributes) api.update_document_base_to_bson() + self.update(State.SUCCESS) + return self class DocumentBaseRemoveAttributes(BaseTask): @@ -183,6 +185,8 @@ def run(self, user_id: int, attributes_strings: list[str], base_name: str, organ api.remove_attributes(attributes) if api.signals.error.msg is None: api.update_document_base_to_bson() + self.update(State.SUCCESS) + return self class DocumentBaseForgetMatches(BaseTask): @@ -206,6 +210,8 @@ def run(self, user_id: int, attributes_strings: list[str], base_name: str, organ api.forget_matches() if api.signals.error.msg is None: api.update_document_base_to_bson() + self.update(State.SUCCESS) + return self class DocumentBaseForgetMatchesForAttribute(BaseTask): @@ -221,6 +227,8 @@ def run(self, user_id: int, attribute_string: str, base_name: str, organisation_ api.forget_matches_for_attribute(attribute) if api.signals.error.msg is None: api.update_document_base_to_bson() + self.update(State.SUCCESS) + return self class DocumentBaseInteractiveTablePopulation(BaseTask): @@ -236,6 +244,8 @@ def run(self, user_id: int, base_name: str, organisation_id: int): api.interactive_table_population() if api.signals.error.msg is None: api.update_document_base_to_bson() + self.update(State.SUCCESS) + return self class DocumentBaseGetOrderedNuggets(BaseTask): @@ -250,14 +260,50 @@ def run(self, user_id: int, base_name: str, organisation_id: int, document_id: i api.load_document_base_from_bson() api.get_ordert_nuggets(document_id) # no need to update the document base + self.update(State.SUCCESS) + return self + -def getDocument(document_name : str, document_base : DocumentBase): - for document in document_base.documents: - if document.name == document_name: - return document + + +class DocumentBaseConfirmNugget(BaseTask): + name = "DocumentBaseGetOrderedNuggets" -def nugget_exist(nugget:str, document:Document, start_index:int, end_index:int): + def run(self, user_id: int, base_name: str, organisation_id: int, + document_id_for_nugget_x: int, nugget: Union[str, InformationNugget], + start_index: int, end_index: int, interactive_call_task_id: str): + """ + :param user_id: user id + :param base_name: name of base document + :param organisation_id: organisation id of the document base + :param document_id_for_nugget_x: the document id for the document that gets a new nugget + :param nugget: the Nugget that gets confirmed + :param start_index: start of the nugget in the document + :param end_index: end of the nugget in the document + :param interactive_call_task_id: the same task id that's used for interactive call + """ + self._signals = Signals(interactive_call_task_id) + self._redis_client = 
RedisCache(str(self.request.id)) + self.load() + + api = WannaDB_WebAPI(user_id, base_name, organisation_id) + api.load_document_base_from_bson() + + document_name, document_text = getDocuments([document_id_for_nugget_x], user_id)[0] + + document = Document(document_name, document_text) + + self._signals.match_feedback.emit(match_feedback(nugget, document, start_index, end_index)) + # no need to update the document base the doc will be saved in the interactive call + if api.signals.error.msg is None: + api.update_document_base_to_bson() + self.update(State.SUCCESS) + return self + + + +def nugget_exist(nugget: str, document: Document, start_index: int, end_index: int): if document.text.rfind(nugget, start_index, end_index) >= 0: return True else: From 5983a064b8af7ea28855aa1e2945f33626b69679 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 5 Feb 2024 14:27:09 +0100 Subject: [PATCH 202/254] add(match_feedback): adj parameter --- wannadb_web/worker/tasks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index e77913ce..b472bccf 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -310,10 +310,10 @@ def nugget_exist(nugget: str, document: Document, start_index: int, end_index: i raise Exception("Nugget does not exist in the given Text") -def match_feedback(nugget:Union[str, InformationNugget] , document_name :str , document_base : DocumentBase, start_index :int = None, end_index:int = None): +def match_feedback(nugget: Union[str, InformationNugget], document: Document, + start_index: int = None, end_index: int = None): logger.debug("match_feedback") if isinstance(nugget, str): - document = getDocument(document_name,document_base) if document is None: logger.error("The document is missing in document base") raise Exception("The document is missing in document base") From 7d982096096715ef50c22376835c97c0c277291c Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 5 Feb 2024 14:27:34 +0100 Subject: [PATCH 203/254] formatting --- wannadb_web/worker/tasks.py | 110 +++++++++++++++++------------------- 1 file changed, 53 insertions(+), 57 deletions(-) diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index b472bccf..94f7352a 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -22,7 +22,7 @@ class InitManager(Task): name = "InitManager" - + def run(self, *args, **kwargs): ResourceManager() if wannadb.resources.MANAGER is None: @@ -35,13 +35,13 @@ class BaseTask(Task): name = "BaseTask" _signals: Optional[Signals] = None _redis_client: Optional[RedisCache] = None - + def __init__(self): super().__init__() - + def run(self, *args, **kwargs): raise NotImplementedError("BaseTask is abstract") - + @staticmethod def load(): if wannadb.resources.MANAGER is None: @@ -49,34 +49,34 @@ def load(): BaseTask.load() return logging.info("loaded") - + def update(self, - state: State, - meta: Optional[dict[str, Any]] = None, - ) -> None: + state: State, + meta: Optional[dict[str, Any]] = None, + ) -> None: super().update_state(state=state.value, meta=meta) - + def update_state(self, - task_id: Optional[str] = None, - state: Optional[str] = None, - meta: Any = None, - **kwargs: Any - ) -> None: + task_id: Optional[str] = None, + state: Optional[str] = None, + meta: Any = None, + **kwargs: Any + ) -> None: raise NotImplementedError("user update() instead") - + def get_new_input(self): if self._redis_client is None: raise RuntimeError("self._redis_client is None!") 
_input = self._redis_client.get("input") if _input is not None: pass - + return _input class TestTask(BaseTask): name = "TestTask" - + def run(self, *args, **kwargs): super().run() self.update(state=State.PENDING) @@ -92,9 +92,9 @@ def run(self, *args, **kwargs): class CreateDocumentBase(BaseTask): name = "CreateDocumentBase" - + def run(self, user_id: int, document_ids: list[int], attributes_strings: list[str], statistics_dump: bytes, - base_name: str, organisation_id: int): + base_name: str, organisation_id: int): self.load() attributes: list[Attribute] = [] statistics: Statistics = pickle.loads(statistics_dump) @@ -106,23 +106,23 @@ def run(self, user_id: int, document_ids: list[int], attributes_strings: list[st logger.error("Attribute names must be unique!") raise Exception("Attribute names must be unique!") attributes.append(Attribute(attribute_string)) - + """ init api """ api = WannaDB_WebAPI(user_id, base_name, organisation_id) - + """ Creating document base """ if not isinstance(attributes[0], Attribute): self.update(State.ERROR) raise Exception("Invalid attributes") - + if not isinstance(statistics, Statistics): self.update(State.ERROR) raise Exception("Invalid statistics") - + docs = getDocuments(document_ids, user_id) self.update(State.PENDING) documents: list[Document] = [] @@ -132,9 +132,9 @@ def run(self, user_id: int, document_ids: list[int], attributes_strings: list[st else: self.update(State.ERROR) raise Exception("No documents found") - + api.create_document_base(documents, attributes, statistics) - + api.save_document_base_to_bson() self.update(State.SUCCESS) return self @@ -142,11 +142,11 @@ def run(self, user_id: int, document_ids: list[int], attributes_strings: list[st class DocumentBaseAddAttributes(BaseTask): name = "DocumentBaseAddAttributes" - + def run(self, user_id: int, attributes_strings: list[str], base_name: str, organisation_id: int): self.load() attributes: list[Attribute] = [] - + for attribute_string in attributes_strings: if attribute_string == "": logger.error("Attribute names cannot be empty!") @@ -155,7 +155,7 @@ def run(self, user_id: int, attributes_strings: list[str], base_name: str, organ logger.error("Attribute names must be unique!") raise Exception("Attribute names must be unique!") attributes.append(Attribute(attribute_string)) - + api = WannaDB_WebAPI(user_id, base_name, organisation_id) api.load_document_base_from_bson() api.add_attributes(attributes) @@ -166,11 +166,11 @@ def run(self, user_id: int, attributes_strings: list[str], base_name: str, organ class DocumentBaseRemoveAttributes(BaseTask): name = "DocumentBaseRemoveAttributes" - + def run(self, user_id: int, attributes_strings: list[str], base_name: str, organisation_id: int): self.load() attributes: list[Attribute] = [] - + for attribute_string in attributes_strings: if attribute_string == "": logger.error("Attribute names cannot be empty!") @@ -179,7 +179,7 @@ def run(self, user_id: int, attributes_strings: list[str], base_name: str, organ logger.error("Attribute names must be unique!") raise Exception("Attribute names must be unique!") attributes.append(Attribute(attribute_string)) - + api = WannaDB_WebAPI(user_id, base_name, organisation_id) api.load_document_base_from_bson() api.remove_attributes(attributes) @@ -191,11 +191,11 @@ def run(self, user_id: int, attributes_strings: list[str], base_name: str, organ class DocumentBaseForgetMatches(BaseTask): name = "DocumentBaseForgetMatches" - + def run(self, user_id: int, attributes_strings: list[str], base_name: str, 
organisation_id: int): self.load() attributes: list[Attribute] = [] - + for attribute_string in attributes_strings: if attribute_string == "": logger.error("Attribute names cannot be empty!") @@ -204,7 +204,7 @@ def run(self, user_id: int, attributes_strings: list[str], base_name: str, organ logger.error("Attribute names must be unique!") raise Exception("Attribute names must be unique!") attributes.append(Attribute(attribute_string)) - + api = WannaDB_WebAPI(user_id, base_name, organisation_id) api.load_document_base_from_bson() api.forget_matches() @@ -216,12 +216,12 @@ def run(self, user_id: int, attributes_strings: list[str], base_name: str, organ class DocumentBaseForgetMatchesForAttribute(BaseTask): name = "DocumentBaseForgetMatches" - + def run(self, user_id: int, attribute_string: str, base_name: str, organisation_id: int): self.load() - + attribute = (Attribute(attribute_string)) - + api = WannaDB_WebAPI(user_id, base_name, organisation_id) api.load_document_base_from_bson() api.forget_matches_for_attribute(attribute) @@ -233,12 +233,12 @@ def run(self, user_id: int, attribute_string: str, base_name: str, organisation_ class DocumentBaseInteractiveTablePopulation(BaseTask): name = "DocumentBaseInteractiveTablePopulation" - + def run(self, user_id: int, base_name: str, organisation_id: int): self._signals = Signals(str(self.request.id)) self._redis_client = RedisCache(str(self.request.id)) self.load() - + api = WannaDB_WebAPI(user_id, base_name, organisation_id) api.load_document_base_from_bson() api.interactive_table_population() @@ -250,12 +250,12 @@ def run(self, user_id: int, base_name: str, organisation_id: int): class DocumentBaseGetOrderedNuggets(BaseTask): name = "DocumentBaseGetOrderedNuggets" - + def run(self, user_id: int, base_name: str, organisation_id: int, document_id: int): self._signals = Signals(str(self.request.id)) self._redis_client = RedisCache(str(self.request.id)) self.load() - + api = WannaDB_WebAPI(user_id, base_name, organisation_id) api.load_document_base_from_bson() api.get_ordert_nuggets(document_id) @@ -264,9 +264,6 @@ def run(self, user_id: int, base_name: str, organisation_id: int, document_id: i return self - - - class DocumentBaseConfirmNugget(BaseTask): name = "DocumentBaseGetOrderedNuggets" @@ -300,15 +297,14 @@ def run(self, user_id: int, base_name: str, organisation_id: int, api.update_document_base_to_bson() self.update(State.SUCCESS) return self - def nugget_exist(nugget: str, document: Document, start_index: int, end_index: int): if document.text.rfind(nugget, start_index, end_index) >= 0: - return True + return True else: - raise Exception("Nugget does not exist in the given Text") - + raise Exception("Nugget does not exist in the given Text") + def match_feedback(nugget: Union[str, InformationNugget], document: Document, start_index: int = None, end_index: int = None): @@ -320,16 +316,16 @@ def match_feedback(nugget: Union[str, InformationNugget], document: Document, if start_index is None or end_index is None: logger.error("Start-index or end-index are missing to find the custom nugget") raise Exception("Start-index or end-index are missing to find the custom nugget") - elif nugget_exist(nugget, document,start_index, end_index): + elif nugget_exist(nugget, document, start_index, end_index): return { - "message": "custom-match", - "document": document, - "start": start_index, - "end": end_index + "message": "custom-match", + "document": document, + "start": start_index, + "end": end_index } else: return { - "message": "is-match", - 
"nugget": nugget , - "not-a-match": None - } \ No newline at end of file + "message": "is-match", + "nugget": nugget, + "not-a-match": None + } From 5cc2bfba844b7e7e53a5f802218684810f7d9f9f Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 5 Feb 2024 14:27:45 +0100 Subject: [PATCH 204/254] formatting --- wannadb_web/worker/data.py | 71 +++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 35 deletions(-) diff --git a/wannadb_web/worker/data.py b/wannadb_web/worker/data.py index da9b5871..fe65fd63 100644 --- a/wannadb_web/worker/data.py +++ b/wannadb_web/worker/data.py @@ -20,7 +20,7 @@ def nugget_to_json(nugget: InformationNugget): return { "text": nugget.text, "signals": [{"name": name, "signal": signal_to_json(signal)} for name, signal in - nugget.signals.items()], + nugget.signals.items()], "document": {"name": nugget.document.name, "text": nugget.document.text}, "end_char": str(nugget.end_char), "start_char": str(nugget.start_char)} @@ -38,7 +38,7 @@ def document_to_json(document: Document): "text": document.text, "attribute_mappings": "not implemented yet", "signals": [{"name": name, "signal": signal_to_json(signal)} for name, signal in - document.signals.items()], + document.signals.items()], "nuggets": [nugget_to_json(nugget) for nugget in document.nuggets] } @@ -52,8 +52,8 @@ def attribute_to_json(attribute: Attribute): def document_base_to_json(document_base: DocumentBase): return { 'msg': {"attributes ": [attribute.name for attribute in document_base.attributes], - "nuggets": [nugget_to_json(nugget) for nugget in document_base.nuggets] - } + "nuggets": [nugget_to_json(nugget) for nugget in document_base.nuggets] + } } @@ -71,38 +71,38 @@ def __init__(self, user_id: str): self.cache_db_to_ui = _Dump("cache_db_to_ui", user_id) self.ordert_nuggets = _Nuggets("ordert_nuggets", user_id) self.match_feedback = _MatchFeedback("match_feedback", user_id) - + def to_json(self) -> dict[str, str]: return {self.feedback.type: self.feedback.to_json(), - self.error.type: self.error.to_json(), - self.status.type: self.status.to_json(), - self.finished.type: self.finished.to_json(), - self.document_base_to_ui.type: self.document_base_to_ui.to_json(), - self.statistics.type: self.statistics.to_json(), - self.feedback_request_to_ui.type: self.feedback_request_to_ui.to_json(), - self.cache_db_to_ui.type: self.cache_db_to_ui.to_json()} - + self.error.type: self.error.to_json(), + self.status.type: self.status.to_json(), + self.finished.type: self.finished.to_json(), + self.document_base_to_ui.type: self.document_base_to_ui.to_json(), + self.statistics.type: self.statistics.to_json(), + self.feedback_request_to_ui.type: self.feedback_request_to_ui.to_json(), + self.cache_db_to_ui.type: self.cache_db_to_ui.to_json()} + def reset(self): RedisCache(self.__user_id).delete_user_space() class Emitable(abc.ABC): - + def __init__(self, emitable_type: str, user_id: str): self.type = emitable_type self.redis = RedisCache(user_id) - + @property def msg(self): msg = self.redis.get(self.type) if msg is None: return None return msg - + @abstractmethod def to_json(self): raise NotImplementedError - + @abstractmethod def emit(self, status: Any): raise NotImplementedError @@ -126,89 +126,90 @@ def to_json(self): def emit(self, status: dict[str, Any]): self.redis.set(self.type, json.dumps(status)) -class _State(Emitable): +class _State(Emitable): + def to_json(self): if self.msg is None: return "" return self.msg.decode("utf-8") - + def emit(self, status: str): self.redis.set(self.type, status) class 
_Signal(Emitable): - + def to_json(self): return str(self.msg) - + def emit(self, status: float): self.redis.set(self.type, str(status)) - -class _Error(Emitable): +class _Error(Emitable): + def to_json(self): if self.msg is None: return "" return self.msg.decode("utf-8") - + def emit(self, exception: BaseException): self.redis.set(self.type, str(exception)) class _Nuggets(Emitable): - + def to_json(self): if self.msg is None: return {} if not isinstance(self.msg, str): raise TypeError("_Nugget msg must be of type str") return self.msg - + def emit(self, status: list[InformationNugget]): self.redis.set(self.type, json.dumps(nuggets_to_json(status))) class _DocumentBase(Emitable): - + def to_json(self): if self.msg is None: return {} return json.loads(self.msg) - + def emit(self, status: DocumentBase): self.redis.set(self.type, json.dumps(document_base_to_json(status))) class _Statistics(Emitable): - + @property def msg(self): return "not implemented" - + def to_json(self): return Statistics(False).to_serializable() - + def emit(self, statistic: Statistics): pass class _Feedback(Emitable): - + def to_json(self): if self.msg is None: return {} return json.loads(self.msg) - + def emit(self, status: dict[str, Any]): self.redis.set(self.type, json.dumps(status)) class _Dump(Emitable): - + def to_json(self): return self.msg - + def emit(self, status): self.redis.set(self.type, json.dumps(status)) From 52d4952a15094fb8a762dae8862f7d1360d7346e Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 5 Feb 2024 14:28:35 +0100 Subject: [PATCH 205/254] feat(interaction_callback_fn): now waits 3 min for match_feedback --- wannadb_web/worker/Web_API.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/wannadb_web/worker/Web_API.py b/wannadb_web/worker/Web_API.py index c03614c9..34adde55 100644 --- a/wannadb_web/worker/Web_API.py +++ b/wannadb_web/worker/Web_API.py @@ -52,9 +52,15 @@ def status_callback_fn(message, progress): def interaction_callback_fn(pipeline_element_identifier, feedback_request): feedback_request["identifier"] = pipeline_element_identifier self.signals.feedback_request_to_ui.emit(feedback_request) - logger.info("Waiting for feedback...") - time.sleep(2) - + + start_time = time.time() + while (time.time() - start_time) < 300: + msg = self.signals.match_feedback.msg + if msg is not None: + return msg + time.sleep(2) + raise TimeoutError("no match_feedback in time provided") + self.interaction_callback = InteractionCallback(interaction_callback_fn) if wannadb.resources.MANAGER is None: From f1a51191e69450879845a5a0d1e5ec4756d97241 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 5 Feb 2024 16:44:13 +0100 Subject: [PATCH 206/254] pre merge: add new feat from main to branch --- wannadb_web/worker/tasks.py | 60 +++++++++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 3 deletions(-) diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index 94f7352a..d68ce3e4 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -136,7 +136,27 @@ def run(self, user_id: int, document_ids: list[int], attributes_strings: list[st api.create_document_base(documents, attributes, statistics) api.save_document_base_to_bson() - self.update(State.SUCCESS) + if api.signals.error.msg is None: + api.update_document_base_to_bson() + self.update(State.SUCCESS) + return self + self.update(State.ERROR) + return self + + +class DocumentBaseLoad(BaseTask): + name = "DocumentBaseLoad" + + def run(self, user_id: int, base_name: str, 
organisation_id: int): + self.load() + api = WannaDB_WebAPI(user_id, base_name, organisation_id) + api.load_document_base_from_bson() + #self.update(State.SUCCESS) + #return self + if api.signals.error.msg is None: + self.update(State.SUCCESS) + return self + self.update(State.ERROR) return self @@ -159,10 +179,38 @@ def run(self, user_id: int, attributes_strings: list[str], base_name: str, organ api = WannaDB_WebAPI(user_id, base_name, organisation_id) api.load_document_base_from_bson() api.add_attributes(attributes) - api.update_document_base_to_bson() - self.update(State.SUCCESS) + if api.signals.error.msg is None: + api.update_document_base_to_bson() + self.update(State.SUCCESS) + return self + self.update(State.ERROR) return self +class DocumentBaseUpdateAttributes(BaseTask): + name = "DocumentBaseAddAttributes" + + def run(self, user_id: int, attributes_strings: list[str], base_name: str, organisation_id: int): + self.load() + attributes: list[Attribute] = [] + + for attribute_string in attributes_strings: + if attribute_string == "": + logger.error("Attribute names cannot be empty!") + raise Exception("Attribute names cannot be empty!") + if attribute_string in [attribute.name for attribute in attributes]: + logger.error("Attribute names must be unique!") + raise Exception("Attribute names must be unique!") + attributes.append(Attribute(attribute_string)) + + api = WannaDB_WebAPI(user_id, base_name, organisation_id) + api.load_document_base_from_bson() + api.update_attributes(attributes) + if api.signals.error.msg is None: + api.update_document_base_to_bson() + self.update(State.SUCCESS) + return self + self.update(State.ERROR) + return self class DocumentBaseRemoveAttributes(BaseTask): name = "DocumentBaseRemoveAttributes" @@ -187,6 +235,8 @@ def run(self, user_id: int, attributes_strings: list[str], base_name: str, organ api.update_document_base_to_bson() self.update(State.SUCCESS) return self + self.update(State.ERROR) + return self class DocumentBaseForgetMatches(BaseTask): @@ -212,6 +262,8 @@ def run(self, user_id: int, attributes_strings: list[str], base_name: str, organ api.update_document_base_to_bson() self.update(State.SUCCESS) return self + self.update(State.ERROR) + return self class DocumentBaseForgetMatchesForAttribute(BaseTask): @@ -229,6 +281,8 @@ def run(self, user_id: int, attribute_string: str, base_name: str, organisation_ api.update_document_base_to_bson() self.update(State.SUCCESS) return self + self.update(State.ERROR) + return self class DocumentBaseInteractiveTablePopulation(BaseTask): From 2ae7f88dbf45134e0d7a64657f096b8da9ac5919 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 5 Feb 2024 16:44:36 +0100 Subject: [PATCH 207/254] pre merge: add new feat from main to branch --- wannadb_web/routing/core.py | 157 +++++++++++++++++++++++++++--------- 1 file changed, 121 insertions(+), 36 deletions(-) diff --git a/wannadb_web/routing/core.py b/wannadb_web/routing/core.py index ca2bbe62..a0ca1a24 100644 --- a/wannadb_web/routing/core.py +++ b/wannadb_web/routing/core.py @@ -39,7 +39,8 @@ from wannadb_web.Redis.RedisCache import RedisCache from wannadb_web.util import tokenDecode from wannadb_web.worker.data import Signals -from wannadb_web.worker.tasks import CreateDocumentBase, BaseTask, DocumentBaseGetOrderedNuggets +from wannadb_web.worker.tasks import CreateDocumentBase, BaseTask, DocumentBaseAddAttributes, DocumentBaseLoad, \ + DocumentBaseUpdateAttributes, DocumentBaseGetOrderedNuggets core_routes = Blueprint('core_routes', __name__, url_prefix='/core') @@ 
-47,19 +48,15 @@ @core_routes.route('/document_base', methods=['POST']) -def create_document(): +def create_document_base(): """ Endpoint for creating a document base. This endpoint is used to create a document base from a list of document ids and a list of attributes. - Example Header: - { - "Authorization": "your_authorization_token" - } - - Example JSON Payload: + Example Form Payload: { + "authorization": "your_authorization_token" "organisationId": "your_organisation_id", "baseName": "your_document_base_name", "document_ids": "1, 2, 3", @@ -67,7 +64,6 @@ def create_document(): } """ form = request.form - # authorization = request.headers.get("authorization") authorization = form.get("authorization") organisation_id: Optional[int] = form.get("organisationId") base_name = form.get("baseName") @@ -93,30 +89,55 @@ def create_document(): return make_response({'task_id': task.id}, 202) - -@core_routes.route('/document_base/attributes', methods=['UPDATE']) -def document_base(): +@core_routes.route('/document_base/load', methods=['POST']) +def load_document_base(): """ - Endpoint for update a document base. + Endpoint for loading a document base. - This endpoint is used to update a document base from a list of attributes. + This endpoint is used to load a document base from a name and an organisation id. - Example Header: - { - "Authorization": "your_authorization_token" - } + Example Form Payload: + { + "authorization": "your_authorization_token" + "organisationId": "your_organisation_id", + "baseName": "your_document_base_name", + } + """ + form = request.form + authorization = form.get("authorization") + organisation_id: Optional[int] = form.get("organisationId") + base_name = form.get("baseName") + if (organisation_id is None or base_name is None + or authorization is None): + return make_response({"error": "missing parameters"}, 400) + _token = tokenDecode(authorization) - Example JSON Payload: + if _token is False: + return make_response({"error": "invalid token"}, 401) + + user_id = _token.id + + task = DocumentBaseLoad().apply_async(args=(user_id, base_name, organisation_id)) + + return make_response({'task_id': task.id}, 202) + + +@core_routes.route('/document_base/attributes/add', methods=['POST']) +def document_base_attribute_add(): + """ + Endpoint for add attributes to a document base. + + This endpoint is used to add attributes to a document base from a list of attributes. 
+ + Example Form Payload: { + "authorization": "your_authorization_token" "organisationId": "your_organisation_id", "baseName": "your_document_base_name", - "attributes": [ - "plane","car","bike" - ] + "attributes": "plane,car,bike" } """ form = request.form - # authorization = request.headers.get("authorization") authorization = form.get("authorization") organisation_id = form.get("organisationId") base_name = form.get("baseName") @@ -129,6 +150,8 @@ def document_base(): if _token is False: return make_response({"error": "invalid token"}, 401) + attributes_strings = attributes_string.split(",") + attributes = [] for att in attributes_string: attributes.append(Attribute(att)) @@ -136,8 +159,56 @@ def document_base(): statistics = Statistics(False) user_id = _token.id - attributesDump = pickle.dumps(attributes) - statisticsDump = pickle.dumps(statistics) + #attributesDump = pickle.dumps(attributes) + #statisticsDump = pickle.dumps(statistics) + task = DocumentBaseAddAttributes().apply_async(args=(user_id, attributes_strings, + base_name, organisation_id)) + + return make_response({'task_id': task.id}, 202) + + +@core_routes.route('/document_base/attributes/update', methods=['POST']) +def document_base_attribute_update(): + """ + Endpoint for update the attributes of a document base. + + This endpoint is used to update the attributes of a document base from a list of attributes. + + Example Form Payload: + { + "authorization": "your_authorization_token" + "organisationId": "your_organisation_id", + "baseName": "your_document_base_name", + "attributes": "plane,car,bike" + } + """ + form = request.form + authorization = form.get("authorization") + organisation_id = form.get("organisationId") + base_name = form.get("baseName") + attributes_string = form.get("attributes") + if (organisation_id is None or base_name is None or attributes_string is None + or authorization is None): + return make_response({"error": "missing parameters"}, 400) + _token = tokenDecode(authorization) + + if _token is False: + return make_response({"error": "invalid token"}, 401) + + attributes_string = attributes_string.split(",") + + #attributes = [] + #for att in attributes_string: + # attributes.append(Attribute(att)) + # + #statistics = Statistics(False) + + user_id = _token.id + + task = DocumentBaseUpdateAttributes().apply_async(args=(user_id, attributes_string, + base_name, organisation_id)) + + return make_response({'task_id': task.id}, 202) # @core_routes.route('/longtask', methods=['POST']) @@ -147,23 +218,37 @@ def document_base(): # task_id=task.id)} -@core_routes.route('/status/', methods=['GET']) -def task_status(task_id: str): +@core_routes.route('/status//', methods=['GET']) +def task_status(token: str,task_id: str): + + _token = tokenDecode(token) + + if _token is False: + return make_response({"error": "invalid token"}, 401) + user_id = _token.id + task: AsyncResult = BaseTask().AsyncResult(task_id=task_id) status = task.status if status == "FAILURE": - return make_response({"state": "FAILURE", "meta": Signals(task_id).to_json()}, 500) + return make_response({"state": "FAILURE", "meta": Signals(user_id).to_json()}, 500) if status == "SUCCESS": - return make_response({"state": "SUCCESS", "meta": Signals(task_id).to_json()}, 200) + signals = Signals(user_id).to_json() + return make_response({"state": "SUCCESS", "meta": signals}, 200) if status is None: return make_response({"error": "task not found"}, 500) - return make_response({"state": task.status, "meta": Signals(task_id).to_json()}, 202) + + signals 
= Signals(user_id).to_json() + return make_response({"state": task.status, "meta": signals}, 202) @core_routes.route('/status/', methods=['POST']) def task_update(task_id: str): - redis_client = RedisCache(task_id).redis_client - redis_client.set("input", "test") + signals = Signals(task_id) + + ## todo: hier muss feedback emitted werden im format: + ## { ------------------ } + + signals.feedback_request_from_ui.emit(request.json.get("feedback")) ## todo: renaming of the endpoint @@ -196,12 +281,12 @@ def sort_nuggets(): if organisation_id is None or base_name is None or document_id is None or authorization is None: return make_response({"error": "missing parameters"}, 400) _token = tokenDecode(authorization) - + if _token is False: return make_response({"error": "invalid token"}, 401) - + user_id = _token.id - + task = DocumentBaseGetOrderedNuggets().apply_async(args=(user_id, base_name, organisation_id, document_id)) - + return make_response({'task_id': task.id}, 202) From a633885b1895e6237a9840907230d13ac4f5bf0e Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 5 Feb 2024 16:50:14 +0100 Subject: [PATCH 208/254] pre merge: add new feat from main to branch --- celery_app.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/celery_app.py b/celery_app.py index 742d517e..e783583b 100644 --- a/celery_app.py +++ b/celery_app.py @@ -3,7 +3,7 @@ from celery import Celery -from wannadb_web.worker.tasks import BaseTask, TestTask, InitManager, CreateDocumentBase +from wannadb_web.worker.tasks import BaseTask, DocumentBaseAddAttributes, DocumentBaseForgetMatches, DocumentBaseForgetMatchesForAttribute, DocumentBaseInteractiveTablePopulation, DocumentBaseLoad, DocumentBaseRemoveAttributes, DocumentBaseUpdateAttributes, TestTask, InitManager, CreateDocumentBase logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") @@ -15,3 +15,10 @@ app.register_task(TestTask) app.register_task(InitManager) app.register_task(CreateDocumentBase) +app.register_task(DocumentBaseLoad) +app.register_task(DocumentBaseAddAttributes) +app.register_task(DocumentBaseUpdateAttributes) +app.register_task(DocumentBaseRemoveAttributes) +app.register_task(DocumentBaseForgetMatches) +app.register_task(DocumentBaseForgetMatchesForAttribute) +app.register_task(DocumentBaseInteractiveTablePopulation) From 8effa0131b69043468968a7f76d179f003c31624 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 5 Feb 2024 17:05:15 +0100 Subject: [PATCH 209/254] pre merge: add new feat from main to branch --- wannadb_web/worker/data.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/wannadb_web/worker/data.py b/wannadb_web/worker/data.py index fe65fd63..d5275af5 100644 --- a/wannadb_web/worker/data.py +++ b/wannadb_web/worker/data.py @@ -68,12 +68,14 @@ def __init__(self, user_id: str): self.document_base_to_ui = _DocumentBase("document_base_to_ui", user_id) self.statistics = _Statistics("statistics_to_ui", user_id) self.feedback_request_to_ui = _Feedback("feedback_request_to_ui", user_id) + self.feedback_request_from_ui = _Feedback("feedback_request_from_ui", user_id) self.cache_db_to_ui = _Dump("cache_db_to_ui", user_id) self.ordert_nuggets = _Nuggets("ordert_nuggets", user_id) self.match_feedback = _MatchFeedback("match_feedback", user_id) def to_json(self) -> dict[str, str]: - return {self.feedback.type: self.feedback.to_json(), + return {"user_id": self.__user_id, + self.feedback.type: self.feedback.to_json(), self.error.type: self.error.to_json(), 
self.status.type: self.status.to_json(), self.finished.type: self.finished.to_json(), From 2999991d875c6ae4425f82c7fbe51dcc834be9ee Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 5 Feb 2024 17:05:17 +0100 Subject: [PATCH 210/254] pre merge: add new feat from main to branch --- wannadb_web/routing/files.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/wannadb_web/routing/files.py b/wannadb_web/routing/files.py index 69701d3c..dfed76e7 100644 --- a/wannadb_web/routing/files.py +++ b/wannadb_web/routing/files.py @@ -1,6 +1,7 @@ from flask import Blueprint, request, make_response -from wannadb_web.postgres.queries import deleteDocumentContent, getDocument, getDocumentsForOrganization, updateDocumentContent +from wannadb_web.postgres.queries import deleteDocumentContent, getDocument, getDocumentsForOrganization, \ + updateDocumentContent, getDocumentBaseForOrganization from wannadb_web.util import tokenDecode from wannadb_web.postgres.transactions import addDocument @@ -55,6 +56,20 @@ def get_files_for_organization(_id): return make_response(documents, 200) +@main_routes.route('/organization/get/documentbase/<_id>', methods=['GET']) +def get_documentbase_for_organization(_id): + authorization = request.headers.get("authorization") + org_id = int(_id) + + token = tokenDecode(authorization) + if token is None: + return make_response({'error': 'no authorization'}, 401) + + + document_base = getDocumentBaseForOrganization(org_id) + + return make_response(document_base, 200) + @main_routes.route('/update/file/content', methods=['POST']) def update_file_content(): authorization = request.headers.get("authorization") From 8263a5de5c82f13181e7866c4ba6b6766b24b359 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 5 Feb 2024 17:05:19 +0100 Subject: [PATCH 211/254] pre merge: add new feat from main to branch --- wannadb_web/postgres/queries.py | 38 +++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/wannadb_web/postgres/queries.py b/wannadb_web/postgres/queries.py index ee90badc..5ae15390 100644 --- a/wannadb_web/postgres/queries.py +++ b/wannadb_web/postgres/queries.py @@ -143,7 +143,7 @@ def getDocument_by_name(document_name: str, organisation_id: int, user_id: int) Exception: if multiple documents with that name are found """ - select_query = sql.SQL("""SELECT name,content,content_byte + select_query = sql.SQL("""SELECT id,content,content_byte FROM documents d JOIN membership m ON d.organisationid = m.organisationid WHERE d.name = (%s) AND m.userid = (%s) AND m.organisationid = (%s) @@ -152,13 +152,13 @@ def getDocument_by_name(document_name: str, organisation_id: int, user_id: int) result = execute_query(select_query, (document_name, user_id, organisation_id,)) if len(result) == 1: document = result[0] - name = document[0] + id = document[0] if document[1]: content = document[1] - return str(name), str(content) + return str(id), str(content) elif document[2]: content = document[2] - return str(name), bytes(content) + return str(id), bytes(content) elif len(result) > 1: raise Exception("Multiple documents with the same name found") raise Exception("No document with that name found") @@ -200,9 +200,9 @@ def getDocumentsForOrganization(organisation_id: int): for document in result: id = document[0] name = document[1] - content = '' - if document[2]: - content = document[2] + if document[2] == None: + continue + content = document[2] doc_array.append({ "id": id, "name": name, @@ -210,6 +210,30 @@ def 
getDocumentsForOrganization(organisation_id: int): }) return doc_array +def getDocumentBaseForOrganization(organisation_id: int): + select_query = sql.SQL("""SELECT id, name,content,content_byte + FROM documents + WHERE organisationid = (%s) + """) + result = execute_query(select_query, (organisation_id,)) + + if result is None or len(result) == 0: + return [] + + doc_array = [] + + for document in result: + id = document[0] + name = document[1] + if document[3] == None: + continue + content = document[3] + doc_array.append({ + "id": id, + "name": name, + }) + return doc_array + def updateDocumentContent(doc_id: int, new_content): try: From 9a6e90e293769bc17eca1508aa7e84bd5a6a2197 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 5 Feb 2024 17:05:22 +0100 Subject: [PATCH 212/254] pre merge: add new feat from main to branch --- wannadb_web/worker/Web_API.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/wannadb_web/worker/Web_API.py b/wannadb_web/worker/Web_API.py index 34adde55..f98c3ad4 100644 --- a/wannadb_web/worker/Web_API.py +++ b/wannadb_web/worker/Web_API.py @@ -191,6 +191,7 @@ def load_document_base_from_bson(self): logger.info(f"Document base loaded from BSON with id {document_id}.") self.document_base = document_base + self.document_id = document_id except Exception as e: logger.error(str(e)) @@ -322,6 +323,17 @@ def remove_attributes(self, attributes: list[Attribute]): else: logger.error("Attribute name does not exist!") self.signals.error.emit(Exception("Attribute name does not exist!")) + + def update_attributes(self, attributes: list[Attribute]): + logger.debug("Called function 'update_attributes'.") + self.document_base.attributes.clear() + for attribute in attributes: + if attribute is None: + logger.info("Attribute name must not be empty and was thus ignored.") + else: + self.document_base.attributes.append(attribute) + self.sqLiteCacheDBWrapper.cache_db.create_table_by_name(attribute.name) + logger.debug(f"Attribute '{attribute.name}' added.") def forget_matches_for_attribute(self, attribute: Attribute): logger.debug("Called function 'forget_matches_for_attribute'.") From 2a7d265899440cae39c1a87111c5fd0f167d9eda Mon Sep 17 00:00:00 2001 From: Ramzes Khotambekzoda Date: Tue, 6 Feb 2024 14:26:55 +0100 Subject: [PATCH 213/254] fixed bug --- Dockerfile | 3 + app.py | 4 +- docker-compose-prod.yaml | 146 +++++++++--------- .../__pycache__/__init__.cpython-39.pyc | Bin 0 -> 128 bytes .../__pycache__/__init__.cpython-39.pyc | Bin 0 -> 136 bytes .../routing/__pycache__/core.cpython-39.pyc | Bin 0 -> 7320 bytes wannadb_web/routing/core.py | 3 +- 7 files changed, 79 insertions(+), 77 deletions(-) create mode 100644 wannadb_web/__pycache__/__init__.cpython-39.pyc create mode 100644 wannadb_web/routing/__pycache__/__init__.cpython-39.pyc create mode 100644 wannadb_web/routing/__pycache__/core.cpython-39.pyc diff --git a/Dockerfile b/Dockerfile index 51d05a1a..a87bd3f4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,6 +17,9 @@ RUN apt-get update \ RUN mkdir /home/wannadb WORKDIR /home/wannadb +# update Pip +RUN pip install --upgrade pip + # install torch RUN pip install --use-pep517 torch==1.10.0 diff --git a/app.py b/app.py index cfface70..a25b21e9 100644 --- a/app.py +++ b/app.py @@ -3,7 +3,7 @@ from flask import Flask, make_response, render_template_string from flask_cors import CORS -from flask_debugtoolbar import DebugToolbarExtension +#from flask_debugtoolbar import DebugToolbarExtension from wannadb_web.routing.core import core_routes from 
wannadb_web.routing.dev import dev_routes from wannadb_web.routing.user import user_management @@ -28,7 +28,7 @@ app.config['DEBUG'] = True # Register the Extensions CORS(app) -toolbar = DebugToolbarExtension(app) +#toolbar = DebugToolbarExtension(app) diff --git a/docker-compose-prod.yaml b/docker-compose-prod.yaml index c6abd035..41735dd9 100644 --- a/docker-compose-prod.yaml +++ b/docker-compose-prod.yaml @@ -1,82 +1,82 @@ -version: '3.6' +version: "3.6" services: - wannadb: - build: - context: . - dockerfile: Dockerfile - target: prod - restart: always - tty: true - ports: - - '8000:8000' - env_file: - - wannadb_web/.env/.dev - depends_on: - - postgres - - redis - networks: - - mynetwork + wannadb: + build: + context: . + dockerfile: Dockerfile + target: prod + restart: always + tty: true + ports: + - "8000:8000" + env_file: + - wannadb_web/.env/.dev + depends_on: + - postgres + - redis + networks: + - mynetwork - worker: - build: - context: . - dockerfile: Dockerfile - target: worker - tty: true - command: ['celery', '-A', 'app.celery', 'worker', '-l', 'info'] - env_file: - - wannadb_web/.env/.dev - volumes: - - ./:/home/wannadb - networks: - - mynetwork - depends_on: - - wannadb - - redis + worker: + build: + context: . + dockerfile: Dockerfile + target: worker + tty: true + command: ["celery", "-A", "app.celery", "worker", "-l", "info"] + env_file: + - wannadb_web/.env/.dev + volumes: + - ./:/home/wannadb + networks: + - mynetwork + depends_on: + - wannadb + - redis - flower: - build: - context: . - dockerfile: Dockerfile - target: worker - tty: true - command: ['celery', '-A', 'app.celery', 'flower'] - env_file: - - wannadb_web/.env/.dev - volumes: - - ./:/home/wannadb - networks: - - mynetwork - ports: - - '5555:5555' - depends_on: - - wannadb - - redis + flower: + build: + context: . 
+ dockerfile: Dockerfile + target: worker + tty: true + command: ["celery", "-A", "app.celery", "flower"] + env_file: + - wannadb_web/.env/.dev + volumes: + - ./:/home/wannadb + networks: + - mynetwork + ports: + - "5555:5555" + depends_on: + - wannadb + - redis - postgres: - image: postgres - container_name: postgres-container - environment: - POSTGRES_PASSWORD: 0 - POSTGRES_DB: userManagement - networks: - - mynetwork - ports: - - '5432:5432' - volumes: - - pgdata:/var/lib/postgresql/data + postgres: + image: postgres + container_name: postgres-container + environment: + POSTGRES_PASSWORD: 0 + POSTGRES_DB: userManagement + networks: + - mynetwork + ports: + - "5432:5432" + volumes: + - pgdata:/var/lib/postgresql/data - redis: - image: redis:alpine - container_name: redis-container - ports: - - '6379:6379' - networks: - - mynetwork + redis: + image: redis:alpine + container_name: redis-container + ports: + - "6379:6379" + networks: + - mynetwork networks: - mynetwork: - driver: bridge + mynetwork: + driver: bridge volumes: - pgdata: + pgdata: diff --git a/wannadb_web/__pycache__/__init__.cpython-39.pyc b/wannadb_web/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a13ac6b3fd8d85306d62457ed21d7ce4c12ec952 GIT binary patch literal 128 diff --git a/wannadb_web/routing/__pycache__/__init__.cpython-39.pyc b/wannadb_web/routing/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7bafeb70b7e855c6806202bc5901092bf5b90e4d GIT binary patch literal 136 diff --git a/wannadb_web/routing/__pycache__/core.cpython-39.pyc b/wannadb_web/routing/__pycache__/core.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..04687dd29fc8b080c40f5a0904f1c22caea1c46d GIT binary patch literal 7320
diff --git a/wannadb_web/routing/core.py b/wannadb_web/routing/core.py index a0ca1a24..e6d4f412 100644 --- a/wannadb_web/routing/core.py +++ b/wannadb_web/routing/core.py @@ -31,9 +31,8 @@ import pickle from typing import Optional -from celery.result import AsyncResult from flask import Blueprint, make_response, request - +from celery.result import AsyncResult from wannadb.data.data import Attribute from wannadb.statistics import Statistics from wannadb_web.Redis.RedisCache import RedisCache From c8c854ff39e99e04a3f7b4e15b318737378c6e08 Mon Sep 17 00:00:00 2001 From: cophilot Date: Wed, 7 Feb 2024 17:33:42 +0100 Subject: [PATCH 214/254] added interactive-table-pop-endpoint and bug fixes for the interactive table pop --- wannadb/configuration.py | 6 ++++-- wannadb/data/data.py | 6 ++++++ wannadb_web/routing/core.py | 37 +++++++++++++++++++++++++++++++++-- wannadb_web/worker/Web_API.py | 2 +- wannadb_web/worker/data.py | 4 ++++ 5 files changed, 50 insertions(+), 5 deletions(-) diff --git a/wannadb/configuration.py b/wannadb/configuration.py index a39c5391..04bef45a 100644 --- a/wannadb/configuration.py +++ b/wannadb/configuration.py @@ -247,9 +247,11 @@ def __call__( logger.info("Execute the pipeline.") tick: float = time.time() status_callback("Running the pipeline...", -1) + - for ix, pipeline_element in enumerate(self._pipeline_elements): - pipeline_element(document_base, interaction_callback, status_callback, statistics[f"pipeline-element-{ix}"]) + for i, pipeline_element in enumerate(self._pipeline_elements): + print(f"Running pipeline element {pipeline_element}...") + pipeline_element(document_base, interaction_callback, status_callback, statistics[f"pipeline-element-{str(i)}"]) status_callback("Running the pipeline...", 1) tack: float = time.time() diff --git a/wannadb/data/data.py b/wannadb/data/data.py index 92a8a2aa..a791e8ce 100644 --- a/wannadb/data/data.py +++ b/wannadb/data/data.py @@ -152,6 +152,12 @@ def __hash__(self) -> int: def __eq__(self, other) -> bool: return isinstance(other, Attribute) and self._name == other._name and self._signals == other._signals + + def toJSON(self): + print("toJSON") + return { + "name": self._name + } @property def name(self) -> str: diff --git a/wannadb_web/routing/core.py b/wannadb_web/routing/core.py index e6d4f412..8b53b23a 100644 --- a/wannadb_web/routing/core.py +++ b/wannadb_web/routing/core.py @@ -38,7 +38,7 @@ from wannadb_web.Redis.RedisCache import RedisCache from wannadb_web.util import tokenDecode from wannadb_web.worker.data import Signals -from wannadb_web.worker.tasks import CreateDocumentBase, BaseTask, DocumentBaseAddAttributes, DocumentBaseLoad, \ +from wannadb_web.worker.tasks import CreateDocumentBase, BaseTask, DocumentBaseAddAttributes, DocumentBaseInteractiveTablePopulation, DocumentBaseLoad, \ DocumentBaseUpdateAttributes, DocumentBaseGetOrderedNuggets core_routes = Blueprint('core_routes', __name__, url_prefix='/core') @@ -120,6 +120,39 @@ def load_document_base(): return make_response({'task_id':
task.id}, 202) +@core_routes.route('/document_base/interactive', methods=['POST']) +def interactive_document_base(): + """ + Endpoint for interactive document population + + This endpoint is used to load a document base from a name and an organisation id. + + Example Form Payload: + { + "authorization": "your_authorization_token" + "organisationId": "your_organisation_id", + "baseName": "your_document_base_name", + } + """ + form = request.form + authorization = form.get("authorization") + organisation_id: Optional[int] = form.get("organisationId") + base_name = form.get("baseName") + + if (organisation_id is None or base_name is None + or authorization is None): + return make_response({"error": "missing parameters"}, 400) + _token = tokenDecode(authorization) + + if _token is False: + return make_response({"error": "invalid token"}, 401) + + user_id = _token.id + + task = DocumentBaseInteractiveTablePopulation().apply_async(args=(user_id, base_name, organisation_id)) + + return make_response({'task_id': task.id}, 202) + @core_routes.route('/document_base/attributes/add', methods=['POST']) def document_base_attribute_add(): @@ -252,7 +285,7 @@ def task_update(task_id: str): ## todo: renaming of the endpoint -@core_routes.route('/fix_button', methods=['POST']) +@core_routes.route('/document_base/order/nugget', methods=['POST']) def sort_nuggets(): """ Endpoint for creating a document base. diff --git a/wannadb_web/worker/Web_API.py b/wannadb_web/worker/Web_API.py index f98c3ad4..501a86d6 100644 --- a/wannadb_web/worker/Web_API.py +++ b/wannadb_web/worker/Web_API.py @@ -446,7 +446,7 @@ def find_additional_nuggets(nugget, documents): ) matching_phase(self.document_base, self.interaction_callback, self.status_callback, - self.signals.statistics.msg()) + Statistics(False)) self.signals.document_base_to_ui.emit(self.document_base) self.signals.finished.emit(1) except Exception as e: diff --git a/wannadb_web/worker/data.py b/wannadb_web/worker/data.py index d5275af5..17b55019 100644 --- a/wannadb_web/worker/data.py +++ b/wannadb_web/worker/data.py @@ -205,6 +205,10 @@ def to_json(self): return json.loads(self.msg) def emit(self, status: dict[str, Any]): + print("Status: " + str(status)) + for key, value in status.items(): + if isinstance(value, Attribute): + status[key] = value.toJSON() self.redis.set(self.type, json.dumps(status)) From c8c854ff39e99e04a3f7b4e15b318737378c6e08 Mon Sep 17 00:00:00 2001 From: cophilot Date: Thu, 8 Feb 2024 10:30:27 +0100 Subject: [PATCH 215/254] refactored sort_nuggets --- celery_app.py | 4 +++- wannadb_web/postgres/queries.py | 20 ++++++++++++++++++++ wannadb_web/routing/core.py | 16 +++++++++------- wannadb_web/worker/Web_API.py | 17 ++++++++++++++++- wannadb_web/worker/tasks.py | 8 +++++--- 5 files changed, 53 insertions(+), 12 deletions(-) diff --git a/celery_app.py b/celery_app.py index e783583b..73322a0c 100644 --- a/celery_app.py +++ b/celery_app.py @@ -3,7 +3,7 @@ from celery import Celery -from wannadb_web.worker.tasks import BaseTask, DocumentBaseAddAttributes, DocumentBaseForgetMatches, DocumentBaseForgetMatchesForAttribute, DocumentBaseInteractiveTablePopulation, DocumentBaseLoad, DocumentBaseRemoveAttributes, DocumentBaseUpdateAttributes, TestTask, InitManager, CreateDocumentBase +from wannadb_web.worker.tasks import BaseTask, DocumentBaseAddAttributes, DocumentBaseConfirmNugget, DocumentBaseForgetMatches, DocumentBaseForgetMatchesForAttribute, DocumentBaseGetOrderedNuggets, DocumentBaseInteractiveTablePopulation, DocumentBaseLoad, 
DocumentBaseRemoveAttributes, DocumentBaseUpdateAttributes, TestTask, InitManager, CreateDocumentBase logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") @@ -22,3 +22,5 @@ app.register_task(DocumentBaseForgetMatches) app.register_task(DocumentBaseForgetMatchesForAttribute) app.register_task(DocumentBaseInteractiveTablePopulation) +app.register_task(DocumentBaseGetOrderedNuggets) +app.register_task(DocumentBaseConfirmNugget) diff --git a/wannadb_web/postgres/queries.py b/wannadb_web/postgres/queries.py index 8d88b5c7..6d5885c5 100644 --- a/wannadb_web/postgres/queries.py +++ b/wannadb_web/postgres/queries.py @@ -185,6 +185,26 @@ def getDocument(document_id: int, user_id: int): else: return None +def getDocumentByNameAndContent(doc_name: str, doc_content: str, user_id: int): + select_query = sql.SQL("""SELECT name,content,content_byte + FROM documents + JOIN membership m ON documents.organisationid = m.organisationid + WHERE name = (%s) AND content = (%s) AND m.userid = (%s) + """) + + result = execute_query(select_query, (doc_name, doc_content, user_id,)) + if len(result) > 0: + for document in result: + name = document[0] + if document[1]: + content = document[1] + return str(name), str(content) + elif document[2]: + content = document[2] + return str(name), bytes(content) + else: + return None + def getDocumentsForOrganization(organisation_id: int): select_query = sql.SQL("""SELECT id, name,content,content_byte diff --git a/wannadb_web/routing/core.py b/wannadb_web/routing/core.py index 96a3f3a9..fe38f3c1 100644 --- a/wannadb_web/routing/core.py +++ b/wannadb_web/routing/core.py @@ -297,24 +297,26 @@ def sort_nuggets(): Example Header: { - "Authorization": "your_authorization_token" } - Example JSON Payload: + Example Form Payload: { + "authorization": "your_authorization_token" "organisationId": "your_organisation_id", "baseName": "your_document_base_name", - "document_id": "1", (important: only one document id) - "attributes": "plane,car,bike" + "documentName": "your_document_name", + "documentContent": "your_document_content", } """ form = request.form authorization = form.get("authorization") organisation_id: Optional[int] = form.get("organisationId") base_name = form.get("baseName") - document_id = form.get("document_ids") - if organisation_id is None or base_name is None or document_id is None or authorization is None: + document_name = form.get("documentName") + document_content = form.get("documentContent") + if organisation_id is None or base_name is None or document_name is None or document_content is None or authorization is None: return make_response({"error": "missing parameters"}, 400) + _token = tokenDecode(authorization) if _token is False: @@ -322,7 +324,7 @@ def sort_nuggets(): user_id = _token.id - task = DocumentBaseGetOrderedNuggets().apply_async(args=(user_id, base_name, organisation_id, document_id)) + task = DocumentBaseGetOrderedNuggets().apply_async(args=(user_id, base_name, organisation_id, document_name, document_content)) return make_response({'task_id': task.id}, 202) diff --git a/wannadb_web/worker/Web_API.py b/wannadb_web/worker/Web_API.py index e0ed80d9..1ccc5f15 100644 --- a/wannadb_web/worker/Web_API.py +++ b/wannadb_web/worker/Web_API.py @@ -23,7 +23,7 @@ from wannadb.statistics import Statistics from wannadb.status import StatusCallback from wannadb_web.SQLite.Cache_DB import SQLiteCacheDBWrapper -from wannadb_web.postgres.queries import getDocument_by_name, updateDocumentContent, getDocument +from 
wannadb_web.postgres.queries import getDocument_by_name, getDocumentByNameAndContent, updateDocumentContent, getDocument from wannadb_web.postgres.transactions import addDocument from wannadb_web.worker.data import Signals @@ -122,6 +122,21 @@ def get_ordert_nuggets(self, document_id: int): logger.error(f"Document \"{document_name}\" not found in document base!") self.signals.error.emit(Exception(f"Document \"{document_name}\" not found in document base!")) + def get_ordered_nuggets_by_doc_name(self, document_name: str, document_content: str): + document = getDocumentByNameAndContent(document_name, document_content, self.user_id) + if document is None: + logger.error(f"Document {document_name} not found!") + self.signals.error.emit(Exception(f"Document {document_name} not found!")) + return + logger.debug("get_ordered_nuggets_by_doc_name") + self.signals.status.emit("get_ordered_nuggets_by_doc_name") + for document in self.document_base.documents: + if document.name == document_name: + self.signals.ordert_nuggets.emit(list(sorted(document.nuggets, key=lambda x: x[CachedDistanceSignal]))) + return + logger.error(f"Document \"{document_name}\" not found in document base!") + self.signals.error.emit(Exception(f"Document \"{document_name}\" not found in document base!")) + def create_document_base(self, documents: list[Document], attributes: list[Attribute], statistics: Statistics): logger.debug("Called slot 'create_document_base'.") diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index 21ed3b37..407f18be 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -305,21 +305,23 @@ def run(self, user_id: int, base_name: str, organisation_id: int): class DocumentBaseGetOrderedNuggets(BaseTask): name = "DocumentBaseGetOrderedNuggets" - def run(self, user_id: int, base_name: str, organisation_id: int, document_id: int): + #def run(self, user_id: int, base_name: str, organisation_id: int, document_id: int): + def run(self, user_id: int, base_name: str, organisation_id: int, document_name: str, document_content: str): self._signals = Signals(str(self.request.id)) self._redis_client = RedisCache(str(self.request.id)) self.load() api = WannaDB_WebAPI(user_id, base_name, organisation_id) api.load_document_base_from_bson() - api.get_ordert_nuggets(document_id) + #api.get_ordert_nuggets(document_id) + api.get_ordered_nuggets_by_doc_name(document_name, document_content) # no need to update the document base self.update(State.SUCCESS) return self class DocumentBaseConfirmNugget(BaseTask): - name = "DocumentBaseGetOrderedNuggets" + name = "DocumentBaseConfirmNugget" def run(self, user_id: int, base_name: str, organisation_id: int, document_id_for_nugget_x: int, nugget: Union[str, InformationNugget], From a7a461497c9329c7147dc705c5d7979433168930 Mon Sep 17 00:00:00 2001 From: cophilot Date: Thu, 8 Feb 2024 12:38:35 +0100 Subject: [PATCH 216/254] bug fixes with ordered nuggets --- wannadb_web/postgres/queries.py | 11 ++++++----- wannadb_web/worker/Web_API.py | 3 ++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/wannadb_web/postgres/queries.py b/wannadb_web/postgres/queries.py index 6d5885c5..47ac5457 100644 --- a/wannadb_web/postgres/queries.py +++ b/wannadb_web/postgres/queries.py @@ -186,13 +186,14 @@ def getDocument(document_id: int, user_id: int): return None def getDocumentByNameAndContent(doc_name: str, doc_content: str, user_id: int): - select_query = sql.SQL("""SELECT name,content,content_byte - FROM documents - JOIN membership m ON 
documents.organisationid = m.organisationid - WHERE name = (%s) AND content = (%s) AND m.userid = (%s) + select_query = sql.SQL(""" SELECT name,content,content_byte + FROM documents + JOIN membership m ON documents.organisationid = m.organisationid + WHERE name = (%s) AND m.userid = (%s) """) + #AND content LIKE %(%s)% - result = execute_query(select_query, (doc_name, doc_content, user_id,)) + result = execute_query(select_query, (doc_name, user_id, )) if len(result) > 0: for document in result: name = document[0] diff --git a/wannadb_web/worker/Web_API.py b/wannadb_web/worker/Web_API.py index 1ccc5f15..f9a1e3d5 100644 --- a/wannadb_web/worker/Web_API.py +++ b/wannadb_web/worker/Web_API.py @@ -132,7 +132,8 @@ def get_ordered_nuggets_by_doc_name(self, document_name: str, document_content: self.signals.status.emit("get_ordered_nuggets_by_doc_name") for document in self.document_base.documents: if document.name == document_name: - self.signals.ordert_nuggets.emit(list(sorted(document.nuggets, key=lambda x: x[CachedDistanceSignal]))) + document_obj = Document(document_name, document_content) + self.signals.ordert_nuggets.emit(list(sorted(document_obj.nuggets, key=lambda x: x[CachedDistanceSignal]))) return logger.error(f"Document \"{document_name}\" not found in document base!") self.signals.error.emit(Exception(f"Document \"{document_name}\" not found in document base!")) From aff6a695b5afd1efa219b82a4816e0f398465e16 Mon Sep 17 00:00:00 2001 From: cophilot Date: Thu, 8 Feb 2024 14:19:02 +0100 Subject: [PATCH 217/254] added confirm nugget endpoint --- wannadb_web/routing/core.py | 89 ++++++++++++++++++++++++++++++++----- wannadb_web/worker/tasks.py | 10 +++-- 2 files changed, 86 insertions(+), 13 deletions(-) diff --git a/wannadb_web/routing/core.py b/wannadb_web/routing/core.py index fe38f3c1..3a6fac54 100644 --- a/wannadb_web/routing/core.py +++ b/wannadb_web/routing/core.py @@ -39,7 +39,7 @@ from wannadb_web.util import tokenDecode from wannadb_web.worker.data import Signals -from wannadb_web.worker.tasks import CreateDocumentBase, BaseTask, DocumentBaseAddAttributes, DocumentBaseInteractiveTablePopulation, DocumentBaseLoad, \ +from wannadb_web.worker.tasks import CreateDocumentBase, BaseTask, DocumentBaseAddAttributes, DocumentBaseConfirmNugget, DocumentBaseInteractiveTablePopulation, DocumentBaseLoad, \ DocumentBaseUpdateAttributes, DocumentBaseGetOrderedNuggets @@ -240,8 +240,12 @@ def document_base_attribute_update(): user_id = _token.id - task = DocumentBaseUpdateAttributes().apply_async(args=(user_id, attributes_string, - base_name, organisation_id)) + task = DocumentBaseUpdateAttributes().apply_async(args=( + user_id, + attributes_string, + base_name, + organisation_id + )) return make_response({'task_id': task.id}, 202) @@ -286,8 +290,6 @@ def task_update(task_id: str): signals.feedback_request_from_ui.emit(request.json.get("feedback")) -## todo: renaming of the endpoint - @core_routes.route('/document_base/order/nugget', methods=['POST']) def sort_nuggets(): """ @@ -295,10 +297,6 @@ def sort_nuggets(): This endpoint is used to create a document base from a list of document ids and a list of attributes. 
- Example Header: - { - } - Example Form Payload: { "authorization": "your_authorization_token" @@ -324,7 +322,78 @@ def sort_nuggets(): user_id = _token.id - task = DocumentBaseGetOrderedNuggets().apply_async(args=(user_id, base_name, organisation_id, document_name, document_content)) + task = DocumentBaseGetOrderedNuggets().apply_async(args=( + user_id, + base_name, + organisation_id, + document_name, + document_content + )) + + return make_response({'task_id': task.id}, 202) + +@core_routes.route('/document_base/confirm/nugget', methods=['POST']) +def confirm_nugget(): + """ + Endpoint to confirm a nugget. + + Example Form Payload: + { + "authorization": "your_authorization_token" + "organisationId": "your_organisation_id", + "baseName": "your_document_base_name", + "documentName": "your_document_name", + "documentContent": "your_document_content", + "nuggetText": "nugget_as_text", + "startIndex": "start_index_of_nugget", + "endIndex": "end_index_of_nugget", + "interactiveCallTaskId": "interactive_call_task_id" + } + """ + form = request.form + + authorization = form.get("authorization") + organisation_id: Optional[int] = form.get("organisationId") + base_name = form.get("baseName") + + document_name = form.get("documentName") + document_content = form.get("documentContent") + nugget_text = form.get("nuggetText") + start_index: Optional[int] = form.get("startIndex") + end_index: Optional[int] = form.get("endIndex") + + i_task_id = form.get("interactiveCallTaskId") + + if (organisation_id is None + or base_name is None + or document_name is None + or document_content is None + or authorization is None + or nugget_text is None + or start_index is None + or end_index is None + or i_task_id is None): + + return make_response({"error": "missing parameters"}, 400) + + _token = tokenDecode(authorization) + + if _token is False: + return make_response({"error": "invalid token"}, 401) + + user_id = _token.id + + task = DocumentBaseConfirmNugget().apply_async(args=( + user_id, + base_name, + organisation_id, + document_name, + document_content, + nugget_text, + start_index, + end_index, + i_task_id + )) return make_response({'task_id': task.id}, 202) diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index 407f18be..82ccd2f6 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -323,14 +323,18 @@ def run(self, user_id: int, base_name: str, organisation_id: int, document_name: class DocumentBaseConfirmNugget(BaseTask): name = "DocumentBaseConfirmNugget" + #def run(self, user_id: int, base_name: str, organisation_id: int, + # document_id_for_nugget_x: int, nugget: Union[str, InformationNugget], + # start_index: int, end_index: int, interactive_call_task_id: str): def run(self, user_id: int, base_name: str, organisation_id: int, - document_id_for_nugget_x: int, nugget: Union[str, InformationNugget], + document_name: str, document_text: str, nugget: Union[str, InformationNugget], start_index: int, end_index: int, interactive_call_task_id: str): """ :param user_id: user id :param base_name: name of base document :param organisation_id: organisation id of the document base - :param document_id_for_nugget_x: the document id for the document that gets a new nugget + :param document_name: name of the document + :param document_text: text of the document :param nugget: the Nugget that gets confirmed :param start_index: start of the nugget in the document :param end_index: end of the nugget in the document @@ -343,7 +347,7 @@ def run(self, user_id: int, base_name: 
str, organisation_id: int, api = WannaDB_WebAPI(user_id, base_name, organisation_id) api.load_document_base_from_bson() - document_name, document_text = getDocuments([document_id_for_nugget_x], user_id)[0] + #document_name, document_text = getDocuments([document_id_for_nugget_x], user_id)[0] document = Document(document_name, document_text) From f53ec7c9d2a249a08965c4560d5cd7f43f1218c6 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Thu, 8 Feb 2024 17:07:10 +0100 Subject: [PATCH 218/254] build(prod): adj the target for worker and flower --- docker-compose-prod.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-compose-prod.yaml b/docker-compose-prod.yaml index b9b9f87d..a2f71f53 100644 --- a/docker-compose-prod.yaml +++ b/docker-compose-prod.yaml @@ -23,7 +23,7 @@ services: dockerfile: Dockerfile target: worker tty: true - command: ["celery", "-A", "app.celery", "worker", "-l", "info"] + command: ["celery", "-A", "celery_app", "worker", "-l", "info"] env_file: - wannadb_web/.env/.dev volumes: @@ -40,7 +40,7 @@ services: dockerfile: Dockerfile target: worker tty: true - command: ["celery", "-A", "app.celery", "flower"] + command: ['celery', '-A', 'celery_app', 'flower'] env_file: - wannadb_web/.env/.dev volumes: From 3bc01d2e4e2925045405b4c552c01702dd4a1a84 Mon Sep 17 00:00:00 2001 From: cophilot Date: Sun, 11 Feb 2024 13:14:26 +0100 Subject: [PATCH 219/254] update --- docker-compose-prod.yaml | 147 +++++++++++++++++----------------- wannadb_web/worker/Web_API.py | 2 + 2 files changed, 75 insertions(+), 74 deletions(-) diff --git a/docker-compose-prod.yaml b/docker-compose-prod.yaml index b9b9f87d..3e875e52 100644 --- a/docker-compose-prod.yaml +++ b/docker-compose-prod.yaml @@ -1,83 +1,82 @@ -version: "3.6" +version: '3.6' services: - wannadb: - build: - context: . - dockerfile: Dockerfile - target: prod - restart: always - tty: true - ports: - - "8000:8000" - env_file: - - wannadb_web/.env/.dev - depends_on: - - postgres - - redis - networks: - - mynetwork + wannadb: + build: + context: . + dockerfile: Dockerfile + target: prod + restart: always + tty: true + ports: + - '8000:8000' + env_file: + - wannadb_web/.env/.dev + depends_on: + - postgres + - redis + networks: + - mynetwork - worker: - build: - context: . - dockerfile: Dockerfile - target: worker - tty: true - command: ["celery", "-A", "app.celery", "worker", "-l", "info"] - env_file: - - wannadb_web/.env/.dev - volumes: - - ./:/home/wannadb - networks: - - mynetwork - depends_on: - - wannadb - - redis + worker: + build: + context: . + dockerfile: Dockerfile + target: worker + tty: true + command: ['celery', '-A', 'celery_app', 'worker', '-l', 'info'] + env_file: + - wannadb_web/.env/.dev + volumes: + - ./:/home/wannadb + networks: + - mynetwork + depends_on: + - wannadb + - redis - flower: - build: - context: . - dockerfile: Dockerfile - target: worker - tty: true - command: ["celery", "-A", "app.celery", "flower"] - env_file: - - wannadb_web/.env/.dev - volumes: - - ./:/home/wannadb - networks: - - mynetwork - ports: - - "5555:5555" - depends_on: - - wannadb - - redis + flower: + build: + context: . 
+ dockerfile: Dockerfile + target: worker + tty: true + command: ['celery', '-A', 'celery_app', 'flower'] + env_file: + - wannadb_web/.env/.dev + volumes: + - ./:/home/wannadb + networks: + - mynetwork + ports: + - '5555:5555' + depends_on: + - wannadb + - redis + postgres: + image: postgres + container_name: postgres-container + environment: + POSTGRES_PASSWORD: 0 + POSTGRES_DB: userManagement + networks: + - mynetwork + ports: + - '5432:5432' + volumes: + - pgdata:/var/lib/postgresql/data - postgres: - image: postgres - container_name: postgres-container - environment: - POSTGRES_PASSWORD: 0 - POSTGRES_DB: userManagement - networks: - - mynetwork - ports: - - "5432:5432" - volumes: - - pgdata:/var/lib/postgresql/data - - redis: - image: redis:alpine - container_name: redis-container - ports: - - "6379:6379" - networks: - - mynetwork + redis: + image: redis:alpine + container_name: redis-container + ports: + - '6379:6379' + networks: + - mynetwork networks: - mynetwork: - driver: bridge + mynetwork: + driver: bridge volumes: - pgdata: + pgdata: diff --git a/wannadb_web/worker/Web_API.py b/wannadb_web/worker/Web_API.py index f9a1e3d5..7ce0ad68 100644 --- a/wannadb_web/worker/Web_API.py +++ b/wannadb_web/worker/Web_API.py @@ -57,6 +57,8 @@ def interaction_callback_fn(pipeline_element_identifier, feedback_request): while (time.time() - start_time) < 300: msg = self.signals.match_feedback.msg if msg is not None: + print("MSG " + msg) + logger.info("MSG " + msg) return msg time.sleep(2) raise TimeoutError("no match_feedback in time provided") From da78d80aa154473c65bc2db3b57f47364d0b93d3 Mon Sep 17 00:00:00 2001 From: cophilot Date: Sun, 11 Feb 2024 13:17:34 +0100 Subject: [PATCH 220/254] fixed format bug --- docker-compose-prod.yaml | 81 +++++++++++++++++++++------------------- 1 file changed, 42 insertions(+), 39 deletions(-) diff --git a/docker-compose-prod.yaml b/docker-compose-prod.yaml index 2e939d58..d8558da1 100644 --- a/docker-compose-prod.yaml +++ b/docker-compose-prod.yaml @@ -1,21 +1,22 @@ -version: '3.6' +version: "3.6" services: - wannadb: - build: - context: . - dockerfile: Dockerfile - target: prod - restart: always - tty: true - ports: - - '8000:8000' - env_file: - - wannadb_web/.env/.dev - depends_on: - - postgres - - redis - networks: - - mynetwork + wannadb: + build: + context: . + dockerfile: Dockerfile + target: prod + restart: always + tty: true + ports: + - "8000:8000" + env_file: + - wannadb_web/.env/.dev + depends_on: + - postgres + - redis + networks: + - mynetwork + worker: build: context: . 
@@ -51,30 +52,32 @@ services: depends_on: - wannadb - redis - postgres: - image: postgres - container_name: postgres-container - environment: - POSTGRES_PASSWORD: 0 - POSTGRES_DB: userManagement - networks: - - mynetwork - ports: - - '5432:5432' - volumes: - - pgdata:/var/lib/postgresql/data - redis: - image: redis:alpine - container_name: redis-container - ports: - - '6379:6379' - networks: - - mynetwork + + postgres: + image: postgres + container_name: postgres-container + environment: + POSTGRES_PASSWORD: 0 + POSTGRES_DB: userManagement + networks: + - mynetwork + ports: + - "5432:5432" + volumes: + - pgdata:/var/lib/postgresql/data + + redis: + image: redis:alpine + container_name: redis-container + ports: + - "6379:6379" + networks: + - mynetwork networks: - mynetwork: - driver: bridge + mynetwork: + driver: bridge volumes: - pgdata: + pgdata: \ No newline at end of file From 9d7a3108b4d4767a844cecd811f6314881f50372 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Sun, 11 Feb 2024 13:17:55 +0100 Subject: [PATCH 221/254] feat(_MatchFeedback): adjust type safety --- wannadb_web/worker/data.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/wannadb_web/worker/data.py b/wannadb_web/worker/data.py index 44adb64b..5c016c0d 100644 --- a/wannadb_web/worker/data.py +++ b/wannadb_web/worker/data.py @@ -117,10 +117,9 @@ class _MatchFeedback(Emitable): @property def msg(self): msg = self.redis.get(self.type) - if msg is None: - return None - m: dict[str, Any] = json.loads(msg) - return m + if isinstance(msg, str) and msg.startswith("{"): + m: dict[str, Any] = json.loads(msg) + return m def to_json(self): if self.msg is None: From 7da68e3cd626ff5c34e864811f0d5d600485d7c1 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Sun, 11 Feb 2024 13:18:36 +0100 Subject: [PATCH 222/254] feat(DocumentBaseConfirmNugget): rm creation of doc base no interaction --- wannadb_web/worker/tasks.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index 82ccd2f6..2b6d13fe 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -344,19 +344,12 @@ def run(self, user_id: int, base_name: str, organisation_id: int, self._redis_client = RedisCache(str(self.request.id)) self.load() - api = WannaDB_WebAPI(user_id, base_name, organisation_id) - api.load_document_base_from_bson() - - #document_name, document_text = getDocuments([document_id_for_nugget_x], user_id)[0] - document = Document(document_name, document_text) self._signals.match_feedback.emit(match_feedback(nugget, document, start_index, end_index)) # no need to update the document base the doc will be saved in the interactive call - if api.signals.error.msg is None: - api.update_document_base_to_bson() - self.update(State.SUCCESS) - return self + self.update(State.SUCCESS) + return self def nugget_exist(nugget: str, document: Document, start_index: int, end_index: int): From f91c041fa380b4e9e23e0eebab92647c23132326 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Sun, 11 Feb 2024 13:19:06 +0100 Subject: [PATCH 223/254] feat(interaction_callback_fn): add status update --- wannadb_web/worker/Web_API.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/wannadb_web/worker/Web_API.py b/wannadb_web/worker/Web_API.py index 7ce0ad68..c2481dc8 100644 --- a/wannadb_web/worker/Web_API.py +++ b/wannadb_web/worker/Web_API.py @@ -57,8 +57,7 @@ def interaction_callback_fn(pipeline_element_identifier, feedback_request): while (time.time() - 
start_time) < 300: msg = self.signals.match_feedback.msg if msg is not None: - print("MSG " + msg) - logger.info("MSG " + msg) + self.signals.status.emit("Feedback received from UI") return msg time.sleep(2) raise TimeoutError("no match_feedback in time provided") From b7112b6644336f1cd17901518d3da971f6dfc93a Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Sun, 11 Feb 2024 13:33:55 +0100 Subject: [PATCH 224/254] feat(DocumentBaseConfirmNugget): fix id missmatch --- wannadb_web/worker/tasks.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index 2b6d13fe..ff55a043 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -322,10 +322,7 @@ def run(self, user_id: int, base_name: str, organisation_id: int, document_name: class DocumentBaseConfirmNugget(BaseTask): name = "DocumentBaseConfirmNugget" - - #def run(self, user_id: int, base_name: str, organisation_id: int, - # document_id_for_nugget_x: int, nugget: Union[str, InformationNugget], - # start_index: int, end_index: int, interactive_call_task_id: str): + def run(self, user_id: int, base_name: str, organisation_id: int, document_name: str, document_text: str, nugget: Union[str, InformationNugget], start_index: int, end_index: int, interactive_call_task_id: str): @@ -341,7 +338,7 @@ def run(self, user_id: int, base_name: str, organisation_id: int, :param interactive_call_task_id: the same task id that's used for interactive call """ self._signals = Signals(interactive_call_task_id) - self._redis_client = RedisCache(str(self.request.id)) + self._redis_client = RedisCache(str(user_id)) self.load() document = Document(document_name, document_text) @@ -360,7 +357,7 @@ def nugget_exist(nugget: str, document: Document, start_index: int, end_index: i def match_feedback(nugget: Union[str, InformationNugget], document: Document, - start_index: int = None, end_index: int = None): + start_index: Optional[int] = None, end_index: Optional[int] = None): logger.debug("match_feedback") if isinstance(nugget, str): if document is None: From 2c80a6e5d0f9e81e7ad6664563866ecd7af18f38 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Sun, 11 Feb 2024 13:34:45 +0100 Subject: [PATCH 225/254] reformat --- wannadb_web/worker/Web_API.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/wannadb_web/worker/Web_API.py b/wannadb_web/worker/Web_API.py index c2481dc8..ff7616cb 100644 --- a/wannadb_web/worker/Web_API.py +++ b/wannadb_web/worker/Web_API.py @@ -3,7 +3,7 @@ import json import logging import time -from typing import Optional +from typing import Optional, Any import wannadb from wannadb import resources @@ -23,7 +23,8 @@ from wannadb.statistics import Statistics from wannadb.status import StatusCallback from wannadb_web.SQLite.Cache_DB import SQLiteCacheDBWrapper -from wannadb_web.postgres.queries import getDocument_by_name, getDocumentByNameAndContent, updateDocumentContent, getDocument +from wannadb_web.postgres.queries import getDocument_by_name, getDocumentByNameAndContent, updateDocumentContent, \ + getDocument from wannadb_web.postgres.transactions import addDocument from wannadb_web.worker.data import Signals @@ -36,7 +37,7 @@ def __init__(self, user_id: int, document_base_name: str, organisation_id: int): self._document_id: Optional[int] = None self._document_base: Optional[DocumentBase] = None self.user_id = user_id - self._feedback = None + self._feedback: Optional[dict[str, Any]] = None self.signals = 
Signals(str(self.user_id)) self.signals.reset() @@ -52,7 +53,7 @@ def status_callback_fn(message, progress): def interaction_callback_fn(pipeline_element_identifier, feedback_request): feedback_request["identifier"] = pipeline_element_identifier self.signals.feedback_request_to_ui.emit(feedback_request) - + start_time = time.time() while (time.time() - start_time) < 300: msg = self.signals.match_feedback.msg @@ -61,7 +62,7 @@ def interaction_callback_fn(pipeline_element_identifier, feedback_request): return msg time.sleep(2) raise TimeoutError("no match_feedback in time provided") - + self.interaction_callback = InteractionCallback(interaction_callback_fn) if wannadb.resources.MANAGER is None: @@ -71,18 +72,17 @@ def interaction_callback_fn(pipeline_element_identifier, feedback_request): self.signals.error.emit(Exception("Cache db could not be initialized!")) raise Exception("Cache db could not be initialized!") logger.info("WannaDB_WebAPI initialized") - @property def feedback(self): if self._feedback is None: raise Exception("Feedback is not set!") return self._feedback - + @feedback.setter - def feedback(self, value:dict): + def feedback(self, value: dict): self._feedback = value - + @property def document_id(self): if self._document_id is None: @@ -122,7 +122,7 @@ def get_ordert_nuggets(self, document_id: int): return logger.error(f"Document \"{document_name}\" not found in document base!") self.signals.error.emit(Exception(f"Document \"{document_name}\" not found in document base!")) - + def get_ordered_nuggets_by_doc_name(self, document_name: str, document_content: str): document = getDocumentByNameAndContent(document_name, document_content, self.user_id) if document is None: @@ -134,12 +134,12 @@ def get_ordered_nuggets_by_doc_name(self, document_name: str, document_content: for document in self.document_base.documents: if document.name == document_name: document_obj = Document(document_name, document_content) - self.signals.ordert_nuggets.emit(list(sorted(document_obj.nuggets, key=lambda x: x[CachedDistanceSignal]))) + self.signals.ordert_nuggets.emit( + list(sorted(document_obj.nuggets, key=lambda x: x[CachedDistanceSignal]))) return logger.error(f"Document \"{document_name}\" not found in document base!") self.signals.error.emit(Exception(f"Document \"{document_name}\" not found in document base!")) - - + def create_document_base(self, documents: list[Document], attributes: list[Attribute], statistics: Statistics): logger.debug("Called slot 'create_document_base'.") self.signals.status.emit("create_document_base") @@ -255,7 +255,7 @@ def update_document_base_to_bson(self): print(self.document_id) print("ATT") print(self.document_base.attributes) - + status = updateDocumentContent(self.document_id, self.document_base.to_bson()) if status is False: logger.error(f"Document base could not be saved to BSON! 
Document {self.document_id} does not exist!") @@ -348,7 +348,7 @@ def remove_attributes(self, attributes: list[Attribute]): else: logger.error("Attribute name does not exist!") self.signals.error.emit(Exception("Attribute name does not exist!")) - + def update_attributes(self, attributes: list[Attribute]): logger.debug("Called function 'update_attributes'.") self.document_base.attributes.clear() From cde27f79f6fec915e7715b3a802e0abe503f7614 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Sun, 11 Feb 2024 13:38:07 +0100 Subject: [PATCH 226/254] feat(_MatchFeedback): add None --- wannadb_web/worker/data.py | 61 ++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/wannadb_web/worker/data.py b/wannadb_web/worker/data.py index 5c016c0d..440dc0fb 100644 --- a/wannadb_web/worker/data.py +++ b/wannadb_web/worker/data.py @@ -1,7 +1,7 @@ import abc import json from abc import abstractmethod -from typing import Any +from typing import Any, Union from wannadb.data.data import DocumentBase, InformationNugget, Document, Attribute from wannadb.data.signals import BaseSignal @@ -73,11 +73,11 @@ def __init__(self, user_id: str): self.cache_db_to_ui = _Dump("cache_db_to_ui", user_id) self.ordert_nuggets = _Nuggets("ordert_nuggets", user_id) self.match_feedback = _MatchFeedback("match_feedback", user_id) - + def to_json(self) -> dict[str, str]: return {"user_id": self.__user_id, self.feedback.type: self.feedback.to_json(), - + self.error.type: self.error.to_json(), self.status.type: self.status.to_json(), self.finished.type: self.finished.to_json(), @@ -91,120 +91,123 @@ def reset(self): class Emitable(abc.ABC): - + def __init__(self, emitable_type: str, user_id: str): self.type = emitable_type self.redis = RedisCache(user_id) - + @property def msg(self): msg = self.redis.get(self.type) if msg is None: return None return msg - + @abstractmethod def to_json(self): raise NotImplementedError - + @abstractmethod def emit(self, status: Any): raise NotImplementedError class _MatchFeedback(Emitable): - + @property def msg(self): msg = self.redis.get(self.type) if isinstance(msg, str) and msg.startswith("{"): m: dict[str, Any] = json.loads(msg) return m - + def to_json(self): if self.msg is None: return {} - return json.loads(self.msg) - - def emit(self, status: dict[str, Any]): + return self.msg + + def emit(self, status: Union[dict[str, Any], None]): + if status is None: + self.redis.delete(self.type) + return self.redis.set(self.type, json.dumps(status)) class _State(Emitable): - + def to_json(self): if self.msg is None: return "" return self.msg.decode("utf-8") - + def emit(self, status: str): self.redis.set(self.type, status) class _Signal(Emitable): - + def to_json(self): return str(self.msg) - + def emit(self, status: float): self.redis.set(self.type, str(status)) class _Error(Emitable): - + def to_json(self): if self.msg is None: return "" return self.msg.decode("utf-8") - + def emit(self, exception: BaseException): self.redis.set(self.type, str(exception)) class _Nuggets(Emitable): - + def to_json(self): if self.msg is None: return {} if not isinstance(self.msg, str): raise TypeError("_Nugget msg must be of type str") return self.msg - + def emit(self, status: list[InformationNugget]): self.redis.set(self.type, json.dumps(nuggets_to_json(status))) class _DocumentBase(Emitable): - + def to_json(self): if self.msg is None: return {} return json.loads(self.msg) - + def emit(self, status: DocumentBase): self.redis.set(self.type, 
json.dumps(document_base_to_json(status))) class _Statistics(Emitable): - + @property def msg(self): return "not implemented" - + def to_json(self): return Statistics(False).to_serializable() - + def emit(self, statistic: Statistics): pass class _Feedback(Emitable): - + def to_json(self): if self.msg is None: return {} return json.loads(self.msg) - + def emit(self, status: dict[str, Any]): print("Status: " + str(status)) for key, value in status.items(): @@ -214,9 +217,9 @@ def emit(self, status: dict[str, Any]): class _Dump(Emitable): - + def to_json(self): return self.msg - + def emit(self, status): self.redis.set(self.type, json.dumps(status)) From 4eef8dd315f42c9b2ba50c238688127e7cb34f80 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Sun, 11 Feb 2024 13:38:45 +0100 Subject: [PATCH 227/254] feat(interaction_callback_fn): rm received feedback --- wannadb_web/worker/Web_API.py | 1 + 1 file changed, 1 insertion(+) diff --git a/wannadb_web/worker/Web_API.py b/wannadb_web/worker/Web_API.py index ff7616cb..92bdfe80 100644 --- a/wannadb_web/worker/Web_API.py +++ b/wannadb_web/worker/Web_API.py @@ -59,6 +59,7 @@ def interaction_callback_fn(pipeline_element_identifier, feedback_request): msg = self.signals.match_feedback.msg if msg is not None: self.signals.status.emit("Feedback received from UI") + self.signals.match_feedback.emit(None) return msg time.sleep(2) raise TimeoutError("no match_feedback in time provided") From fe1d0d60072cd5dd57300d46c2f9edada5978544 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Sun, 11 Feb 2024 14:00:00 +0100 Subject: [PATCH 228/254] feat(nugget_exist): fix bug --- wannadb_web/worker/tasks.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index ff55a043..97c7be81 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -350,10 +350,14 @@ def run(self, user_id: int, base_name: str, organisation_id: int, def nugget_exist(nugget: str, document: Document, start_index: int, end_index: int): - if document.text.rfind(nugget, start_index, end_index) >= 0: - return True - else: + try: + if document.text[start_index:end_index] == nugget: + return True + except IndexError: + logger.error("Nugget does not exist in the given Text") raise Exception("Nugget does not exist in the given Text") + logger.error("Nugget does not exist in the given Text") + raise Exception("Nugget does not exist in the given Text") def match_feedback(nugget: Union[str, InformationNugget], document: Document, From daf5cb7b9f1aef296ebc4bb9fe4ea9b3ff70557d Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Sun, 11 Feb 2024 14:42:18 +0100 Subject: [PATCH 229/254] feat(_MatchFeedback): add new types for canceling the matching --- wannadb_web/worker/data.py | 85 ++++++++++++++++++++++++++++++-------- 1 file changed, 67 insertions(+), 18 deletions(-) diff --git a/wannadb_web/worker/data.py b/wannadb_web/worker/data.py index 440dc0fb..5ad58731 100644 --- a/wannadb_web/worker/data.py +++ b/wannadb_web/worker/data.py @@ -1,6 +1,7 @@ import abc import json from abc import abstractmethod +from dataclasses import dataclass from typing import Any, Union from wannadb.data.data import DocumentBase, InformationNugget, Document, Attribute @@ -20,7 +21,7 @@ def nugget_to_json(nugget: InformationNugget): return { "text": nugget.text, "signals": [{"name": name, "signal": signal_to_json(signal)} for name, signal in - nugget.signals.items()], + nugget.signals.items()], "document": {"name": nugget.document.name, "text": 
nugget.document.text}, "end_char": str(nugget.end_char), "start_char": str(nugget.start_char)} @@ -38,7 +39,7 @@ def document_to_json(document: Document): "text": document.text, "attribute_mappings": "not implemented yet", "signals": [{"name": name, "signal": signal_to_json(signal)} for name, signal in - document.signals.items()], + document.signals.items()], "nuggets": [nugget_to_json(nugget) for nugget in document.nuggets] } @@ -52,8 +53,8 @@ def attribute_to_json(attribute: Attribute): def document_base_to_json(document_base: DocumentBase): return { 'msg': {"attributes ": [attribute.name for attribute in document_base.attributes], - "nuggets": [nugget_to_json(nugget) for nugget in document_base.nuggets] - } + "nuggets": [nugget_to_json(nugget) for nugget in document_base.nuggets] + } } @@ -77,14 +78,13 @@ def __init__(self, user_id: str): def to_json(self) -> dict[str, str]: return {"user_id": self.__user_id, self.feedback.type: self.feedback.to_json(), - - self.error.type: self.error.to_json(), - self.status.type: self.status.to_json(), - self.finished.type: self.finished.to_json(), - self.document_base_to_ui.type: self.document_base_to_ui.to_json(), - self.statistics.type: self.statistics.to_json(), - self.feedback_request_to_ui.type: self.feedback_request_to_ui.to_json(), - self.cache_db_to_ui.type: self.cache_db_to_ui.to_json()} + self.error.type: self.error.to_json(), + self.status.type: self.status.to_json(), + self.finished.type: self.finished.to_json(), + self.document_base_to_ui.type: self.document_base_to_ui.to_json(), + self.statistics.type: self.statistics.to_json(), + self.feedback_request_to_ui.type: self.feedback_request_to_ui.to_json(), + self.cache_db_to_ui.type: self.cache_db_to_ui.to_json()} def reset(self): RedisCache(self.__user_id).delete_user_space() @@ -112,25 +112,74 @@ def emit(self, status: Any): raise NotImplementedError +@dataclass +class CustomMatchFeedback: + message = "custom-match" + document: Document + start: int + end: int + + def to_json(self): + return {"message": self.message, "document": document_to_json(self.document), "start": self.start, + "end": self.end} + + +@dataclass +class NuggetMatchFeedback: + message = "is-match" + nugget: InformationNugget + not_a_match: None + + def to_json(self): + return {"message": self.message, "nugget": nugget_to_json(self.nugget), "not_a_match": self.not_a_match} + + +@dataclass +class NoMatchFeedback: + message = "no-match-in-document" + nugget: InformationNugget + not_a_match: InformationNugget + + def to_json(self): + return {"message": self.message, "nugget": nugget_to_json(self.nugget), + "not_a_match": nugget_to_json(self.not_a_match)} + + class _MatchFeedback(Emitable): @property - def msg(self): + def msg(self) -> Union[CustomMatchFeedback, NuggetMatchFeedback, NoMatchFeedback, None]: msg = self.redis.get(self.type) if isinstance(msg, str) and msg.startswith("{"): - m: dict[str, Any] = json.loads(msg) - return m + m = json.loads(msg) + if "message" in m and m["message"] == "custom-match": + return CustomMatchFeedback(m["document"], m["start"], m["end"]) + elif "message" in m and m["message"] == "is-match": + return NuggetMatchFeedback(m["nugget"], None) + elif "message" in m and m["message"] == "no-match-in-document": + return NoMatchFeedback(m["nugget"], m["not_a_match"]) + return None def to_json(self): if self.msg is None: return {} - return self.msg + return self.msg.to_json() - def emit(self, status: Union[dict[str, Any], None]): + def emit(self, status: Union[CustomMatchFeedback, 
NuggetMatchFeedback, None]): if status is None: self.redis.delete(self.type) return - self.redis.set(self.type, json.dumps(status)) + if isinstance(status, CustomMatchFeedback): + self.redis.set(self.type, json.dumps( + {"message": status.message, "document": document_to_json(status.document), "start": status.start, + "end": status.end})) + elif isinstance(status, NuggetMatchFeedback): + self.redis.set(self.type, json.dumps({"message": status.message, "nugget": nugget_to_json(status.nugget)})) + elif isinstance(status, NoMatchFeedback): + self.redis.set(self.type, json.dumps( + {"message": status.message, "nugget": nugget_to_json(status.nugget), + "not_a_match": nugget_to_json(status.not_a_match)})) + raise TypeError("status must be of type CustomMatchFeedback or NuggetMatchFeedback or NoMatchFeedback or None") class _State(Emitable): From 07c854536ca829e8a8c4458d0fce956080885fa9 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Sun, 11 Feb 2024 14:50:43 +0100 Subject: [PATCH 230/254] feat(DocumentBaseConfirmNugget): adjust types --- wannadb_web/worker/tasks.py | 134 ++++++++++++++++++------------------ 1 file changed, 66 insertions(+), 68 deletions(-) diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index 97c7be81..02be39a7 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -6,14 +6,13 @@ from celery import Task import wannadb.resources -from wannadb.data.data import Document, Attribute, DocumentBase, InformationNugget -from wannadb.interaction import EmptyInteractionCallback +from wannadb.data.data import Document, Attribute, InformationNugget from wannadb.resources import ResourceManager from wannadb.statistics import Statistics from wannadb_web.Redis.RedisCache import RedisCache from wannadb_web.postgres.queries import getDocuments from wannadb_web.worker.Web_API import WannaDB_WebAPI -from wannadb_web.worker.data import Signals +from wannadb_web.worker.data import Signals, NoMatchFeedback, NuggetMatchFeedback, CustomMatchFeedback from wannadb_web.worker.util import State logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") @@ -22,7 +21,7 @@ class InitManager(Task): name = "InitManager" - + def run(self, *args, **kwargs): ResourceManager() if wannadb.resources.MANAGER is None: @@ -35,13 +34,13 @@ class BaseTask(Task): name = "BaseTask" _signals: Optional[Signals] = None _redis_client: Optional[RedisCache] = None - + def __init__(self): super().__init__() - + def run(self, *args, **kwargs): raise NotImplementedError("BaseTask is abstract") - + @staticmethod def load(): if wannadb.resources.MANAGER is None: @@ -49,34 +48,34 @@ def load(): BaseTask.load() return logging.info("loaded") - + def update(self, - state: State, - meta: Optional[dict[str, Any]] = None, - ) -> None: + state: State, + meta: Optional[dict[str, Any]] = None, + ) -> None: super().update_state(state=state.value, meta=meta) - + def update_state(self, - task_id: Optional[str] = None, - state: Optional[str] = None, - meta: Any = None, - **kwargs: Any - ) -> None: + task_id: Optional[str] = None, + state: Optional[str] = None, + meta: Any = None, + **kwargs: Any + ) -> None: raise NotImplementedError("user update() instead") - + def get_new_input(self): if self._redis_client is None: raise RuntimeError("self._redis_client is None!") _input = self._redis_client.get("input") if _input is not None: pass - + return _input class TestTask(BaseTask): name = "TestTask" - + def run(self, *args, **kwargs): super().run() 
self.update(state=State.PENDING) @@ -92,9 +91,9 @@ def run(self, *args, **kwargs): class CreateDocumentBase(BaseTask): name = "CreateDocumentBase" - + def run(self, user_id: int, document_ids: list[int], attributes_strings: list[str], statistics_dump: bytes, - base_name: str, organisation_id: int): + base_name: str, organisation_id: int): self.load() attributes: list[Attribute] = [] statistics: Statistics = pickle.loads(statistics_dump) @@ -106,24 +105,24 @@ def run(self, user_id: int, document_ids: list[int], attributes_strings: list[st logger.error("Attribute names must be unique!") raise Exception("Attribute names must be unique!") attributes.append(Attribute(attribute_string)) - + """ init api """ ## todo hier muss self.request.id durchgeleitet werden und in signals(request_id) gespeichert werden api = WannaDB_WebAPI(user_id, base_name, organisation_id) - + """ Creating document base """ if not isinstance(attributes[0], Attribute): self.update(State.ERROR) raise Exception("Invalid attributes") - + if not isinstance(statistics, Statistics): self.update(State.ERROR) raise Exception("Invalid statistics") - + docs = getDocuments(document_ids, user_id) self.update(State.PENDING) documents: list[Document] = [] @@ -133,9 +132,9 @@ def run(self, user_id: int, document_ids: list[int], attributes_strings: list[st else: self.update(State.ERROR) raise Exception("No documents found") - + api.create_document_base(documents, attributes, statistics) - + api.save_document_base_to_bson() if api.signals.error.msg is None: api.update_document_base_to_bson() @@ -152,8 +151,8 @@ def run(self, user_id: int, base_name: str, organisation_id: int): self.load() api = WannaDB_WebAPI(user_id, base_name, organisation_id) api.load_document_base_from_bson() - #self.update(State.SUCCESS) - #return self + # self.update(State.SUCCESS) + # return self if api.signals.error.msg is None: self.update(State.SUCCESS) return self @@ -163,11 +162,11 @@ def run(self, user_id: int, base_name: str, organisation_id: int): class DocumentBaseAddAttributes(BaseTask): name = "DocumentBaseAddAttributes" - + def run(self, user_id: int, attributes_strings: list[str], base_name: str, organisation_id: int): self.load() attributes: list[Attribute] = [] - + for attribute_string in attributes_strings: if attribute_string == "": logger.error("Attribute names cannot be empty!") @@ -176,7 +175,7 @@ def run(self, user_id: int, attributes_strings: list[str], base_name: str, organ logger.error("Attribute names must be unique!") raise Exception("Attribute names must be unique!") attributes.append(Attribute(attribute_string)) - + api = WannaDB_WebAPI(user_id, base_name, organisation_id) api.load_document_base_from_bson() api.add_attributes(attributes) @@ -187,6 +186,7 @@ def run(self, user_id: int, attributes_strings: list[str], base_name: str, organ self.update(State.ERROR) return self + class DocumentBaseUpdateAttributes(BaseTask): name = "DocumentBaseAddAttributes" @@ -213,13 +213,14 @@ def run(self, user_id: int, attributes_strings: list[str], base_name: str, organ self.update(State.ERROR) return self + class DocumentBaseRemoveAttributes(BaseTask): name = "DocumentBaseRemoveAttributes" - + def run(self, user_id: int, attributes_strings: list[str], base_name: str, organisation_id: int): self.load() attributes: list[Attribute] = [] - + for attribute_string in attributes_strings: if attribute_string == "": logger.error("Attribute names cannot be empty!") @@ -228,7 +229,7 @@ def run(self, user_id: int, attributes_strings: list[str], base_name: str, 
organ logger.error("Attribute names must be unique!") raise Exception("Attribute names must be unique!") attributes.append(Attribute(attribute_string)) - + api = WannaDB_WebAPI(user_id, base_name, organisation_id) api.load_document_base_from_bson() api.remove_attributes(attributes) @@ -242,11 +243,11 @@ def run(self, user_id: int, attributes_strings: list[str], base_name: str, organ class DocumentBaseForgetMatches(BaseTask): name = "DocumentBaseForgetMatches" - + def run(self, user_id: int, attributes_strings: list[str], base_name: str, organisation_id: int): self.load() attributes: list[Attribute] = [] - + for attribute_string in attributes_strings: if attribute_string == "": logger.error("Attribute names cannot be empty!") @@ -255,7 +256,7 @@ def run(self, user_id: int, attributes_strings: list[str], base_name: str, organ logger.error("Attribute names must be unique!") raise Exception("Attribute names must be unique!") attributes.append(Attribute(attribute_string)) - + api = WannaDB_WebAPI(user_id, base_name, organisation_id) api.load_document_base_from_bson() api.forget_matches() @@ -269,12 +270,12 @@ def run(self, user_id: int, attributes_strings: list[str], base_name: str, organ class DocumentBaseForgetMatchesForAttribute(BaseTask): name = "DocumentBaseForgetMatches" - + def run(self, user_id: int, attribute_string: str, base_name: str, organisation_id: int): self.load() - + attribute = (Attribute(attribute_string)) - + api = WannaDB_WebAPI(user_id, base_name, organisation_id) api.load_document_base_from_bson() api.forget_matches_for_attribute(attribute) @@ -288,12 +289,12 @@ def run(self, user_id: int, attribute_string: str, base_name: str, organisation_ class DocumentBaseInteractiveTablePopulation(BaseTask): name = "DocumentBaseInteractiveTablePopulation" - + def run(self, user_id: int, base_name: str, organisation_id: int): self._signals = Signals(str(self.request.id)) self._redis_client = RedisCache(str(self.request.id)) self.load() - + api = WannaDB_WebAPI(user_id, base_name, organisation_id) api.load_document_base_from_bson() api.interactive_table_population() @@ -302,18 +303,18 @@ def run(self, user_id: int, base_name: str, organisation_id: int): self.update(State.SUCCESS) return self + class DocumentBaseGetOrderedNuggets(BaseTask): name = "DocumentBaseGetOrderedNuggets" - - #def run(self, user_id: int, base_name: str, organisation_id: int, document_id: int): + def run(self, user_id: int, base_name: str, organisation_id: int, document_name: str, document_content: str): self._signals = Signals(str(self.request.id)) self._redis_client = RedisCache(str(self.request.id)) self.load() - + api = WannaDB_WebAPI(user_id, base_name, organisation_id) api.load_document_base_from_bson() - #api.get_ordert_nuggets(document_id) + # api.get_ordert_nuggets(document_id) api.get_ordered_nuggets_by_doc_name(document_name, document_content) # no need to update the document base self.update(State.SUCCESS) @@ -324,8 +325,8 @@ class DocumentBaseConfirmNugget(BaseTask): name = "DocumentBaseConfirmNugget" def run(self, user_id: int, base_name: str, organisation_id: int, - document_name: str, document_text: str, nugget: Union[str, InformationNugget], - start_index: int, end_index: int, interactive_call_task_id: str): + document_name: str, document_text: str, nugget: Union[str, InformationNugget], + start_index: Union[int, None], end_index: Union[int, None], interactive_call_task_id: str): """ :param user_id: user id :param base_name: name of base document @@ -333,17 +334,19 @@ def run(self, user_id: int, 
base_name: str, organisation_id: int, :param document_name: name of the document :param document_text: text of the document :param nugget: the Nugget that gets confirmed - :param start_index: start of the nugget in the document - :param end_index: end of the nugget in the document + :param start_index: start of the nugget in the document (optional) if start and end is None the nugget is not in the document + :param end_index: end of the nugget in the document (optional) if start and end is None the nugget is not in the document :param interactive_call_task_id: the same task id that's used for interactive call """ self._signals = Signals(interactive_call_task_id) self._redis_client = RedisCache(str(user_id)) self.load() - + document = Document(document_name, document_text) - - self._signals.match_feedback.emit(match_feedback(nugget, document, start_index, end_index)) + if start_index is None and end_index is None and isinstance(nugget, InformationNugget): + self._signals.match_feedback.emit(no_match(nugget)) + else: + self._signals.match_feedback.emit(match_feedback(nugget, document, start_index, end_index)) # no need to update the document base the doc will be saved in the interactive call self.update(State.SUCCESS) return self @@ -361,7 +364,7 @@ def nugget_exist(nugget: str, document: Document, start_index: int, end_index: i def match_feedback(nugget: Union[str, InformationNugget], document: Document, - start_index: Optional[int] = None, end_index: Optional[int] = None): + start_index: Optional[int] = None, end_index: Optional[int] = None) -> Union[NuggetMatchFeedback, CustomMatchFeedback]: logger.debug("match_feedback") if isinstance(nugget, str): if document is None: @@ -371,16 +374,11 @@ def match_feedback(nugget: Union[str, InformationNugget], document: Document, logger.error("Start-index or end-index are missing to find the custom nugget") raise Exception("Start-index or end-index are missing to find the custom nugget") elif nugget_exist(nugget, document, start_index, end_index): - return { - "message": "custom-match", - "document": document, - "start": start_index, - "end": end_index - } - else: - return { - "message": "is-match", - "nugget": nugget, - "not-a-match": None - } + return CustomMatchFeedback(document, start_index, end_index) + if isinstance(nugget, InformationNugget): + return NuggetMatchFeedback(nugget, None) + raise Exception("Invalid nugget type") + +def no_match(nugget: InformationNugget) -> NoMatchFeedback: + return NoMatchFeedback(nugget, nugget) From 6ffe0a5df242ea1e1dfc6a7045c2d949d46026f6 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Sun, 11 Feb 2024 14:51:41 +0100 Subject: [PATCH 231/254] feat(interaction_callback_fn): add no-match-in-document t ostop potentially the task --- wannadb_web/worker/Web_API.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/wannadb_web/worker/Web_API.py b/wannadb_web/worker/Web_API.py index 92bdfe80..d77bce6a 100644 --- a/wannadb_web/worker/Web_API.py +++ b/wannadb_web/worker/Web_API.py @@ -26,7 +26,7 @@ from wannadb_web.postgres.queries import getDocument_by_name, getDocumentByNameAndContent, updateDocumentContent, \ getDocument from wannadb_web.postgres.transactions import addDocument -from wannadb_web.worker.data import Signals +from wannadb_web.worker.data import Signals, CustomMatchFeedback, NuggetMatchFeedback, NoMatchFeedback logger = logging.getLogger(__name__) @@ -60,8 +60,15 @@ def interaction_callback_fn(pipeline_element_identifier, feedback_request): if msg is not None: 
self.signals.status.emit("Feedback received from UI") self.signals.match_feedback.emit(None) - return msg - time.sleep(2) + if isinstance(msg, CustomMatchFeedback): + return {"message": "custom-match", "document": msg.document, "start": msg.start} + elif isinstance(msg, NuggetMatchFeedback): + return {"message": "is-match", "nugget": msg.nugget, "not_a_match": msg.not_a_match} + elif isinstance(msg, NoMatchFeedback): + return {"message": "no-match-in-document", "nugget": msg.nugget, "not_a_match": msg.not_a_match} + else: + raise TypeError("Unknown match_feedback type!") + time.sleep(1) raise TimeoutError("no match_feedback in time provided") self.interaction_callback = InteractionCallback(interaction_callback_fn) From 4275aba7612e473749d8ec507ad0a4a694fe149f Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Sun, 11 Feb 2024 14:52:28 +0100 Subject: [PATCH 232/254] type(_MatchFeedback): add NoMatchFeedback in function signatur --- wannadb_web/worker/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wannadb_web/worker/data.py b/wannadb_web/worker/data.py index 5ad58731..da94eeb6 100644 --- a/wannadb_web/worker/data.py +++ b/wannadb_web/worker/data.py @@ -165,7 +165,7 @@ def to_json(self): return {} return self.msg.to_json() - def emit(self, status: Union[CustomMatchFeedback, NuggetMatchFeedback, None]): + def emit(self, status: Union[CustomMatchFeedback, NuggetMatchFeedback, NoMatchFeedback, None]): if status is None: self.redis.delete(self.type) return From 82357e288f6fb69d8a0f0dccf62156da983cdf1a Mon Sep 17 00:00:00 2001 From: cophilot Date: Tue, 13 Feb 2024 11:49:48 +0100 Subject: [PATCH 233/254] bug fixes --- wannadb_web/worker/data.py | 3 ++- wannadb_web/worker/tasks.py | 7 +++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/wannadb_web/worker/data.py b/wannadb_web/worker/data.py index da94eeb6..fe9b2760 100644 --- a/wannadb_web/worker/data.py +++ b/wannadb_web/worker/data.py @@ -179,7 +179,8 @@ def emit(self, status: Union[CustomMatchFeedback, NuggetMatchFeedback, NoMatchFe self.redis.set(self.type, json.dumps( {"message": status.message, "nugget": nugget_to_json(status.nugget), "not_a_match": nugget_to_json(status.not_a_match)})) - raise TypeError("status must be of type CustomMatchFeedback or NuggetMatchFeedback or NoMatchFeedback or None") + else: + raise TypeError("status must be of type CustomMatchFeedback or NuggetMatchFeedback or NoMatchFeedback or None") class _State(Emitable): diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index 02be39a7..0dc429b2 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -353,7 +353,10 @@ def run(self, user_id: int, base_name: str, organisation_id: int, def nugget_exist(nugget: str, document: Document, start_index: int, end_index: int): + print("start: ", start_index, "end: ", end_index) try: + print("doc "+document.text[start_index:end_index]) + print("nug "+nugget) if document.text[start_index:end_index] == nugget: return True except IndexError: @@ -373,8 +376,8 @@ def match_feedback(nugget: Union[str, InformationNugget], document: Document, if start_index is None or end_index is None: logger.error("Start-index or end-index are missing to find the custom nugget") raise Exception("Start-index or end-index are missing to find the custom nugget") - elif nugget_exist(nugget, document, start_index, end_index): - return CustomMatchFeedback(document, start_index, end_index) + # TODO workarround because nugget_exist does not work + return CustomMatchFeedback(document, 
start_index, end_index) if isinstance(nugget, InformationNugget): return NuggetMatchFeedback(nugget, None) raise Exception("Invalid nugget type") From c58c697a04431693120787f51fb73ae36fd5bcc5 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 13 Feb 2024 18:17:15 +0100 Subject: [PATCH 234/254] type(Signals): add ordert nuggets to json --- wannadb_web/worker/data.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/wannadb_web/worker/data.py b/wannadb_web/worker/data.py index fe9b2760..4ada0559 100644 --- a/wannadb_web/worker/data.py +++ b/wannadb_web/worker/data.py @@ -84,7 +84,9 @@ def to_json(self) -> dict[str, str]: self.document_base_to_ui.type: self.document_base_to_ui.to_json(), self.statistics.type: self.statistics.to_json(), self.feedback_request_to_ui.type: self.feedback_request_to_ui.to_json(), - self.cache_db_to_ui.type: self.cache_db_to_ui.to_json()} + self.cache_db_to_ui.type: self.cache_db_to_ui.to_json(), + self.ordert_nuggets.type: self.ordert_nuggets.to_json() + } def reset(self): RedisCache(self.__user_id).delete_user_space() From a2fd8059dab3f16381d10a419ca939219c33c8e8 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 13 Feb 2024 18:23:39 +0100 Subject: [PATCH 235/254] fix(interaction_callback_fn): re-emit feedback_request_to_ui --- wannadb_web/worker/Web_API.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/wannadb_web/worker/Web_API.py b/wannadb_web/worker/Web_API.py index d77bce6a..748de53a 100644 --- a/wannadb_web/worker/Web_API.py +++ b/wannadb_web/worker/Web_API.py @@ -51,12 +51,14 @@ def status_callback_fn(message, progress): self.status_callback = StatusCallback(status_callback_fn) def interaction_callback_fn(pipeline_element_identifier, feedback_request): - feedback_request["identifier"] = pipeline_element_identifier - self.signals.feedback_request_to_ui.emit(feedback_request) start_time = time.time() while (time.time() - start_time) < 300: msg = self.signals.match_feedback.msg + + feedback_request["identifier"] = pipeline_element_identifier + self.signals.feedback_request_to_ui.emit(feedback_request) + if msg is not None: self.signals.status.emit("Feedback received from UI") self.signals.match_feedback.emit(None) From a9b82293d8a46131c78c2cf9be89160025808626 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 13 Feb 2024 18:24:43 +0100 Subject: [PATCH 236/254] fix(interaction_callback_fn): re-emit feedback_request_to_ui --- wannadb_web/worker/Web_API.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/wannadb_web/worker/Web_API.py b/wannadb_web/worker/Web_API.py index 748de53a..dbde9840 100644 --- a/wannadb_web/worker/Web_API.py +++ b/wannadb_web/worker/Web_API.py @@ -52,11 +52,13 @@ def status_callback_fn(message, progress): def interaction_callback_fn(pipeline_element_identifier, feedback_request): + feedback_request["identifier"] = pipeline_element_identifier + start_time = time.time() while (time.time() - start_time) < 300: msg = self.signals.match_feedback.msg - feedback_request["identifier"] = pipeline_element_identifier + self.signals.feedback_request_to_ui.emit(feedback_request) if msg is not None: From 3693dd4e37b9bec51459f3d7711da0524664c7dd Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 13 Feb 2024 19:55:44 +0100 Subject: [PATCH 237/254] feat: adjust typs for all data-structures --- wannadb_web/worker/data.py | 169 ++++++++++++++++++++++++++----------- 1 file changed, 119 insertions(+), 50 deletions(-) diff --git a/wannadb_web/worker/data.py b/wannadb_web/worker/data.py index 
4ada0559..3e0c5e22 100644 --- a/wannadb_web/worker/data.py +++ b/wannadb_web/worker/data.py @@ -1,6 +1,7 @@ import abc import json -from abc import abstractmethod +import pickle +from abc import abstractmethod, ABC from dataclasses import dataclass from typing import Any, Union @@ -9,54 +10,109 @@ from wannadb.statistics import Statistics from wannadb_web.Redis.RedisCache import RedisCache +@dataclass +class _BaseSignal: + identifier:str + signal="not serializable" -def signal_to_json(signal: BaseSignal): - return { - "name": signal.identifier, - "signal": "not serializable" - } + def to_json(self): + return { + "identifier": self.identifier, + "signal":self.signal + } + +def convert_signal(signal: BaseSignal) -> _BaseSignal: + return _BaseSignal(signal.identifier) + +@dataclass +class _InformationNugget: + text:str + signals:dict[str,BaseSignal] + document:Document + end_char:int + start_char:int + + def to_json(self): + return { + "text": self.text, + "signals": [{"name": name, "signal": convert_signal(signal).to_json()} for name, signal in + self.signals.items()], + "document": {"name": self.document.name, "text": self.document.text}, + "end_char": str(self.end_char), + "start_char": str(self.start_char)} -def nugget_to_json(nugget: InformationNugget): - return { - "text": nugget.text, - "signals": [{"name": name, "signal": signal_to_json(signal)} for name, signal in - nugget.signals.items()], - "document": {"name": nugget.document.name, "text": nugget.document.text}, - "end_char": str(nugget.end_char), - "start_char": str(nugget.start_char)} +def convert_to_nugget(nugget: InformationNugget): + return _InformationNugget(nugget.text,nugget.signals,nugget.document,nugget.end_char,nugget.start_char) -def nuggets_to_json(nuggets: list[InformationNugget]): - return { - str(i): nugget_to_json(nugget) for i, nugget in enumerate(nuggets) + +@dataclass +class _InformationNuggets: + nuggets: list[InformationNugget] + + def to_json(self): + return { + str(i): convert_to_nugget(nugget).to_json() for i, nugget in enumerate(self.nuggets) } +def convert_to_nuggets(nuggets: list[InformationNugget]): + return _InformationNuggets(nuggets) + + +@dataclass +class _Document: + name:str + text:str + attribute_mappings = "not implemented yet" + signals:dict[str,BaseSignal] + nuggets:list[InformationNugget] -def document_to_json(document: Document): - return { - "name": document.name, - "text": document.text, + def to_json(self): + return { + "name": self.name, + "text": self.text, "attribute_mappings": "not implemented yet", - "signals": [{"name": name, "signal": signal_to_json(signal)} for name, signal in - document.signals.items()], - "nuggets": [nugget_to_json(nugget) for nugget in document.nuggets] + "signals": [{"name": name, "signal": convert_signal(signal)} for name, signal in + self.signals.items()], + "nuggets": [convert_to_nugget(nugget).to_json() for nugget in self.nuggets] } -def attribute_to_json(attribute: Attribute): - return { - "name": attribute.name - } +def convert_to_document(document: Document): + return _Document(document.name,document.text,document.signals,document.nuggets) -def document_base_to_json(document_base: DocumentBase): - return { - 'msg': {"attributes ": [attribute.name for attribute in document_base.attributes], - "nuggets": [nugget_to_json(nugget) for nugget in document_base.nuggets] - } +@dataclass +class _Attribute: + name:str + signals = "not_implemented" - } + def to_json(self): + return { + "name": self.name, + "signals": self.signals + } + +def 
convert_to_attribute(attribute: Attribute): + return _Attribute(attribute.name) + + +@dataclass +class _DocumentBase: + attributes:list[Attribute] + nuggets:list[InformationNugget] + documents:list[Document] + + def to_json(self): + return { + "attributes": [attribute.name for attribute in self.attributes], + "nuggets": [convert_to_nugget(nugget).to_json() for nugget in self.nuggets], + "documents": [document for document in self.documents] + } + +def convert_to_document_base(document_base: DocumentBase): + return _DocumentBase(document_base.attributes,document_base.nuggets,document_base.documents) class Signals: @@ -67,7 +123,7 @@ def __init__(self, user_id: str): self.status = _State("status", user_id) self.finished = _Signal("finished", user_id) self.error = _Error("error", user_id) - self.document_base_to_ui = _DocumentBase("document_base_to_ui", user_id) + self.document_base_to_ui = _DocumentBaseToUi("document_base_to_ui", user_id) self.statistics = _Statistics("statistics_to_ui", user_id) self.feedback_request_to_ui = _Feedback("feedback_request_to_ui", user_id) self.feedback_request_from_ui = _Feedback("feedback_request_from_ui", user_id) @@ -122,7 +178,7 @@ class CustomMatchFeedback: end: int def to_json(self): - return {"message": self.message, "document": document_to_json(self.document), "start": self.start, + return {"message": self.message, "document": convert_to_document(self.document).to_json(), "start": self.start, "end": self.end} @@ -133,7 +189,7 @@ class NuggetMatchFeedback: not_a_match: None def to_json(self): - return {"message": self.message, "nugget": nugget_to_json(self.nugget), "not_a_match": self.not_a_match} + return {"message": self.message, "nugget": convert_to_nugget(self.nugget).to_json(), "not_a_match": self.not_a_match} @dataclass @@ -143,8 +199,8 @@ class NoMatchFeedback: not_a_match: InformationNugget def to_json(self): - return {"message": self.message, "nugget": nugget_to_json(self.nugget), - "not_a_match": nugget_to_json(self.not_a_match)} + return {"message": self.message, "nugget": convert_to_nugget(self.nugget).to_json(), + "not_a_match": convert_to_nugget(self.not_a_match).to_json()} class _MatchFeedback(Emitable): @@ -173,14 +229,14 @@ def emit(self, status: Union[CustomMatchFeedback, NuggetMatchFeedback, NoMatchFe return if isinstance(status, CustomMatchFeedback): self.redis.set(self.type, json.dumps( - {"message": status.message, "document": document_to_json(status.document), "start": status.start, + {"message": status.message, "document": convert_to_document(status.document).to_json(), "start": status.start, "end": status.end})) elif isinstance(status, NuggetMatchFeedback): - self.redis.set(self.type, json.dumps({"message": status.message, "nugget": nugget_to_json(status.nugget)})) + self.redis.set(self.type, json.dumps({"message": status.message, "nugget": convert_to_nugget(status.nugget).to_json()})) elif isinstance(status, NoMatchFeedback): self.redis.set(self.type, json.dumps( - {"message": status.message, "nugget": nugget_to_json(status.nugget), - "not_a_match": nugget_to_json(status.not_a_match)})) + {"message": status.message, "nugget": convert_to_nugget(status.nugget).to_json(), + "not_a_match": convert_to_nugget(status.not_a_match).to_json()})) else: raise TypeError("status must be of type CustomMatchFeedback or NuggetMatchFeedback or NoMatchFeedback or None") @@ -218,26 +274,39 @@ def emit(self, exception: BaseException): class _Nuggets(Emitable): + @property + def msg(self) -> list[InformationNugget]: + msg = self.redis.get(self.type) 
+ if isinstance(msg,bytes): + return pickle.loads(msg) + raise TypeError("msg is not bytes") + + def to_json(self): if self.msg is None: return {} - if not isinstance(self.msg, str): - raise TypeError("_Nugget msg must be of type str") - return self.msg + return convert_to_nuggets(self.msg).to_json() def emit(self, status: list[InformationNugget]): - self.redis.set(self.type, json.dumps(nuggets_to_json(status))) + self.redis.set(self.type, pickle.dumps(status)) -class _DocumentBase(Emitable): +class _DocumentBaseToUi(Emitable): + + @property + def msg(self) -> DocumentBase: + msg = self.redis.get(self.type) + if isinstance(msg,bytes): + return pickle.loads(msg) + raise TypeError("msg is not bytes") def to_json(self): if self.msg is None: return {} - return json.loads(self.msg) + return convert_to_document_base(self.msg).to_json() def emit(self, status: DocumentBase): - self.redis.set(self.type, json.dumps(document_base_to_json(status))) + self.redis.set(self.type, pickle.dumps(status)) class _Statistics(Emitable): From 334aff24a96b4f5d165274b59ceafef79031bf77 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 13 Feb 2024 20:24:25 +0100 Subject: [PATCH 238/254] feat: adjust typs for all data-structures --- wannadb_web/worker/data.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/wannadb_web/worker/data.py b/wannadb_web/worker/data.py index 3e0c5e22..fb3df385 100644 --- a/wannadb_web/worker/data.py +++ b/wannadb_web/worker/data.py @@ -298,7 +298,8 @@ def msg(self) -> DocumentBase: msg = self.redis.get(self.type) if isinstance(msg,bytes): return pickle.loads(msg) - raise TypeError("msg is not bytes") + else: + raise TypeError("msg is not bytes") def to_json(self): if self.msg is None: From e2f220ce30555241782e768eed07d04189d516cb Mon Sep 17 00:00:00 2001 From: cophilot Date: Tue, 13 Feb 2024 20:43:03 +0100 Subject: [PATCH 239/254] added endpoints --- wannadb_web/routing/core.py | 77 +++++++++++++++++++++++++++++++++++-- wannadb_web/worker/tasks.py | 1 - 2 files changed, 73 insertions(+), 5 deletions(-) diff --git a/wannadb_web/routing/core.py b/wannadb_web/routing/core.py index 3a6fac54..9581800e 100644 --- a/wannadb_web/routing/core.py +++ b/wannadb_web/routing/core.py @@ -33,7 +33,7 @@ from flask import Blueprint, make_response, request from celery.result import AsyncResult -from wannadb.data.data import Attribute +from wannadb.data.data import Attribute, Document, InformationNugget from wannadb.statistics import Statistics from wannadb_web.Redis.RedisCache import RedisCache from wannadb_web.util import tokenDecode @@ -332,10 +332,10 @@ def sort_nuggets(): return make_response({'task_id': task.id}, 202) -@core_routes.route('/document_base/confirm/nugget', methods=['POST']) -def confirm_nugget(): +@core_routes.route('/document_base/confirm/nugget/custom', methods=['POST']) +def confirm_nugget_custom(): """ - Endpoint to confirm a nugget. + Endpoint to confirm a custom nugget. Example Form Payload: { @@ -397,3 +397,72 @@ def confirm_nugget(): return make_response({'task_id': task.id}, 202) +@core_routes.route('/document_base/confirm/nugget/match', methods=['POST']) +def confirm_nugget_match(): + """ + Endpoint to confirm a match nugget. 
+ + Example Form Payload: + { + "authorization": "your_authorization_token" + "organisationId": "your_organisation_id", + "baseName": "your_document_base_name", + "documentName": "your_document_name", + "documentContent": "your_document_content", + "nuggetText": "nugget_as_text", + "startIndex": "start_index_of_nugget", + "endIndex": "end_index_of_nugget", + "interactiveCallTaskId": "interactive_call_task_id" + } + """ + form = request.form + + authorization = form.get("authorization") + organisation_id: Optional[int] = form.get("organisationId") + base_name = form.get("baseName") + + document_name = form.get("documentName") + document_content = form.get("documentContent") + nugget_text = form.get("nuggetText") + start_index: Optional[int] = form.get("startIndex") + end_index: Optional[int] = form.get("endIndex") + + i_task_id = form.get("interactiveCallTaskId") + + if (organisation_id is None + or base_name is None + or document_name is None + or document_content is None + or authorization is None + or nugget_text is None + or start_index is None + or end_index is None + or i_task_id is None): + + return make_response({"error": "missing parameters"}, 400) + + _token = tokenDecode(authorization) + + if _token is False: + return make_response({"error": "invalid token"}, 401) + + user_id = _token.id + + document = Document(document_name, document_content) + + nugget = InformationNugget(document, start_index, end_index) + + task = DocumentBaseConfirmNugget().apply_async(args=( + user_id, + base_name, + organisation_id, + document_name, + document_content, + nugget, + start_index, + end_index, + i_task_id + )) + + return make_response({'task_id': task.id}, 202) + diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index 0dc429b2..38a37018 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -376,7 +376,6 @@ def match_feedback(nugget: Union[str, InformationNugget], document: Document, if start_index is None or end_index is None: logger.error("Start-index or end-index are missing to find the custom nugget") raise Exception("Start-index or end-index are missing to find the custom nugget") - # TODO workarround because nugget_exist does not work return CustomMatchFeedback(document, start_index, end_index) if isinstance(nugget, InformationNugget): return NuggetMatchFeedback(nugget, None) From 16e46ccbc3f66f1e0365e3695e3098f7de9ad563 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 13 Feb 2024 21:50:20 +0100 Subject: [PATCH 240/254] feat: adjust typs for all data-structures --- wannadb_web/worker/data.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/wannadb_web/worker/data.py b/wannadb_web/worker/data.py index fb3df385..9a0312aa 100644 --- a/wannadb_web/worker/data.py +++ b/wannadb_web/worker/data.py @@ -279,7 +279,8 @@ def msg(self) -> list[InformationNugget]: msg = self.redis.get(self.type) if isinstance(msg,bytes): return pickle.loads(msg) - raise TypeError("msg is not bytes") + else: + raise TypeError("msg is not bytes") def to_json(self): @@ -288,7 +289,11 @@ def to_json(self): return convert_to_nuggets(self.msg).to_json() def emit(self, status: list[InformationNugget]): - self.redis.set(self.type, pickle.dumps(status)) + b:bytes = pickle.dumps(status) + if isinstance(b,bytes): + self.redis.set(self.type, b) + else: + raise TypeError("b is not bytes") class _DocumentBaseToUi(Emitable): From a4b8c4b0670de7dde3cc09ee7462b95fdc4b1841 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 13 Feb 2024 22:10:12 +0100 Subject: 
[PATCH 241/254] feat: adjust typs for all data-structures --- wannadb_web/worker/data.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/wannadb_web/worker/data.py b/wannadb_web/worker/data.py index 9a0312aa..3c33a632 100644 --- a/wannadb_web/worker/data.py +++ b/wannadb_web/worker/data.py @@ -3,7 +3,7 @@ import pickle from abc import abstractmethod, ABC from dataclasses import dataclass -from typing import Any, Union +from typing import Any, Union, Optional from wannadb.data.data import DocumentBase, InformationNugget, Document, Attribute from wannadb.data.signals import BaseSignal @@ -275,8 +275,10 @@ def emit(self, exception: BaseException): class _Nuggets(Emitable): @property - def msg(self) -> list[InformationNugget]: + def msg(self) -> Optional[list[InformationNugget]]: msg = self.redis.get(self.type) + if msg is None: + return None if isinstance(msg,bytes): return pickle.loads(msg) else: @@ -299,8 +301,10 @@ def emit(self, status: list[InformationNugget]): class _DocumentBaseToUi(Emitable): @property - def msg(self) -> DocumentBase: + def msg(self) -> Optional[DocumentBase]: msg = self.redis.get(self.type) + if msg is None: + return None if isinstance(msg,bytes): return pickle.loads(msg) else: From 99f30919fd2b76f3ba27b3d17a5c68da5de142d5 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 13 Feb 2024 22:15:45 +0100 Subject: [PATCH 242/254] fix: doc not5 serializable --- wannadb_web/worker/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wannadb_web/worker/data.py b/wannadb_web/worker/data.py index 3c33a632..aed330fd 100644 --- a/wannadb_web/worker/data.py +++ b/wannadb_web/worker/data.py @@ -108,7 +108,7 @@ def to_json(self): return { "attributes": [attribute.name for attribute in self.attributes], "nuggets": [convert_to_nugget(nugget).to_json() for nugget in self.nuggets], - "documents": [document for document in self.documents] + "documents": [convert_to_document(document).to_json() for document in self.documents] } def convert_to_document_base(document_base: DocumentBase): From d51461d544330e22533202a9dd36da4b53ca188a Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 13 Feb 2024 22:26:08 +0100 Subject: [PATCH 243/254] debug print --- wannadb_web/worker/data.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/wannadb_web/worker/data.py b/wannadb_web/worker/data.py index aed330fd..c194bd6c 100644 --- a/wannadb_web/worker/data.py +++ b/wannadb_web/worker/data.py @@ -4,12 +4,15 @@ from abc import abstractmethod, ABC from dataclasses import dataclass from typing import Any, Union, Optional +import logging from wannadb.data.data import DocumentBase, InformationNugget, Document, Attribute from wannadb.data.signals import BaseSignal from wannadb.statistics import Statistics from wannadb_web.Redis.RedisCache import RedisCache +logger: logging.Logger = logging.getLogger(__name__) + @dataclass class _BaseSignal: identifier:str @@ -291,6 +294,7 @@ def to_json(self): return convert_to_nuggets(self.msg).to_json() def emit(self, status: list[InformationNugget]): + logger.info("emitting Nuggets") b:bytes = pickle.dumps(status) if isinstance(b,bytes): self.redis.set(self.type, b) From 2209da193dc02a45babc429fdbbf60f2e0b51ee7 Mon Sep 17 00:00:00 2001 From: cophilot Date: Tue, 13 Feb 2024 22:39:01 +0100 Subject: [PATCH 244/254] bug fix --- wannadb_web/worker/data.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/wannadb_web/worker/data.py b/wannadb_web/worker/data.py index c194bd6c..9947ffd6 100644 --- a/wannadb_web/worker/data.py 
+++ b/wannadb_web/worker/data.py @@ -295,9 +295,12 @@ def to_json(self): def emit(self, status: list[InformationNugget]): logger.info("emitting Nuggets") + print("emitting Nuggets") b:bytes = pickle.dumps(status) if isinstance(b,bytes): self.redis.set(self.type, b) + elif len(status)< 2: + raise TypeError("status smaller than 2") else: raise TypeError("b is not bytes") From 4e604cd3f529e540f2d8dc5f59c459e0a4bd4956 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Tue, 13 Feb 2024 22:39:32 +0100 Subject: [PATCH 245/254] rm request.id --- wannadb_web/worker/tasks.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index 38a37018..de8097ec 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -109,7 +109,6 @@ def run(self, user_id: int, document_ids: list[int], attributes_strings: list[st """ init api """ - ## todo hier muss self.request.id durchgeleitet werden und in signals(request_id) gespeichert werden api = WannaDB_WebAPI(user_id, base_name, organisation_id) """ @@ -291,8 +290,8 @@ class DocumentBaseInteractiveTablePopulation(BaseTask): name = "DocumentBaseInteractiveTablePopulation" def run(self, user_id: int, base_name: str, organisation_id: int): - self._signals = Signals(str(self.request.id)) - self._redis_client = RedisCache(str(self.request.id)) + self._signals = Signals(str(user_id)) + self._redis_client = RedisCache(str(user_id)) self.load() api = WannaDB_WebAPI(user_id, base_name, organisation_id) @@ -308,8 +307,8 @@ class DocumentBaseGetOrderedNuggets(BaseTask): name = "DocumentBaseGetOrderedNuggets" def run(self, user_id: int, base_name: str, organisation_id: int, document_name: str, document_content: str): - self._signals = Signals(str(self.request.id)) - self._redis_client = RedisCache(str(self.request.id)) + self._signals = Signals(str(user_id)) + self._redis_client = RedisCache(str(user_id)) self.load() api = WannaDB_WebAPI(user_id, base_name, organisation_id) @@ -338,7 +337,7 @@ def run(self, user_id: int, base_name: str, organisation_id: int, :param end_index: end of the nugget in the document (optional) if start and end is None the nugget is not in the document :param interactive_call_task_id: the same task id that's used for interactive call """ - self._signals = Signals(interactive_call_task_id) + self._signals = Signals(str(user_id)) self._redis_client = RedisCache(str(user_id)) self.load() From 4851aa3defc0bb4ccf16bab21743e7d7156fee58 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Wed, 14 Feb 2024 14:58:23 +0100 Subject: [PATCH 246/254] bug(CreateDocumentBase): fix if doc are null --- wannadb_web/worker/tasks.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/wannadb_web/worker/tasks.py b/wannadb_web/worker/tasks.py index de8097ec..1bc01125 100644 --- a/wannadb_web/worker/tasks.py +++ b/wannadb_web/worker/tasks.py @@ -123,11 +123,21 @@ def run(self, user_id: int, document_ids: list[int], attributes_strings: list[st raise Exception("Invalid statistics") docs = getDocuments(document_ids, user_id) + if docs[0] is tuple[None,None]: + raise Exception(f"user with user id:{user_id} has no document with the document_ids: {document_ids}") + self.update(State.PENDING) documents: list[Document] = [] if docs: for doc in docs: - documents.append(Document(doc[0], doc[1])) + name = doc[0] + text = doc[1] + if name is None: + raise Exception("Document Name is none") + if text is None: + raise Exception("Document text is none") + 
documents.append(Document(name, text)) + else: self.update(State.ERROR) raise Exception("No documents found") From 36958fa2fc88cc1320956f27c7b0f26e75820629 Mon Sep 17 00:00:00 2001 From: cophilot Date: Thu, 15 Feb 2024 16:43:02 +0100 Subject: [PATCH 247/254] added rebuild script --- prod/rebuild.sh | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 prod/rebuild.sh diff --git a/prod/rebuild.sh b/prod/rebuild.sh new file mode 100644 index 00000000..8ba67ab1 --- /dev/null +++ b/prod/rebuild.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +docker stop $(docker ps -a -q) +docker rm $(docker ps -a -q) + +docker compose -f docker-compose-prod.yaml build + +docker compose -f docker-compose-prod.yaml up -d From daaa6082cea64c3fee731807d3dc470c54f84238 Mon Sep 17 00:00:00 2001 From: leonlolly <82408813+leonlolly@users.noreply.github.com> Date: Wed, 6 Mar 2024 12:21:39 +0100 Subject: [PATCH 248/254] Update README.md --- README.md | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index c3e922e0..dbf95759 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,30 @@ -# Start the docker +# Start the Web-Backend docker build -beim ersten mal +to build/start the production ``` -docker compose build +docker compose -f "docker-compose-prod.yaml" build +docker compose -f "docker-compose-prod.yaml" up ``` -zum weiter arbeiten +for developers use ``` +docker compose build docker compose up ``` -danach sollte das backend gestartet sein +the flask and other services start automaticly. +for more information here.... + +you can use `code` to attach the container and then work in docker + +git only works when you install gh and make gh auth +then you can work as usual -ihr könnt mit `code` den container attachen und dann im docker arbeiten +a docker rebuild is only necessary if dependencies have changed -git functioniert erst wenn ihr gh installiert und gh auth macht -anschließend könnt ihr wie gewohn arbeiten -ein docker rebuild ist nur nötig wenn sich dependencies geändert haben # WannaDB: Ad-hoc SQL Queries over Text Collections From a7e004c78c08073984830a95c86492606862b2f4 Mon Sep 17 00:00:00 2001 From: cophilot Date: Fri, 15 Mar 2024 10:08:02 +0100 Subject: [PATCH 249/254] added workflows --- ...sion.yml => new_stable_version_manual.yml} | 2 +- .github/workflows/new_stable_version_push.yml | 27 +++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) rename .github/workflows/{new_stable_version.yml => new_stable_version_manual.yml} (93%) create mode 100644 .github/workflows/new_stable_version_push.yml diff --git a/.github/workflows/new_stable_version.yml b/.github/workflows/new_stable_version_manual.yml similarity index 93% rename from .github/workflows/new_stable_version.yml rename to .github/workflows/new_stable_version_manual.yml index 452828e9..edb205d8 100644 --- a/.github/workflows/new_stable_version.yml +++ b/.github/workflows/new_stable_version_manual.yml @@ -1,4 +1,4 @@ -name: Create New Stable Version +name: Create New Stable Version (Manual) on: workflow_dispatch: diff --git a/.github/workflows/new_stable_version_push.yml b/.github/workflows/new_stable_version_push.yml new file mode 100644 index 00000000..dcd93311 --- /dev/null +++ b/.github/workflows/new_stable_version_push.yml @@ -0,0 +1,27 @@ +name: Create New Stable Version (Push) + +on: + push: + branches: + - main + +jobs: + create_pull_request: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v2 + + - name: Check commit message for 
[stable] + id: check_commit_message + run: echo ::set-output name=contains_stable::$(if grep -q "\[stable\]" <<< "$(git log --format=%B -n 1)"; then echo true; else echo false; fi) + + - name: Create Pull Request + if: steps.check_commit_message.outputs.contains_stable == 'true' + uses: peter-evans/create-pull-request@v3 + with: + token: ${{ secrets.GITHUB_TOKEN }} + branch: stable + base: main + title: Automated New Stable Version + body: This pull request was automatically created by the workflow and contains the latest stable version of the repository. From aee009afe0cb70068e1be3c1262d0ef9ed6755e4 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Sun, 17 Mar 2024 12:40:59 +0100 Subject: [PATCH 250/254] add documentation --- README.md | 4 +- ROUTES.md | 651 ++++++++++++++++++++++++------- WEBSERVER_STRUCTURE.md | 114 ++++++ wannadb_web/postgres/__init__.py | 0 4 files changed, 629 insertions(+), 140 deletions(-) create mode 100644 WEBSERVER_STRUCTURE.md delete mode 100644 wannadb_web/postgres/__init__.py diff --git a/README.md b/README.md index dbf95759..94b6b138 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,9 @@ docker compose up ``` the flask and other services start automaticly. -for more information here.... +for more information click [here](https://github.com/lw86ruwo/wannadbBackend/blob/main/WEBSERVER_STRUCTURE.md) + +so see all the routes and the structure of the webserver click [here](https://github.com/lw86ruwo/wannadbBackend/blob/main/ROUTES.md) you can use `code` to attach the container and then work in docker diff --git a/ROUTES.md b/ROUTES.md index 59bcba64..c8f0751e 100644 --- a/ROUTES.md +++ b/ROUTES.md @@ -4,45 +4,40 @@ The Flask app is running by default on port 8000. Here we assume that the app is --- -- [HelloWorld](#helloworld) -- [Register](#register) -- [Login](#login) -- [Upload Files](#upload-files) -- [Create Tables (Development)](#create-tables) ---- - -## HelloWorld +- [User Routes](#User-Routes) +- [File Routes](#File-Routes) +- [Core Routes](#Core-routes) -**GET** - -``` -http://localhost:8000/ -``` --- - -## Register - -**POST** - -Register a new user. +## User Routes + +- [Register a new user](#register-a-new-user) +- [Login as a user](#login-as-a-user) +- [Delete a user](#delete-a-user) +- [Create an organization](#create-an-organization) +- [Leave an organization](#leave-an-organization) +- [Get organizations for a user](#get-organizations-for-a-user) +- [Get organization name by ID](#get-organization-name-by-id) +- [Get organization names for a user](#get-organization-names-for-a-user) +- [Add a user to an organization](#add-a-user-to-an-organization) +- [Get members of an organization](#get-members-of-an-organization) +- [Get user name suggestions](#get-user-name-suggestions) + +### Register a new user. ``` http://localhost:8000/register ``` -### Body - -```json -{ - "username": "username", - "password": "password" -} -``` - -### Response - +- Body + ```json + { + "username": "username", + "password": "password" + } + ``` - 422 : User register **failed**: ```json { @@ -59,27 +54,17 @@ http://localhost:8000/register --- -## Login - -**POST** - -Login as user +### Login as a user. ``` http://localhost:8000/login ``` - -### Body - -```json -{ - "username": "username", - "password": "password" -} -``` - -### Response - +- Body + ```json + { + "username": "username", + "password": "password" + } - 401: User login **failed**: ```json { @@ -96,204 +81,592 @@ http://localhost:8000/login --- -## Organisation +### Delete a user. 
-**POST** +``` +http://localhost:8000/deleteUser/ +``` +- Body + ```json + { + "username": "username", + "password": "password" + } + ``` +- 401: No authorization provided. +- 400: Invalid authorization token. +- 401: User not authorized. +- 401: Wrong Password. +- 204: User deleted successfully. +- 409: User deletion failed. -creatOrganisation +--- +### Create an organization. ``` -http://localhost:8000/creatOrganisation +http://localhost:8000/createOrganisation ``` +- Body + ```json + { + "organisationName": "organisation_name" + } + ``` +- 401: No authorization provided. +- 400: Invalid authorization token. +- 200: Organization created successfully. +- 409: Organization creation failed. + +--- -### Body +### Leave an organization. -```json -{ - "authorization": "---", - "organisationName": "---" -} ``` +http://localhost:8000/leaveOrganisation +``` +- Body + ```json + { + "organisationId": "organisation_id" + } + ``` +- 401: No authorization provided. +- 400: Invalid authorization token. +- 200: User left the organization successfully. +- 500: Error leaving organization. -### Response +--- -- 409: duplication **Conflict**: +### Get organizations for a user. +``` +http://localhost:8000/getOrganisations +``` +- Body ```json { - "error": "name already exists." + "authorization": "---authorization---jwt---" } ``` -- 200: **success**: +- 401: No authorization provided. +- 400: Invalid authorization token. +- 200: Retrieved user's organizations successfully. ```json { - "organisation_id": "---" + "organisation_ids": [number] + } +- 404: User is not in any organization. +- 409: Error retrieving organizations. + +--- + +### Get organization name by ID. +``` +http://localhost:8000/getOrganisationName/<_id> +``` +- URL + ```json + { + _id: "organisation_id" + } +- Body + ```json + { + "authorization": "---authorization---jwt---" } ``` +- 401: No authorization provided. +- 400: Invalid authorization token. +- 200: Retrieved organization name successfully. + ```json + { + "organisation_name": [string] + } +- 404: Organization not found. +- 409: Error retrieving organization name. -**GET** +--- -getOrganisationMembers +### Get organization names for a user. ``` -http://localhost:8000/getOrganisationMembers/ +http://localhost:8000/getOrganisationNames ``` -### Header +- Header + ```json + { + "authorization": "---authorization---jwt---" + } +- 401: No authorization provided. +- 400: Invalid authorization token. +- 200: Retrieved user's organization names successfully. + ```json + { + "organisations": [number] + } +- 404: User is not in any organization. +- 409: Error retrieving organization names. -```json -{ - "authorization": "---" -} -``` +--- -### Response +### Add a user to an organization. -- 404: Organisation **not found**: +``` +http://localhost:8000/addUserToOrganisation +``` +- Header ```json { - "error": "Organisation not found." + "authorization": "---authorization---jwt---" } - ``` -- 401: User **not authorized**: +- Body ```json { - "error": "User not authorized." + "organisationId": "organisation_id", + "newUser": "new_user" } ``` -- 200: **success**: +- 401: No authorization provided. +- 400: Invalid authorization token. +- 200: User added to the organization successfully. ```json { - "members": ["username1", "username2", "username3"] + "organisation_id": number } ``` +- 409: Error adding user to organization. -**POST** +--- -leaveOrganisation +### Get members of an organization. 
+``` +http://localhost:8000/getOrganisationMembers/<_id> +``` +- URL + ```json + { + _id: "organisation_id" + } + ``` +- Header + ```json + { + "authorization": "---authorization---jwt---" + } + ``` +- 401: No authorization provided. +- 400: Invalid authorization token. +- 200: Retrieved organization members successfully. + ```json + { + "members": [string] + } + ``` +- 404: Organization not found. +- 409: Error retrieving organization members. -_Leave a organisation and delete the organisation if the user is the last member._ +--- +### Get user name suggestions. ``` -http://localhost:8000/leaveOrganisation +http://localhost:8000/get/user/suggestion/<_prefix> ``` -### Body +- URL + ```json + { + _prefix: "organisation_id" + } + ``` +- 401: No authorization provided. +- 400: Invalid authorization token. +- 200: Retrieved username suggestions successfully. + ```json + { + "usernames": [string] + } + ``` + +--- +## File Routes + +- [Upload File](#upload-file) +- [Get Files](#get-files) +- [Get document base for an organization](#get-document-base-for-an-organization) +- [Update file content](#update-file-content) +- [Delete a file](#delete-a-file) +- [Get a file](#get-a-file) +--- -```json -{ - "authorization": "---", - "organisationId": "---" -} -``` +### Upload File -### Response +``` +http://localhost:8000/data/upload/file +``` -- 500: **error**: +- Form + - `file`: The file to upload. + - `organisationId`: ID of the organization. +- Header ```json { - "status": false, - "msg": "error message" + "authorization": "---authorization---jwt---" } ``` -- 200: **success**: +- 401: No authorization provided. +- 200: File uploaded successfully. ```json { - "status": true + "document_ids": [number] } ``` +- 400: Invalid file type. + ```json + { + "document_ids": [string] + } + ``` +- 207: Multiple files uploaded, some with errors. + ```json + { + "document_ids": [number|string] + } + + ``` -**POST** +--- -addUserToOrganisation +### Get Files ``` -http://localhost:8000/addUserToOrganisation +http://localhost:8000/data/organization/get/files/<_id> ``` -### Body +- URL + ```json + { + _id: "organisation_id" + } +- Header + ```json + { + "authorization": "---authorization---jwt---" + } + ``` +- 401: No authorization provided. +- 200: Retrieved organization files successfully. + ```json + { + documents: "id" + } + ``` -```json -{ - "authorization": "---", - "organisationName": "---", - "newUser": "---" -} -``` +--- + +### Get document base for an organization. -### Response +``` +http://localhost:8000/data/organization/get/documentbase/<_id> +``` -- 409: duplication **Conflict**: (temp) +- URL ```json { - "error": "error message" + _id: "organisation_id" + } +- Header + ```json + { + "authorization": "---authorization---jwt---" } ``` -- 200: **success**: +- 401: No authorization provided. +- 200: Retrieved document base successfully. ```json { - "organisation_id": "---" + document_base: "document_base" } ``` --- -## Upload Files +### Update file content. -**POST** +``` +http://localhost:8000/data/update/file/content +``` -Upload files. +- Header + ```json + { + "authorization": "---authorization---jwt---" + } + ``` +- Body + ```json + { + "documentId": "document_id", + "newContent": "new_content" + } + ``` +- 401: No authorization provided. +- 200: File content updated successfully. + ```json + { + "status": bool + } + ``` + +--- + +### Delete a file. 
``` -http://localhost:8000/data/upload +http://localhost:8000/data/file/delete ``` -### Body +- Header + ```json + { + "authorization": "---authorization---jwt---" + } + ``` +- Body + ```json + { + "documentId": "document_id" + } + ``` +- 401: No authorization provided. +- 200: File deleted successfully. + ```json + { + "status": bool + } + ``` + +--- + +### Get a file. -- `file` (form-data): Files to upload -- `authorization` (form-data): User authorization token -- `organisationId` (form-data): Organization ID +``` +http://localhost:8000/data/get/file/<_id> +``` -### Response -- 400: Upload **failed**: +- URL + ```json + { + _id: "document_id" + } +- Header + ```json + { + "authorization": "---authorization---jwt---" + } ``` - Returns a list of document file types. +- 401: No authorization provided. +- 200: Retrieved file successfully. + ```json + { + document_ids: [list, string, bytes] + } ``` -- 207: Upload **partial success**: +- 404: File not found. + ```json + { + document_ids: [] + } ``` - Returns a list of document file types and documentIds. +- 206: Partial content retrieved. + ```json + { + document_ids: [list, string, bytes] + } ``` -- 201: Upload **success**: + +-- + +## Core Routes + +This module defines Flask routes for the 'core' functionality of the Wannadb UI. + +- [Create a document base](#create-a-document-base) +- [Load a document base](#load-a-document-base) +- [Interactive document population](#interactive-document-population) +- [Add attributes to a document base](#add-attributes-to-a-document-base) +- [Update the attributes of a document base](#update-the-attributes-of-a-document-base) +- [Sort nuggets](#sort-nuggets) +- [Confirm a custom nugget](#confirm-a-custom-nugget) +- [Confirm a match nugget](#confirm-a-match-nugget) +- [Get document base for an organization](#get-document-base-for-an-organization) + + +--- + +### Create a document base + +``` +http://localhost:8000/core/create_document_base +``` + +- Form + - `authorization`: Your authorization token. + - `organisationId`: Your organization ID. + - `baseName`: Your document base name. + - `document_ids`: Comma-separated list of document IDs. + - `attributes`: Comma-separated list of attributes. +- 401: No authorization provided. +- 200: Document base created successfully. + ```json + {"task_id": "task_id"} ``` - Returns a list of documentIds. + +--- + +### Load a document base. + +``` +http://localhost:8000/core/document_base/load +``` + + +- Form + - `authorization`: Your authorization token. + - `organisationId`: Your organization ID. + - `baseName`: Your document base name. +- 401: No authorization provided. +- 200: Document base loaded successfully. + ```json + {"task_id": "task_id"} ``` -## Get Dokument +--- -**POST** +### Interactive document population. -get file. ``` -http://localhost:8000/dev/getDocument/<_id> +http://localhost:8000/core/document_base/interactive ``` -### Body +- Form + - `authorization`: Your authorization token. + - `organisationId`: Your organization ID. + - `baseName`: Your document base name. +- 401: No authorization provided. +- 200: Document base populated interactively. + ```json + {"task_id": "task_id"} + ``` -- None +--- -### Response +### Add attributes to a document base. + +``` +http://localhost:8000/core/document_base/attributes/add +``` -- String of File Content + +- Form + - `authorization`: Your authorization token. + - `organisationId`: Your organization ID. + - `baseName`: Your document base name. + - `attributes`: Comma-separated list of attributes. 
+- 401: No authorization provided. +- 200: Attributes added to document base successfully. + ```json + {"task_id": "task_id"} + ``` --- -## create-tables +### Update the attributes of a document base. -**POST** +``` +http://localhost:8000/core/document_base/attributes/update +``` -Create tables (Development). + +- Form + - `authorization`: Your authorization token. + - `organisationId`: Your organization ID. + - `baseName`: Your document base name. + - `attributes`: Comma-separated list of attributes. +- 401: No authorization provided. +- 200: Attributes updated successfully. + ```json + {"task_id": "task_id"} + ``` + +--- + +### Sort nuggets. + +``` +http://localhost:8000/core/document_base/order/nugget +``` + +- Form + - `authorization`: Your authorization token. + - `organisationId`: Your organization ID. + - `baseName`: Your document base name. + - `documentName`: Your document name. + - `documentContent`: Your document content. +- 401: No authorization provided. +- 200: Nuggets sorted successfully. + ```json + {"task_id": "task_id"} + ``` + +--- + +### Confirm a custom nugget. ``` -http://localhost:8000/create-tables +http://localhost:8000/core/document_base/confirm/nugget/custom ``` + +- Form + - `authorization`: Your authorization token. + - `organisationId`: Your organization ID. + - `baseName`: Your document base name. + - `documentName`: Your document name. + - `documentContent`: Your document content. + - `nuggetText`: Nugget as text. + - `startIndex`: Start index of the nugget. + - `endIndex`: End index of the nugget. + - `interactiveCallTaskId`: Interactive call task ID. +- 401: No authorization provided. +- 200: Nugget confirmed successfully. + ```json + {"task_id": "task_id"} + ``` + +--- + +### Confirm a match nugget. + +``` +http://localhost:8000/core/document_base/confirm/nugget/match +``` + +- Form + - `authorization`: Your authorization token. + - `organisationId`: Your organization ID. + - `baseName`: Your document base name. + - `documentName`: Your document name. + - `documentContent`: Your document content. + - `nuggetText`: Nugget as text. + - `startIndex`: Start index of the nugget. + - `endIndex`: End index of the nugget. + - `interactiveCallTaskId`: Interactive call task ID. +- 401: No authorization provided. +- 200: Nugget confirmed successfully. + ```json + {"task_id": "task_id"} + ``` + + + diff --git a/WEBSERVER_STRUCTURE.md b/WEBSERVER_STRUCTURE.md new file mode 100644 index 00000000..c5ac67ef --- /dev/null +++ b/WEBSERVER_STRUCTURE.md @@ -0,0 +1,114 @@ +## File Structure + +``` +. +├── entrypoint.sh +├── util.py +├── __init__.py +├── .env +│ └── .dev +├── Postgres +│ ├── queries.py +│ ├── transactions.py +│ ├── util.py +│ └── __init__.py +├── Redis +│ ├── RedisCache.py +│ ├── util.py +│ └── __init__.py +├── routing +│ ├── core.py +│ ├── dev.py +│ ├── files.py +│ ├── user.py +│ └── __init__.py +├── SQLite +│ ├── Cache_DB.py +│ ├── util.py +│ └── __init__.py +└── worker + ├── data.py + ├── tasks.py + ├── util.py + ├── Web_API.py + └── __init__.py +``` + +### Web Root Directory + +``` +├── entrypoint.sh +├── util.py +├── __init__.py +└── .env + └── .dev +``` +this contains the entrypoint for the webserver, a utility file for general functionality for the whole project, +and a .env file for environment configuration. +a .prod should be added for production environment configuration if tis is needed. 
+for these changes needs also be done in the Dockerfile + +### Postgres +``` +Postgres +├── queries.py +├── transactions.py +├── util.py +└── __init__.py +``` +Directory related to PostgresSQL database functionality. +This is an abstraction layer for the database connection and queries. + +### Redis +``` +Redis +├── RedisCache.py +├── util.py +└── __init__.py +``` +Directory related to Redis cache functionality. +This is an abstraction layer for the Redis cache. +It also scopes the cache to a specific namespace for the users. + +### Routing +``` +routing +├── core.py +├── dev.py +├── files.py +├── user.py +└── __init__.py +``` +Directory related to routing functionality for the flask webserver. +- core.py: contains the routes for the main application routes. +- dev.py: contains the routes for developers. +- files.py: contains the routes for file uploads and downloads. +- user.py: contains the routes for user authentication and management. + + +### SQLite +``` +SQLite +│ ├── Cache_DB.py +│ ├── util.py +│ └── __init__.py +``` +Directory related to SQLite database functionality. +This is an abstraction layer for the database connection and queries. +It also scopes the DB to a specific namespace for the users. + + +### Worker +``` +worker + ├── data.py + ├── tasks.py + ├── util.py + ├── Web_API.py + └── __init__.py +``` +Directory related to worker functionality, background tasks and asynchronous processing. +- data.py: contains all altert or new types. +- tasks.py: contains all the tasks that are to be run in the background. +- util.py: contains utility functions for the worker. +- Web_API.py: contains the API for the worker to communicate with the core. diff --git a/wannadb_web/postgres/__init__.py b/wannadb_web/postgres/__init__.py deleted file mode 100644 index e69de29b..00000000 From c5a977dae377ea4d029beab657ee46c2c03db289 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Sun, 17 Mar 2024 12:47:38 +0100 Subject: [PATCH 251/254] add documentation --- ROUTES.md | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/ROUTES.md b/ROUTES.md index c8f0751e..3abca20a 100644 --- a/ROUTES.md +++ b/ROUTES.md @@ -154,6 +154,7 @@ http://localhost:8000/getOrganisations { "organisation_ids": [number] } + ``` - 404: User is not in any organization. - 409: Error retrieving organizations. @@ -168,6 +169,7 @@ http://localhost:8000/getOrganisationName/<_id> { _id: "organisation_id" } + ``` - Body ```json { @@ -204,6 +206,7 @@ http://localhost:8000/getOrganisationNames { "organisations": [number] } + ``` - 404: User is not in any organization. - 409: Error retrieving organization names. @@ -219,6 +222,7 @@ http://localhost:8000/addUserToOrganisation { "authorization": "---authorization---jwt---" } + ``` - Body ```json { @@ -245,7 +249,7 @@ http://localhost:8000/getOrganisationMembers/<_id> - URL ```json { - _id: "organisation_id" + "_id": "organisation_id" } ``` - Header @@ -275,7 +279,7 @@ http://localhost:8000/get/user/suggestion/<_prefix> - URL ```json { - _prefix: "organisation_id" + "_prefix": "organisation_id" } ``` - 401: No authorization provided. @@ -283,7 +287,7 @@ http://localhost:8000/get/user/suggestion/<_prefix> - 200: Retrieved username suggestions successfully. ```json { - "usernames": [string] + "usernames": ["string"] } ``` @@ -317,19 +321,19 @@ http://localhost:8000/data/upload/file - 200: File uploaded successfully. ```json { - "document_ids": [number] + "document_ids": ["number"] } ``` - 400: Invalid file type. 
```json { - "document_ids": [string] + "document_ids": ["string"] } ``` - 207: Multiple files uploaded, some with errors. ```json { - "document_ids": [number|string] + "document_ids": ["number"|"string"] } ``` @@ -347,6 +351,7 @@ http://localhost:8000/data/organization/get/files/<_id> { _id: "organisation_id" } + ``` - Header ```json { @@ -374,6 +379,7 @@ http://localhost:8000/data/organization/get/documentbase/<_id> { _id: "organisation_id" } + ``` - Header ```json { @@ -413,7 +419,7 @@ http://localhost:8000/data/update/file/content - 200: File content updated successfully. ```json { - "status": bool + "status": "bool" } ``` @@ -441,7 +447,7 @@ http://localhost:8000/data/file/delete - 200: File deleted successfully. ```json { - "status": bool + "status": "bool" } ``` @@ -457,8 +463,9 @@ http://localhost:8000/data/get/file/<_id> - URL ```json { - _id: "document_id" + "_id": "document_id" } + ``` - Header ```json { @@ -469,19 +476,19 @@ http://localhost:8000/data/get/file/<_id> - 200: Retrieved file successfully. ```json { - document_ids: [list, string, bytes] + "document_ids": ["list", "string", "bytes"] } ``` - 404: File not found. ```json { - document_ids: [] + "document_ids": [] } ``` - 206: Partial content retrieved. ```json { - document_ids: [list, string, bytes] + "document_ids": ["list", "string", "bytes"] } ``` @@ -532,9 +539,9 @@ http://localhost:8000/core/document_base/load - Form - - `authorization`: Your authorization token. - - `organisationId`: Your organization ID. - - `baseName`: Your document base name. +- `authorization`: Your authorization token. +- `organisationId`: Your organization ID. +- `baseName`: Your document base name. - 401: No authorization provided. - 200: Document base loaded successfully. ```json From e070618a32568ea105cb202bf469cb0084df522c Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Sun, 17 Mar 2024 12:50:28 +0100 Subject: [PATCH 252/254] add documentation --- ROUTES.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ROUTES.md b/ROUTES.md index 3abca20a..cb804ce7 100644 --- a/ROUTES.md +++ b/ROUTES.md @@ -65,6 +65,7 @@ http://localhost:8000/login "username": "username", "password": "password" } + ``` - 401: User login **failed**: ```json { @@ -167,7 +168,7 @@ http://localhost:8000/getOrganisationName/<_id> - URL ```json { - _id: "organisation_id" + "_id": "organisation_id" } ``` - Body @@ -183,6 +184,7 @@ http://localhost:8000/getOrganisationName/<_id> { "organisation_name": [string] } + ``` - 404: Organization not found. - 409: Error retrieving organization name. @@ -199,6 +201,7 @@ http://localhost:8000/getOrganisationNames { "authorization": "---authorization---jwt---" } + ``` - 401: No authorization provided. - 400: Invalid authorization token. - 200: Retrieved user's organization names successfully. @@ -235,7 +238,7 @@ http://localhost:8000/addUserToOrganisation - 200: User added to the organization successfully. ```json { - "organisation_id": number + "organisation_id": "number" } ``` - 409: Error adding user to organization. 
@@ -335,7 +338,6 @@ http://localhost:8000/data/upload/file { "document_ids": ["number"|"string"] } - ``` --- From 85e3401c0e337d8204ddeecea34aeea398b73ce7 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Sun, 17 Mar 2024 12:52:13 +0100 Subject: [PATCH 253/254] add documentation --- ROUTES.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/ROUTES.md b/ROUTES.md index cb804ce7..0af8b907 100644 --- a/ROUTES.md +++ b/ROUTES.md @@ -153,7 +153,7 @@ http://localhost:8000/getOrganisations - 200: Retrieved user's organizations successfully. ```json { - "organisation_ids": [number] + "organisation_ids": ["number"] } ``` - 404: User is not in any organization. @@ -182,7 +182,7 @@ http://localhost:8000/getOrganisationName/<_id> - 200: Retrieved organization name successfully. ```json { - "organisation_name": [string] + "organisation_name": ["string"] } ``` - 404: Organization not found. @@ -207,7 +207,7 @@ http://localhost:8000/getOrganisationNames - 200: Retrieved user's organization names successfully. ```json { - "organisations": [number] + "organisations": ["number"] } ``` - 404: User is not in any organization. @@ -266,7 +266,7 @@ http://localhost:8000/getOrganisationMembers/<_id> - 200: Retrieved organization members successfully. ```json { - "members": [string] + "members": ["string"] } ``` - 404: Organization not found. @@ -336,7 +336,7 @@ http://localhost:8000/data/upload/file - 207: Multiple files uploaded, some with errors. ```json { - "document_ids": ["number"|"string"] + "document_ids": ["number|string"] } ``` @@ -351,7 +351,7 @@ http://localhost:8000/data/organization/get/files/<_id> - URL ```json { - _id: "organisation_id" + "_id": "organisation_id" } ``` - Header @@ -364,7 +364,7 @@ http://localhost:8000/data/organization/get/files/<_id> - 200: Retrieved organization files successfully. ```json { - documents: "id" + "documents": "id" } ``` @@ -379,7 +379,7 @@ http://localhost:8000/data/organization/get/documentbase/<_id> - URL ```json { - _id: "organisation_id" + "_id": "organisation_id" } ``` - Header @@ -392,7 +392,7 @@ http://localhost:8000/data/organization/get/documentbase/<_id> - 200: Retrieved document base successfully. ```json { - document_base: "document_base" + "document_base": "document_base" } ``` From 11a316fb8a33ad4038fcc16a3e5a545333d25f64 Mon Sep 17 00:00:00 2001 From: lw86ruwo Date: Mon, 18 Mar 2024 12:35:30 +0100 Subject: [PATCH 254/254] adj README.md --- README.md | 60 +++++++++++++++++++++++++++---------------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index 94b6b138..78401b5d 100644 --- a/README.md +++ b/README.md @@ -1,33 +1,3 @@ -# Start the Web-Backend docker build - -to build/start the production - -``` -docker compose -f "docker-compose-prod.yaml" build -docker compose -f "docker-compose-prod.yaml" up -``` - -for developers use - -``` -docker compose build -docker compose up -``` - -the flask and other services start automaticly. 
-for more information click [here](https://github.com/lw86ruwo/wannadbBackend/blob/main/WEBSERVER_STRUCTURE.md)
-
-so see all the routes and the structure of the webserver click [here](https://github.com/lw86ruwo/wannadbBackend/blob/main/ROUTES.md)
-
-you can use `code` to attach the container and then work in docker
-
-git only works when you install gh and make gh auth
-then you can work as usual
-
-a docker rebuild is only necessary if dependencies have changed
-
-
-
 # WannaDB: Ad-hoc SQL Queries over Text Collections
 
 ![Document collection and corresponding table.](header_image.svg)
@@ -203,3 +173,33 @@ The `Statistics` object allows you to easily record information during runtime.
 ### Architecture: GUI
 
 The GUI implementation can be found in the `wannadb_ui` package. `wannadb_api.py` provides an asynchronous API for the `wannadb` library using PyQt's slots and signals mechanism. `main_window.py`, `document_base.py`, and `interactive_window.py` contain different parts of the user interface, and `common.py` contains base classes for some recurring user interface elements.
+
+---
+
+# Start the Web-Backend docker build
+
+to build and start the production setup
+
+```
+docker compose -f "docker-compose-prod.yaml" build
+docker compose -f "docker-compose-prod.yaml" up
+```
+
+for development, use
+
+```
+docker compose build
+docker compose up
+```
+
+the Flask app and the other services start automatically.
+for more information, click [here](https://github.com/lw86ruwo/wannadbBackend/blob/main/WEBSERVER_STRUCTURE.md)
+
+to see all the routes and the structure of the webserver, click [here](https://github.com/lw86ruwo/wannadbBackend/blob/main/ROUTES.md)
+
+you can use `code` to attach to the container and then work inside Docker
+
+git only works once you have installed gh and run gh auth;
+after that you can work as usual
+
+a Docker rebuild is only necessary if dependencies have changed.
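
To make the documented flow concrete, the sketch below walks through the routes described in ROUTES.md above: register, log in, upload a file, and start document-base creation. It is a minimal illustration under stated assumptions, not a confirmed client: the paths, form fields, and `authorization` header are taken from ROUTES.md as added in the patches above, while the host, credentials, organisation id, file name, attribute list, and the assumption that the login response exposes the JWT under a `token` key are purely illustrative.

```python
# Minimal client-side sketch against the routes documented in ROUTES.md.
# Paths, form fields, and the "authorization" header follow the documentation above;
# the credentials, organisation id, file, attributes, and the "token" response key
# are assumptions made for illustration only.
import requests

BASE = "http://localhost:8000"

# Register and log in; both routes take a username/password body.
requests.post(f"{BASE}/register", json={"username": "alice", "password": "secret"})
login = requests.post(f"{BASE}/login", json={"username": "alice", "password": "secret"})
token = login.json().get("token")  # assumed key name for the returned JWT

# Upload a document as form-data (route: /data/upload/file).
with open("report.txt", "rb") as f:
    upload = requests.post(
        f"{BASE}/data/upload/file",
        headers={"authorization": token},
        data={"organisationId": 1},
        files={"file": f},
    )
document_ids = upload.json()["document_ids"]

# Create a document base; the route answers with a Celery task id (HTTP 202 in the code above).
create = requests.post(
    f"{BASE}/core/create_document_base",
    data={
        "authorization": token,
        "organisationId": 1,
        "baseName": "demo_base",
        "document_ids": ",".join(str(d) for d in document_ids),
        "attributes": "name,date,location",
    },
)
print(create.json()["task_id"])
```

ROUTES.md does not document how the results of these tasks are returned to the client; the worker-side patches above (wannadb_web/worker/data.py) suggest that progress and results such as `document_base_to_ui` and `feedback_request_to_ui` are published through the Redis-backed `Signals` object, so a real client would rely on whatever status or polling mechanism the backend exposes for that purpose.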