Add API tests
mawandm committed Apr 4, 2024
1 parent 5485ca2 commit 94f370e
Showing 10 changed files with 65 additions and 41 deletions.
48 changes: 36 additions & 12 deletions .github/workflows/api.yml
@@ -2,23 +2,47 @@ name: Build and test
 
 on:
   push:
-    paths:
-      - "nesis/api/core/**"
-      - "nesis/api/tests/**"
-      - "nesis/api/core/requirements*"
-  pull_request:
-    paths:
-      - "nesis/api/core/**"
-      - "nesis/api/tests/**"
-      - "nesis/api/core/requirements*"
+#    paths:
+#      - "nesis/api/core/**"
+#      - "nesis/api/tests/**"
+#      - "nesis/api/core/requirements*"
+#  pull_request:
+#    paths:
+#      - "nesis/api/core/**"
+#      - "nesis/api/tests/**"
+#      - "nesis/api/core/requirements*"
 
 jobs:
-  check-code-quality:
+
+  format:
+    runs-on: ubuntu-latest
+    name: Check API code format
+    steps:
+      - name: Check out source
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.11"
+          cache: 'poetry'
+  test:
     runs-on: ubuntu-latest
-    name: Check code Quality
+    name: Test API
     steps:
       - name: Check out source
         uses: actions/checkout@v3
         with:
           fetch-depth: 0
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
           python-version: "3.11"
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt -r requirements-test.txt
+      - name: Run unit tests
+        run: |
+          pytest nesis/api/tests/
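The new `test` job amounts to installing the API requirements and running `pytest nesis/api/tests/`. As a rough illustration of the kind of test that step collects, here is a minimal sketch; the module name and the fixture are hypothetical and not part of this commit:

```python
# nesis/api/tests/test_example.py -- hypothetical module name, for illustration only
import pytest


@pytest.fixture
def sample_rag_metadata():
    # Assumed shape of the RAG engine's ingest response stored as rag_metadata.
    return {"data": [{"doc_id": "abc-123"}]}


def test_rag_metadata_has_doc_ids(sample_rag_metadata):
    # Any test module under nesis/api/tests/ is picked up by the
    # `pytest nesis/api/tests/` step in the workflow above.
    assert sample_rag_metadata["data"][0]["doc_id"] == "abc-123"
```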
@@ -60,7 +60,7 @@ def upgrade() -> None:
         sa.Column("base_uri", sa.Unicode(length=255), nullable=False),
         sa.Column("filename", sa.Unicode(length=255), nullable=False),
         sa.Column(
-            "pgpt_metadata", postgresql.JSONB(astext_type=sa.Text()), nullable=False
+            "rag_metadata", postgresql.JSONB(astext_type=sa.Text()), nullable=False
         ),
         sa.Column(
             "store_metadata", postgresql.JSONB(astext_type=sa.Text()), nullable=True
2 changes: 1 addition & 1 deletion nesis/api/core/document_loaders/dropbox.py
@@ -104,7 +104,7 @@ def _sync_dropbox_documents(settings, llm_endpoint, http_client):
            document_id=file_unique_id,
            filename=file_metadata["title"],
            base_uri=file_source,
-           pgpt_metadata=upload_response,
+           rag_metadata=upload_response,
            store_metadata=file_metadata,
        )
 
2 changes: 1 addition & 1 deletion nesis/api/core/document_loaders/google_drive.py
@@ -78,7 +78,7 @@ def _sync_google_documents(connection, llm_endpoint, http_client):
            document_id=file_unique_id,
            filename=file_metadata["title"],
            base_uri=file_source,
-           pgpt_metadata=upload_response,
+           rag_metadata=upload_response,
            store_metadata=file_metadata,
        )
 
6 changes: 3 additions & 3 deletions nesis/api/core/document_loaders/loader_helper.py
@@ -30,10 +30,10 @@ def _upload_document_to_pgpt(upload_document, file_metadata, llm_endpoint, http_
     ):
         _LOG.debug(f"Skipping file {file_metadata['name']} already up to date")
         return None
-    pgpt_metadata: dict = db_document.pgpt_metadata
-    if pgpt_metadata is None:
+    rag_metadata: dict = db_document.rag_metadata
+    if rag_metadata is None:
         return None
-    for document_data in pgpt_metadata.get("data") or []:
+    for document_data in rag_metadata.get("data") or []:
         try:
             http_client.delete(
                 url=f"{llm_endpoint}/v1/ingest/{document_data['doc_id']}"
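The loaders in this commit (Dropbox, Google Drive, MinIO, Samba, SharePoint) all follow the pattern this helper captures: the response returned by the RAG engine at ingestion time is persisted on the document as `rag_metadata`, and when the source file changes or disappears, each previously ingested `doc_id` is deleted from the engine before re-uploading. A minimal sketch of that loop, assuming the `{"data": [{"doc_id": ...}]}` shape seen in the diff and stand-in client and endpoint values:

```python
# Sketch only: stand-ins for the real http_client and llm_endpoint used above.
class StubHttpClient:
    def delete(self, url: str) -> None:
        print(f"DELETE {url}")


http_client = StubHttpClient()
llm_endpoint = "http://localhost:8080"  # hypothetical RAG engine endpoint

# rag_metadata is the ingest response previously saved on the Document record.
rag_metadata = {"data": [{"doc_id": "abc-123"}, {"doc_id": "def-456"}]}

for document_data in rag_metadata.get("data") or []:
    # Remove each stale chunk from the RAG engine by its doc_id.
    http_client.delete(url=f"{llm_endpoint}/v1/ingest/{document_data['doc_id']}")
```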
12 changes: 6 additions & 6 deletions nesis/api/core/document_loaders/minio.py
@@ -148,10 +148,10 @@ def _sync_document(
                    f"Skipping document {item.object_name} already up to date"
                )
                return
-           pgpt_metadata: dict = document.pgpt_metadata
-           if pgpt_metadata is None:
+           rag_metadata: dict = document.rag_metadata
+           if rag_metadata is None:
                return
-           for document_data in pgpt_metadata.get("data") or []:
+           for document_data in rag_metadata.get("data") or []:
                try:
                    util.un_ingest_file(
                        http_client=http_client,
@@ -187,7 +187,7 @@ def _sync_document(
            document_id=item.etag,
            filename=item.object_name,
            base_uri=endpoint,
-           pgpt_metadata=response_json,
+           rag_metadata=response_json,
            store_metadata={
                "bucket_name": item.bucket_name,
                "object_name": item.object_name,
@@ -215,15 +215,15 @@ def _unsync_s3_documents(
     documents = get_documents(base_uri=endpoint)
     for document in documents:
         store_metadata = document.store_metadata
-        pgpt_metadata = document.pgpt_metadata
+        rag_metadata = document.rag_metadata
         bucket_name = store_metadata["bucket_name"]
         object_name = store_metadata["object_name"]
         try:
             client.stat_object(bucket_name=bucket_name, object_name=object_name)
         except minio.error.S3Error as ex:
             str_ex = str(ex)
             if "NoSuchKey" in str_ex and "does not exist" in str_ex:
-                for document_data in pgpt_metadata.get("data") or []:
+                for document_data in rag_metadata.get("data") or []:
                     try:
                         http_client.delete(
                             url=f"{pgpt_endpoint}/v1/ingest/documents/{document_data['doc_id']}"
12 changes: 6 additions & 6 deletions nesis/api/core/document_loaders/samba.py
@@ -188,10 +188,10 @@ def _process_file(
        ) < last_change_datetime.replace(tzinfo=None).replace(microsecond=0):
            _LOG.debug(f"Skipping shared_file {file_name} already up to date")
            return
-       pgpt_metadata: dict = document.pgpt_metadata
-       if pgpt_metadata is None:
+       rag_metadata: dict = document.rag_metadata
+       if rag_metadata is None:
            return
-       for document_data in pgpt_metadata.get("data") or []:
+       for document_data in rag_metadata.get("data") or []:
            try:
                util.un_ingest_file(
                    http_client=http_client,
@@ -236,7 +236,7 @@ def _process_file(
            document_id=file_unique_id,
            filename=file_name,
            base_uri=endpoint,
-           pgpt_metadata=response_json,
+           rag_metadata=response_json,
            store_metadata=file_metadata,
        )
 
@@ -264,15 +264,15 @@ def _unsync_samba_documents(connection, pgpt_endpoint, http_client):
     documents = get_documents(base_uri=endpoint)
     for document in documents:
         store_metadata = document.store_metadata
-        pgpt_metadata = document.pgpt_metadata
+        rag_metadata = document.rag_metadata
 
         file_path = store_metadata["file_path"]
         try:
             stat(file_path, username=username, password=password, port=port)
         except smbprotocol.exceptions.SMBOSError as error:
             if "No such file" not in str(error):
                 raise
-            for document_data in pgpt_metadata.get("data") or []:
+            for document_data in rag_metadata.get("data") or []:
                 try:
                     util.un_ingest_file(
                         http_client=http_client,
12 changes: 6 additions & 6 deletions nesis/api/core/document_loaders/sharepoint.py
@@ -102,10 +102,10 @@ def _sync_sharepoint_documents(**kwargs):
                    f"Skipping file {file.name} already up to date"
                )
                continue
-           pgpt_metadata: dict = document.pgpt_metadata
-           if pgpt_metadata is None:
+           rag_metadata: dict = document.rag_metadata
+           if rag_metadata is None:
                continue
-           for document_data in pgpt_metadata.get("data") or []:
+           for document_data in rag_metadata.get("data") or []:
                try:
                    http_client.delete(
                        url=f"{pgpt_endpoint}/v1/ingest/{document_data['doc_id']}"
@@ -133,7 +133,7 @@ def _sync_sharepoint_documents(**kwargs):
                document_id=file.unique_id,
                filename=file.name,
                base_uri=site_url,
-               pgpt_metadata=response_json,
+               rag_metadata=response_json,
                store_metadata={
                    "file_url": f"{location_name}/{file.name}",
                    "file_name": file.name,
@@ -180,7 +180,7 @@ def _unsync_sharepoint_documents(**kwargs):
     documents = get_documents(base_uri=site_url)
     for document in documents:
         store_metadata = document.store_metadata
-        pgpt_metadata = document.pgpt_metadata
+        rag_metadata = document.rag_metadata
         file_url = store_metadata["file_url"]
         try:
             # Check that the file still exists on the sharepoint server
@@ -190,7 +190,7 @@ def _unsync_sharepoint_documents(**kwargs):
         except ClientRequestException as e:
             if e.response.status_code == 404:
                 # File no longer exists on sharepoint server so we need to delete from model
-                for document_data in pgpt_metadata.get("data") or []:
+                for document_data in rag_metadata.get("data") or []:
                     try:
                         http_client.delete(
                             url=f"{pgpt_endpoint}/v1/ingest/{document_data['doc_id']}"
8 changes: 4 additions & 4 deletions nesis/api/core/models/entities.py
@@ -243,7 +243,7 @@ class Document(Base):
     # This is likely the endpoint e.g. hostname, URL, SambaShare e.t.c
     base_uri = Column(Unicode(255), nullable=False)
     filename = Column(Unicode(255), nullable=False)
-    pgpt_metadata = Column(JSONB, nullable=False)
+    rag_metadata = Column(JSONB, nullable=False)
     store_metadata = Column(JSONB)
 
     __table_args__ = (
@@ -254,21 +254,21 @@ def __init__(
         self,
         document_id: str,
         filename: str,
-        pgpt_metadata: dict,
+        rag_metadata: dict,
         store_metadata: dict,
         base_uri: str,
     ) -> None:
         self.uuid = document_id
         self.base_uri = base_uri
         self.filename = filename
-        self.pgpt_metadata = pgpt_metadata
+        self.rag_metadata = rag_metadata
         self.store_metadata = store_metadata
 
     def to_dict(self, **kwargs) -> dict:
         dict_value = {
             "id": self.uuid,
             "filename": self.filename,
-            "pgpt_metadata": self.pgpt_metadata,
+            "rag_metadata": self.rag_metadata,
             "store_metadata": self.store_metadata,
         }
 
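After the rename, code that constructs a `Document` passes `rag_metadata` instead of `pgpt_metadata`, and `to_dict()` exposes the value under the same key. A small usage sketch based on the constructor above (all values are placeholders):

```python
from nesis.api.core.models.entities import Document

doc = Document(
    document_id="etag-or-unique-id",
    filename="report.pdf",
    rag_metadata={"data": [{"doc_id": "abc-123"}]},  # ingest response from the RAG engine
    store_metadata={"bucket_name": "docs", "object_name": "report.pdf"},
    base_uri="http://localhost:9000",
)

assert doc.to_dict()["rag_metadata"]["data"][0]["doc_id"] == "abc-123"
```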
2 changes: 1 addition & 1 deletion nesis/api/core/services/util.py
@@ -61,7 +61,7 @@ def save_document(**kwargs) -> Document:
     document = Document(
         document_id=kwargs["document_id"],
         filename=kwargs["filename"],
-        pgpt_metadata=kwargs["pgpt_metadata"],
+        rag_metadata=kwargs["rag_metadata"],
         store_metadata=kwargs["store_metadata"],
         base_uri=kwargs["base_uri"],
     )
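Correspondingly, callers of `save_document` (the loaders above) now pass the ingest response under the `rag_metadata` keyword. A sketch of a call site with placeholder values (a database session is still required for the save to succeed):

```python
from nesis.api.core.services import util

# Placeholder values; in the loaders these come from the source system
# (Dropbox, Google Drive, MinIO, Samba, SharePoint) and from the RAG
# engine's ingest response.
document = util.save_document(
    document_id="file-unique-id",
    filename="report.pdf",
    base_uri="https://example.com/files",
    rag_metadata={"data": [{"doc_id": "abc-123"}]},
    store_metadata={"title": "report.pdf"},
)
```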
