Skip to content

Commit

Permalink
Merge main and fix conflicts
Browse files Browse the repository at this point in the history
  • Loading branch information
mawandm committed Jun 28, 2024
2 parents fc36f2e + 0fd2e48 commit 1d72123
Show file tree
Hide file tree
Showing 71 changed files with 398 additions and 234 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test_rag.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:
- uses: psf/black@stable
with:
options: "--check --verbose"
src: "./nesis/api/"
src: "./nesis/rag/"
version: "24.3.0"

test:
Expand Down
2 changes: 1 addition & 1 deletion nesis/api/alembic/versions/0fc14df78ac3_add_app_entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def upgrade() -> None:
)

# ### end Alembic commands ###
op.execute("ALTER TYPE role_action_resource_type ADD VALUE 'APP';")
op.execute("ALTER TYPE role_action_resource_type ADD VALUE IF NOT EXISTS 'APP';")


def downgrade() -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@

def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.execute("ALTER TYPE role_action_resource_type ADD VALUE 'APPS';")
op.execute("ALTER TYPE role_action_resource_type ADD VALUE 'TASKS';")
op.execute("ALTER TYPE role_action_resource_type ADD VALUE IF NOT EXISTS 'APPS';")
op.execute("ALTER TYPE role_action_resource_type ADD VALUE IF NOT EXISTS 'TASKS';")
# ### end Alembic commands ###


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def upgrade() -> None:
# ### end Alembic commands ###

# Upgrade the document_status type
op.execute("ALTER TYPE datasource_status ADD VALUE 'INGESTING';")
op.execute("ALTER TYPE datasource_status ADD VALUE IF NOT EXISTS 'INGESTING';")


def downgrade() -> None:
Expand Down
39 changes: 39 additions & 0 deletions nesis/api/alembic/versions/7cfa662dff86_fix_document_unique_key.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""fix document unique key
Revision ID: 7cfa662dff86
Revises: 9f486f3bf6ac
Create Date: 2024-06-24 14:36:18.455799
"""

from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision: str = "7cfa662dff86"
down_revision: Union[str, None] = "9f486f3bf6ac"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Widen the ``document`` table's unique key.

    Drops the old (uuid, base_uri) unique constraint and replaces it with
    one that also includes ``filename``, so the same object may be recorded
    once per file name under a given base URI.
    """
    table = "document"
    # Remove the narrower key first, then install the widened replacement.
    op.drop_constraint("uq_document_uuid_base_url", table, type_="unique")
    op.create_unique_constraint(
        "uq_document_uuid_base_url_filename",
        table,
        ["uuid", "base_uri", "filename"],
    )


def downgrade() -> None:
    """Revert :func:`upgrade`.

    Restores the original (uuid, base_uri) unique constraint on the
    ``document`` table, dropping the filename-aware one.
    """
    table = "document"
    # Undo in reverse order: drop the widened key, re-create the original.
    op.drop_constraint("uq_document_uuid_base_url_filename", table, type_="unique")
    op.create_unique_constraint(
        "uq_document_uuid_base_url", table, ["uuid", "base_uri"]
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""add policy column to role
Revision ID: 9f486f3bf6ac
Revises: cbad6afbe13d
Create Date: 2024-06-17 23:51:33.742876
"""

from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision: str = "9f486f3bf6ac"
down_revision: Union[str, None] = "cbad6afbe13d"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Add a nullable JSONB ``policy`` column to the ``role`` table.

    The column holds the role's access policy document; it is nullable so
    existing rows remain valid without a backfill.
    """
    policy_column = sa.Column(
        "policy", postgresql.JSONB(astext_type=sa.Text()), nullable=True
    )
    op.add_column("role", policy_column)


def downgrade() -> None:
    """Revert :func:`upgrade` by dropping the ``policy`` column from ``role``."""
    op.drop_column("role", "policy")
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
# Upgrade the document_status type
op.execute("ALTER TYPE datasource_type ADD VALUE 'S3';")
op.execute("ALTER TYPE datasource_type ADD VALUE IF NOT EXISTS 'S3';")
# ### end Alembic commands ###


Expand Down
7 changes: 6 additions & 1 deletion nesis/api/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,17 @@
},
"memcache": {
"hosts": [os.environ.get("NESIS_MEMCACHE_HOSTS", "127.0.0.1:11211")],
"session": {"expiry": 0},
"session": {
"expiry": os.environ.get("NESIS_API_MEMCACHE_SESSION_EXPIRY") or 1800
},
"cache": {
"timeout_default": 300,
},
},
"apps": {
"session": {"expiry": os.environ.get("NESIS_API_APPS_SESSION_EXPIRY") or 1800}
},
"http": {
"workers": {"count": os.environ.get("NESIS_API_HTTP_WORKERS_COUNT", 10)},
},
}
31 changes: 20 additions & 11 deletions nesis/api/core/document_loaders/minio.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def save(self, **kwargs) -> Union[Document, SqlDocumentStore.Document]:
pass

@abc.abstractmethod
def delete(self, document_id: str, **kwargs) -> None:
def delete(self, document: Document, **kwargs) -> None:
pass


Expand Down Expand Up @@ -117,8 +117,8 @@ def save(self, **kwargs) -> Union[Document, SqlDocumentStore.Document]:
last_modified=kwargs["last_modified"],
)

def delete(self, document_id: str, **kwargs) -> None:
self._extraction_store.delete(document_id=document_id)
def delete(self, document: Document, **kwargs) -> None:
self._extraction_store.delete(document_id=document.uuid)


class IngestRunner(RagRunner):
Expand Down Expand Up @@ -204,16 +204,25 @@ def save(self, **kwargs) -> Union[Document, SqlDocumentStore.Document]:
store_metadata=kwargs["store_metadata"],
)

def delete(self, document_id: str, **kwargs) -> None:
def delete(self, document: Document, **kwargs) -> None:
endpoint = (self._config.get("rag") or {}).get("endpoint")
rag_metadata = kwargs["rag_metadata"]
for document_data in rag_metadata.get("data") or []:
try:
url = f"{endpoint}/v1/ingest/documents/{document_data['doc_id']}"
self._http_client.delete(url=url)
except:
_LOG.warning(f"Failed to delete document {document_data['doc_id']}")
delete_document(document_id=document_id)

# File no longer exists in the MinIO store, so we need to delete it from the model
try:
self._http_client.deletes(
urls=[
f"{endpoint}/v1/ingest/documents/{document_data['doc_id']}"
for document_data in rag_metadata.get("data") or []
]
)
_LOG.info(f"Deleting document {document.filename}")
delete_document(document_id=document.id)
except:
_LOG.warning(
f"Failed to delete document {document.filename}",
exc_info=True,
)


class MinioProcessor(object):
Expand Down
25 changes: 14 additions & 11 deletions nesis/api/core/document_loaders/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,17 +272,20 @@ def _unsync_documents(
str_ex = str(ex).lower()
if not ("object" in str_ex and "not found" in str_ex):
raise
for document_data in rag_metadata.get("data") or []:
try:
http_client.delete(
url=f"{rag_endpoint}/v1/ingest/documents/{document_data['doc_id']}"
)
except:
_LOG.warning(
f"Failed to delete document {document_data['doc_id']}"
)
_LOG.info(f"Deleting document {document.filename}")
delete_document(document_id=document.id)
try:
http_client.deletes(
urls=[
f"{rag_endpoint}/v1/ingest/documents/{document_data['doc_id']}"
for document_data in rag_metadata.get("data") or []
]
)
_LOG.info(f"Deleting document {document.filename}")
delete_document(document_id=document.id)
except:
_LOG.warning(
f"Failed to delete document {document.filename}",
exc_info=True,
)

except:
_LOG.warn("Error fetching and updating documents", exc_info=True)
Expand Down
29 changes: 15 additions & 14 deletions nesis/api/core/document_loaders/samba.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ def _process_file(

save_document(
document_id=file_unique_id,
filename=file_name,
filename=file_share.path,
base_uri=endpoint,
rag_metadata=response_json,
store_metadata=file_metadata,
Expand Down Expand Up @@ -297,19 +297,20 @@ def _unsync_samba_documents(connection, rag_endpoint, http_client):
except smbprotocol.exceptions.SMBOSError as error:
if "No such file" not in str(error):
raise
for document_data in rag_metadata.get("data") or []:
try:
util.un_ingest_file(
http_client=http_client,
endpoint=rag_endpoint,
doc_id=document_data["doc_id"],
)
except:
_LOG.warn(
f"Failed to delete document {document_data['doc_id']}"
)
_LOG.info(f"Deleting document {document.filename}")
delete_document(document_id=document.id)
try:
http_client.deletes(
[
f"{rag_endpoint}/v1/ingest/documents/{document_data['doc_id']}"
for document_data in rag_metadata.get("data") or []
]
)
_LOG.info(f"Deleting document {document.filename}")
delete_document(document_id=document.id)
except:
_LOG.warning(
f"Failed to delete document {document.filename}",
exc_info=True,
)
_LOG.info(f"Completed unsyncing files from endpoint {rag_endpoint}")
except:
_LOG.warn("Error fetching and updating documents", exc_info=True)
27 changes: 15 additions & 12 deletions nesis/api/core/document_loaders/sharepoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ def _sync_document(

save_document(
document_id=file.unique_id,
filename=file.name,
filename=file.serverRelativeUrl,
base_uri=site_url,
rag_metadata=response_json,
store_metadata={
Expand Down Expand Up @@ -301,17 +301,20 @@ def _unsync_sharepoint_documents(sp_context, http_client, rag_endpoint, connecti
except ClientRequestException as e:
if e.response.status_code == 404:
# File no longer exists on sharepoint server so we need to delete from model
for document_data in rag_metadata.get("data") or []:
try:
http_client.delete(
url=f"{rag_endpoint}/v1/ingest/documents/{document_data['doc_id']}"
)
except:
_LOG.warn(
f"Failed to delete document {document_data['doc_id']}"
)
_LOG.info(f"Deleting document {document.filename}")
delete_document(document_id=document.id)
try:
http_client.deletes(
urls=[
f"{rag_endpoint}/v1/ingest/documents/{document_data['doc_id']}"
for document_data in rag_metadata.get("data") or []
]
)
_LOG.info(f"Deleting document {document.filename}")
delete_document(document_id=document.id)
except:
_LOG.warning(
f"Failed to delete document {document.filename}",
exc_info=True,
)
except Exception as ex:
_LOG.warning(
f"Failed to retrieve file {file_url} from sharepoint - {ex}"
Expand Down
12 changes: 9 additions & 3 deletions nesis/api/core/models/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,9 @@ class Document(Base):
store_metadata = Column(JSONB)

__table_args__ = (
UniqueConstraint("uuid", "base_uri", name="uq_document_uuid_base_url"),
UniqueConstraint(
"uuid", "base_uri", "filename", name="uq_document_uuid_base_url_filename"
),
)

def __init__(
Expand Down Expand Up @@ -262,13 +264,15 @@ class Role(Base):
id = Column(BigInteger, primary_key=True, autoincrement=True)
uuid = Column(Unicode(255), unique=True, nullable=False)
name = Column(Unicode(255), nullable=False)
policy = Column(JSONB)
create_date = Column(DateTime, default=dt.datetime.utcnow, nullable=False)

__table_args__ = (UniqueConstraint("name", name="uq_role_name"),)

def __init__(self, name: str) -> None:
def __init__(self, name: str, policy: Dict[str, Any]) -> None:
self.uuid = str(uuid.uuid4())
self.name = name
self.policy = policy
self.policy_items = []

def to_dict(self, **kwargs) -> dict:
Expand All @@ -278,7 +282,9 @@ def to_dict(self, **kwargs) -> dict:
"create_date": self.create_date.strftime(DEFAULT_DATETIME_FORMAT),
}

if (
if self.policy is not None:
dict_value["policy"] = self.policy
elif (
hasattr(self, "policy_items")
and self.policy_items
and len(self.policy_items) > 0
Expand Down
5 changes: 4 additions & 1 deletion nesis/api/core/services/app_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,10 @@ def create(self, **kwargs) -> Dict[str, Any]:
if token is None:
raise UnauthorizedAccess("Token not supplied")

encoded_secret = base64.b64decode(token).decode("utf-8")
try:
encoded_secret = base64.b64decode(token).decode("utf-8")
except UnicodeDecodeError:
raise UnauthorizedAccess("Invalid app token supplied")
encoded_secret_parts = encoded_secret.split(":")
if len(encoded_secret_parts) != 2:
raise UnauthorizedAccess("Invalid app token supplied")
Expand Down
4 changes: 2 additions & 2 deletions nesis/api/core/services/management.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,7 @@ def create(self, **kwargs) -> Role:
if len(role_action_list) == 0:
raise ServiceException("Invalid role. Policy not supplied")

role_record: Role = Role(name=name)
role_record: Role = Role(name=name, policy=role_policy)
session.add(role_record)
session.commit()
session.refresh(role_record)
Expand All @@ -482,7 +482,7 @@ def create(self, **kwargs) -> Role:
session.add(role_action)
session.commit()

role_record.policy = role_action_list
role_record.policy_items = role_action_list

return role_record
except Exception as exc:
Expand Down
Loading

0 comments on commit 1d72123

Please sign in to comment.