Skip to content

Commit

Permalink
feat(api): enforce datasource list permissions (#15)
Browse files Browse the repository at this point in the history
This PR ensures that a user can only get/list datasources that they have permission to access.

Close #14
  • Loading branch information
mawandm authored Apr 9, 2024
1 parent 9d458a0 commit e8c664b
Show file tree
Hide file tree
Showing 5 changed files with 133 additions and 6 deletions.
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,20 @@
---
# 👋 What is Nesis❓

## Overview
Nesis is an open-source enterprise knowledge discovery solution that connects to multitudes of datasources, collecting


information and making it available in a conversational manner. Nesis leverages generative AI to aggregate document chunks
collected from different documents in multiple formats such as pdf, docx, xlsx and turn them into meaningful human-readable compositions, allowing you to:

1. Converse with your document via a simple chat interface.
2. Conveniently view comparisons between documents.
3. Summarise large documents.

# Demo

https://github.com/ametnes/nesis/assets/86433807/64ea0ad8-5615-4111-8f6e-61ce7d3ad2fc

## 📜 Documentation
Read the Nesis documentation [here](./docs/README.md)

Expand Down
7 changes: 5 additions & 2 deletions nesis/api/core/document_loaders/minio.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,12 +177,15 @@ def _sync_document(
metadata=_metadata,
file_path=file_path,
)
response_json = json.loads(response)

except ValueError:
_LOG.warning(f"File {file_path} ingestion failed", exc_info=True)
response_json = {}
except UserWarning:
_LOG.debug(f"File {file_path} is already processing")
return

response_json = json.loads(response)

save_document(
document_id=item.etag,
filename=item.object_name,
Expand Down
26 changes: 25 additions & 1 deletion nesis/api/core/services/datasources.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
Datasource,
DatasourceStatus,
DatasourceType,
RoleAction,
)

from nesis.api.core.services.util import (
Expand Down Expand Up @@ -38,6 +39,13 @@ def __init__(
self._LOG.info("Initializing service...")

def _authorized(self, session, token, action):
"""
Check if request is authenticated
:param session:
:param token:
:param action:
:return:
"""
services.authorized(
session_service=self._session_service,
session=session,
Expand All @@ -47,6 +55,11 @@ def _authorized(self, session, token, action):
)

def create(self, **kwargs):
"""
Create a datasource. Must have Datasource.CREATE permissions
:param kwargs:
:return:
"""
datasource = kwargs["datasource"]

session = DBSession()
Expand Down Expand Up @@ -106,12 +119,23 @@ def get(self, **kwargs):
session=session, token=kwargs.get("token"), action=Action.READ
)

# Get datasources this user is authorized to access
authorized_datasources: list[RoleAction] = services.authorized_resources(
self._session_service,
session=session,
token=kwargs.get("token"),
action=Action.READ,
resource_type=objects.ResourceType.DATASOURCES,
)

datasources = {ds.resource for ds in authorized_datasources}

session.expire_on_commit = False
query = session.query(Datasource)
if datasource_id:
query = query.filter(Datasource.uuid == datasource_id)

return query.all()
return [ds for ds in query.all() if ds.name in datasources]
except Exception as e:
self._LOG.exception(f"Error when fetching settings")
raise
Expand Down
9 changes: 7 additions & 2 deletions nesis/api/core/util/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ def upload(self, url, filepath, field, metadata: dict) -> Union[None, str]:
"""
Upload a file. We ensure that it is threadsafe by locking on the self_link using Memcached's add method.
"""

self_link = metadata.get("self_link")
if self_link is None:
raise ValueError("Invalid metadata. Must have a self_link link")
Expand All @@ -104,8 +105,12 @@ def upload(self, url, filepath, field, metadata: dict) -> Union[None, str]:
response = req.post(
url=url, files=multipart_form_data, params=data, data=data
)
if response.status_code != 200:
response.raise_for_status()
match response.status_code:
case 400:
# ValueError is fitting since 400 means the data we sent is invalid
raise ValueError(response.text)
case 500 | 501 | 503:
response.raise_for_status()
return response.text
finally:
self._cache.delete(self_link)
Expand Down
90 changes: 90 additions & 0 deletions nesis/api/tests/core/controllers/test_management_roles.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,3 +115,93 @@ def test_create_role_as_user(client):
assert 200 == roles_response.status_code, response.json

print(json.dumps(response.json))


def test_access_permitted_resources_as_user(client):
    """
    Test that a user given read access to a specific datasource is only able to
    list/read that single datasource.

    Two datasources (finance6 and finance7) are created by an admin, a role
    granting read access to finance6 only is assigned to a new user, and the
    user's datasource listing is then expected to contain exactly one item.

    :param client: the test client used to issue requests against the API
    :return:
    """

    # Create an admin user
    admin_session = tests.get_admin_session(app=client)

    # Create a datasource
    payload = {
        "type": "minio",
        "name": "finance6",
        "connection": {
            "user": "caikuodda",
            "password": "some.password",
            "host": "localhost",
            "port": "5432",
            "database": "initdb",
        },
    }

    # Create finance6 datasource
    response = client.post(
        "/v1/datasources",
        headers=tests.get_header(token=admin_session["token"]),
        data=json.dumps(payload),
    )
    assert 200 == response.status_code, response.json

    # Create finance7 datasource
    response = client.post(
        "/v1/datasources",
        headers=tests.get_header(token=admin_session["token"]),
        data=json.dumps({**payload, "name": "finance7"}),
    )
    assert 200 == response.status_code, response.json

    # A role allowing access to the finance6 datasource
    role = {
        "name": f"document-manager{random.randint(3, 19)}",
        "policy": {
            "items": [
                {"action": "read", "resource": "datasources/finance6"},
            ]
        },
    }
    response = client.post(
        "/v1/roles",
        headers=tests.get_header(token=admin_session["token"]),
        data=json.dumps(role),
    )
    assert 200 == response.status_code, response.json

    # Admin creates a regular user assigning them access to the finance6 datasource
    user_data = {
        "name": "Test User",
        "email": "[email protected]",
        "password": "password",
        "roles": [response.json["id"]],
        "enabled": True,
    }

    response = client.post(
        "/v1/users",
        headers=tests.get_header(token=admin_session["token"]),
        data=json.dumps(user_data),
    )
    assert 200 == response.status_code, response.json

    # Log in as the new user. Keep the response object so that the status check
    # applies to the session request rather than to the earlier user creation.
    session_response = client.post(
        "/v1/sessions",
        headers=tests.get_header(),
        data=json.dumps(user_data),
    )
    assert 200 == session_response.status_code, session_response.json
    user_session = session_response.json

    # List datasources as the restricted user
    datasources_response = client.get(
        "/v1/datasources",
        headers=tests.get_header(token=user_session["token"]),
        data=json.dumps(user_data),
    )
    assert 200 == datasources_response.status_code, datasources_response.json
    get_datasources = datasources_response.json

    # Only finance6 is permitted, so exactly one datasource must be listed.
    # The count is computed up front so the failure message cannot itself raise.
    datasource_count = len(get_datasources["items"])
    assert (
        1 == datasource_count
    ), f"Expected 1 datasource but received {datasource_count}"

0 comments on commit e8c664b

Please sign in to comment.