Skip to content

Commit

Permalink
Merge branch 'release/5.12.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
JeltevanBoheemen committed Sep 3, 2024
2 parents 2cbe71e + e61c103 commit 768f91d
Show file tree
Hide file tree
Showing 258 changed files with 3,552 additions and 2,073 deletions.
42 changes: 36 additions & 6 deletions .github/workflows/backend-test.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# This workflow will run backend tests on the Python version defined in the Dockerfiles
# This workflow will run backend tests on the Python version defined in the backend/Dockerfile

name: Backend unit tests

Expand All @@ -13,15 +13,45 @@ on:
- 'hotfix/**'
- 'release/**'
- 'dependabot/**'
paths-ignore:
- 'frontend/**'
- '**.md'
paths:
- 'backend/**'
- '.github/workflows/backend*'
- 'docker-compose.yaml'

jobs:
backend-test:
name: Test Backend
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push Elasticsearch image
uses: docker/build-push-action@v6
with:
context: .
file: DockerfileElastic
push: true
tags: ghcr.io/uudigitalhumanitieslab/ianalyzer-elastic:latest
cache-from: type=registry,ref=ghcr.io/uudigitalhumanitieslab/ianalyzer-elastic:latest
cache-to: type=inline
- name: Build and push Backend
uses: docker/build-push-action@v6
with:
context: backend/.
push: true
tags: ghcr.io/uudigitalhumanitieslab/ianalyzer-backend:latest
cache-from: type=registry,ref=ghcr.io/uudigitalhumanitieslab/ianalyzer-backend:latest
cache-to: type=inline
- name: Run backend tests
run: sudo mkdir -p /ci-data && sudo docker-compose --env-file .env-ci run backend pytest
run: |
sudo mkdir -p /ci-data
docker compose pull elasticsearch
docker compose pull backend
docker compose --env-file .env-ci run --rm backend pytest
31 changes: 25 additions & 6 deletions .github/workflows/frontend-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,34 @@ on:
- 'hotfix/**'
- 'release/**'
- 'dependabot/**'
paths-ignore:
- 'backend/**'
- '**.md'
paths:
- 'frontend/**'
- '.github/workflows/frontend*'
- 'docker-compose.yaml'

jobs:
frontend-test:
name: Test Frontend
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Run frontend tests
run: sudo docker-compose --env-file .env-ci run frontend yarn test
- uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build frontend image, using cache from Github registry
uses: docker/build-push-action@v6
with:
context: frontend/.
push: true
tags: ghcr.io/uudigitalhumanitieslab/ianalyzer-frontend:latest
cache-from: type=registry,ref=ghcr.io/uudigitalhumanitieslab/ianalyzer-frontend:latest
cache-to: type=inline
- name: Run frontend unit tests
run: |
docker compose pull frontend
docker compose --env-file .env-ci run --rm frontend yarn test
25 changes: 0 additions & 25 deletions .github/workflows/release.yml

This file was deleted.

10 changes: 10 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,16 @@
}
},
{
"name": "Python: Debug Tests",
"type": "debugpy",
"request": "launch",
"program": "${file}",
"purpose": [
"debug-test"
],
"console": "internalConsole",
"justMyCode": false
}, {
"name": "celery",
"type": "debugpy",
"request": "launch",
Expand Down
4 changes: 2 additions & 2 deletions CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,5 @@ keywords:
- elasticsearch
- natural language processing
license: MIT
version: 5.9.0
date-released: '2024-07-05'
version: 5.12.0
date-released: '2024-08-30'
1 change: 0 additions & 1 deletion backend/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ RUN apt-get -y update && apt-get -y upgrade
RUN apt-get install -y pkg-config libxml2-dev libxmlsec1-dev libxmlsec1-openssl default-libmysqlclient-dev

RUN pip install --upgrade pip
RUN pip install pip-tools
# make a directory in the container
WORKDIR /backend
# copy requirements from the host system to the directory in the container
Expand Down
13 changes: 11 additions & 2 deletions backend/addcorpus/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,18 @@ class VisualizationType(Enum):
'scan',
'tab-scan'
'p',
'tags',
'context',
'tab',
]
'''
Field names that cannot be used because they are also query parameters in frontend routes.
Field names that cannot be used because they interfere with other functionality.
Using them would make routing ambiguous.
This is usually because they are also query parameters in frontend routes, and using them
would make routing ambiguous.
`query` is also forbidden because it is a reserved column in CSV downloads. Likewise,
`context` is forbidden because it's used in download requests.
`scan` and `tab-scan` are added because they interfere with element IDs in the DOM.
'''
12 changes: 10 additions & 2 deletions backend/addcorpus/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,8 +268,9 @@ def has_named_entities(self):
try:
mapping = client.indices.get_mapping(
index=self.es_index)
fields = mapping[self.es_index].get(
'mappings', {}).get('properties', {}).keys()
# in production, the index name can be different from the object's es_index value
index_name = list(mapping.keys())[0]
fields = mapping[index_name].get('mappings', {}).get('properties', {}).keys()
if any(field.endswith(':ner') for field in fields):
return True
except:
Expand Down Expand Up @@ -473,6 +474,13 @@ class PageType(models.TextChoices):
help_text='markdown contents of the documentation'
)

@property
def page_index(self):
    '''
    Position of this page's type among the declared `PageType` values.

    Can be used to sort documentation pages in declaration order. Returns
    `None` when the page's type does not match any declared value.
    '''
    for position, page_type in enumerate(__class__.PageType.values):
        if page_type == self.type:
            return position
    return None

def __str__(self):
return f'{self.corpus_configuration.corpus.name} - {self.type}'

Expand Down
45 changes: 27 additions & 18 deletions backend/addcorpus/permissions.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from rest_framework import permissions
from rest_framework.exceptions import NotFound
from users.models import CustomUser
from typing import List
from rest_framework.request import Request
from addcorpus.models import Corpus

def corpus_name_from_request(request):
Expand All @@ -25,20 +24,7 @@ def corpus_name_from_request(request):
return corpus


def filter_user_corpora(corpora: List[Corpus], user: CustomUser) -> List[Corpus]:
'''
Filter all available corpora to only
include the ones the user has access to
'''

return [
corpus
for corpus in corpora
if user.has_access(corpus.name)
]


class CorpusAccessPermission(permissions.BasePermission):
class CanSearchCorpus(permissions.BasePermission):
message = 'You do not have permission to access this corpus'

def has_permission(self, request, view):
Expand All @@ -48,9 +34,32 @@ def has_permission(self, request, view):
# check if the corpus exists
try:
    corpus = Corpus.objects.get(name=corpus_name)
except Exception:
    # lookup failures are reported to the client as a missing corpus
    raise NotFound('Corpus does not exist')

# Inactive corpora are reported as non-existent. This is an explicit check
# rather than an `assert`, because assert statements are removed when Python
# runs with -O, which would silently disable the check.
if not corpus.active:
    raise NotFound('Corpus does not exist')

# check if the user has access
return user.has_access(corpus)
return user.can_search(corpus)


class IsCurator(permissions.BasePermission):
    '''
    Permission for the corpus definition API: granted when the requesting
    user is a staff member.
    '''

    message = 'You do not have permission to manage corpus definitions'

    def has_permission(self, request: Request, view):
        current_user = request.user
        return current_user.is_staff

class IsCuratorOrReadOnly(permissions.BasePermission):
    '''
    Read-only requests are always permitted; any other request requires the
    user to be a staff member.
    '''

    message = 'You do not have permission to edit this corpus'

    def has_permission(self, request: Request, view):
        # `or` short-circuits, so the user is only inspected for unsafe methods
        is_read_only = request.method in permissions.SAFE_METHODS
        return is_read_only or request.user.is_staff
4 changes: 1 addition & 3 deletions backend/addcorpus/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,6 @@ class NewReader(CSVReader):
for f in corpus.configuration.fields.all()]

def sources(self, *args, **kwargs):
return (
(fn, {}) for fn in glob.glob(f'{self.data_directory}/**/*.csv', recursive=True)
)
return glob.glob(f'{self.data_directory}/**/*.csv', recursive=True)

return NewReader()
21 changes: 19 additions & 2 deletions backend/addcorpus/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,16 @@ def to_representation(self, value):
key = super().to_representation(value)
return self.choices[key]

def to_internal_value(self, data):
    '''
    Accept either a choice's display label or its key as input.

    If `data` equals the label of one of the choices, the key of the first
    such choice is validated instead. Otherwise `data` is validated
    unchanged (the browsable API sends keys instead of labels).
    '''
    for key, label in self.choices.items():
        if label == data:
            return super().to_internal_value(key)
    return super().to_internal_value(data)

class CorpusConfigurationSerializer(serializers.ModelSerializer):
fields = FieldSerializer(many=True, read_only=True)
languages = serializers.ListField(child=LanguageField())
Expand Down Expand Up @@ -123,11 +133,18 @@ def to_representation(self, value):

class CorpusDocumentationPageSerializer(serializers.ModelSerializer):
type = PrettyChoiceField(choices = CorpusDocumentationPage.PageType.choices)
content = DocumentationTemplateField()
index = serializers.IntegerField(source='page_index', read_only=True)
content = DocumentationTemplateField(read_only=True)
content_template = serializers.CharField(source='content')
corpus = serializers.SlugRelatedField(
source='corpus_configuration',
queryset=CorpusConfiguration.objects.all(),
slug_field='corpus__name',
)

class Meta:
model = CorpusDocumentationPage
fields = ['corpus_configuration', 'type', 'content']
fields = ['id', 'corpus', 'type', 'content', 'content_template', 'index']


class JSONDefinitionField(serializers.Field):
Expand Down
21 changes: 15 additions & 6 deletions backend/addcorpus/tests/test_corpus_access.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,33 @@
from users.models import CustomUser, CustomAnonymousUser
from addcorpus.models import Corpus

def test_access_through_group(db, basic_mock_corpus, group_with_access):
user = CustomUser.objects.create(username='nice-user', password='secret')
user.groups.add(group_with_access)
user.save()
assert user.has_access(basic_mock_corpus)
corpus = Corpus.objects.get(name=basic_mock_corpus)
assert user.can_search(corpus)
assert corpus in user.searchable_corpora()

def test_superuser_access(basic_mock_corpus, admin_user):
assert admin_user.has_access(basic_mock_corpus)
corpus = Corpus.objects.get(name=basic_mock_corpus)
assert admin_user.can_search(corpus)
assert corpus in admin_user.searchable_corpora()

def test_no_corpus_access(db, basic_mock_corpus):
user = CustomUser.objects.create(username='bad-user', password='secret')
assert not user.has_access(basic_mock_corpus)

corpus = Corpus.objects.get(name=basic_mock_corpus)
assert not user.can_search(corpus)
assert corpus not in user.searchable_corpora()

def test_public_corpus_access(db, basic_corpus_public):
user = CustomUser.objects.create(username='new-user', password='secret')
assert user.has_access(basic_corpus_public)
corpus = Corpus.objects.get(name=basic_corpus_public)
assert user.can_search(corpus)
assert corpus in user.searchable_corpora()
anon = CustomAnonymousUser()
assert anon.has_access(basic_corpus_public)
assert anon.can_search(corpus)
assert corpus in anon.searchable_corpora()

def test_api_access(db, basic_mock_corpus, group_with_access, auth_client, auth_user):
# default: no access
Expand Down
Loading

0 comments on commit 768f91d

Please sign in to comment.