Skip to content

Commit

Permalink
Merge branch 'develop' into feature/csv-filenames
Browse files Browse the repository at this point in the history
  • Loading branch information
lukavdplas authored Oct 12, 2023
2 parents e603cf9 + e52b09e commit c5ef6b5
Show file tree
Hide file tree
Showing 15 changed files with 155 additions and 48 deletions.
8 changes: 1 addition & 7 deletions backend/addcorpus/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,7 @@ class VisualizationType(Enum):
'visualize',
'visualizedField',
'normalize',
'size',
'positions',
'freqCompensation',
'analysis',
'maxDocuments',
'numberOfNgrams',
'dateField',
'ngramSettings'
]
'''
Field names that cannot be used because they are also query parameters in frontend routes.
Expand Down
8 changes: 7 additions & 1 deletion backend/addcorpus/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from addcorpus.validators import validate_language_code, validate_image_filename_extension, \
validate_markdown_filename_extension, validate_es_mapping, validate_mimetype, validate_search_filter, \
validate_name_is_not_a_route_parameter, validate_search_filter_with_mapping, validate_searchable_field_has_full_text_search, \
validate_visualizations_with_mapping, validate_implication
validate_visualizations_with_mapping, validate_implication, any_date_fields, visualisations_require_date_field

MAX_LENGTH_NAME = 126
MAX_LENGTH_DESCRIPTION = 254
Expand Down Expand Up @@ -269,3 +269,9 @@ def clean(self):
validate_implication(self.search_field_core, self.searchable, "Core search fields must be searchable")
except ValidationError as e:
warnings.warn(e.message)

validate_implication(
self.visualizations, self.corpus_configuration.fields.all(),
'The ngram visualisation requires a date field on the corpus',
visualisations_require_date_field, any_date_fields,
)
32 changes: 31 additions & 1 deletion backend/addcorpus/tests/test_validators.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pytest
from addcorpus.es_mappings import int_mapping, text_mapping, keyword_mapping
from addcorpus.models import Field
from addcorpus.es_mappings import int_mapping, text_mapping, keyword_mapping, main_content_mapping, date_mapping
from addcorpus.validators import *

def test_validate_mimetype():
Expand Down Expand Up @@ -71,3 +72,32 @@ def test_filename_validation():
with pytest.raises(ValidationError):
validate_image_filename_extension('image.txt')

def test_validate_ngram_has_date_field():
    """
    The ngram visualisation on a field is only valid when the set of
    corpus fields contains a date field.
    """
    content = Field(
        name='content',
        es_mapping=main_content_mapping(),
        visualizations=['wordcloud', 'ngram']
    )
    date = Field(
        name='date',
        es_mapping=date_mapping()
    )

    def check(corpus_fields):
        # Same implication as used in model validation: "field has ngram
        # visualisation" implies "corpus fields include a date field".
        validate_implication(
            content.visualizations, corpus_fields,
            '',
            visualisations_require_date_field,
            any_date_fields
        )

    # A date field is present: validation passes silently.
    check([content, date])

    # No date field: the implication fails.
    with pytest.raises(ValidationError):
        check([content])
7 changes: 7 additions & 0 deletions backend/addcorpus/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,10 @@ def validate_markdown_filename_extension(filename):
def validate_image_filename_extension(filename):
allowed = ['.jpeg', '.jpg', '.png', '.JPG']
validate_filename_extension(filename, allowed)

def any_date_fields(fields):
    """
    Return True if at least one field in `fields` has an `es_mapping` for
    which `primary_mapping_type` returns 'date'.
    """
    return any(
        primary_mapping_type(field.es_mapping) == 'date'
        for field in fields
    )

def visualisations_require_date_field(visualisations):
    """
    Return True if the list of visualisations includes 'ngram'.

    `visualisations` may be None or empty, in which case the result is
    False. The result is always a bool, never the falsy input itself.
    """
    # bool() keeps the None / empty guard but prevents `and` from returning
    # the operand (None or []) to the caller.
    return bool(visualisations) and 'ngram' in visualisations
2 changes: 1 addition & 1 deletion backend/corpora/dbnl/dbnl.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ def _xml_files(self):
transform_soup_func=utils.pad_content,
),
es_mapping=main_content_mapping(token_counts=True),
visualizations=['wordcloud', 'ngram'],
visualizations=['wordcloud'],
)

has_content = FieldDefinition(
Expand Down
4 changes: 2 additions & 2 deletions backend/corpora/parliament/finland-old.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ def sources(self, start, end):
yield csv_file, {}

languages = ['sv', 'fi']
description_page = 'finland.md'
image = 'finland.jpg'
description_page = 'finland-old.md'
image = 'finland-old.jpg'

document_context = document_context()

Expand Down
Binary file modified backend/corpora/parliament/images/finland-old.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion documentation/Defining-corpus-fields.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ The following properties determine how a field appears in the interface.

`search_filter` can be set if the interface should include a search filter widget for the field. I-analyzer includes date filters, multiplechoice filters (used for keyword data), range filters, and boolean filters. See [filters.py](../backend/addcorpus/filters.py).

`visualizations` optionally specifies a list of visualisations that apply for the field. Generally speaking, this is based on the type of data. For date fields and categorical/ordinal fields (usually keyword type), you can use `['resultscount', 'termfrequency']`. For text fields, you can use `['wordcloud', 'ngram']`.
`visualizations` optionally specifies a list of visualisations that apply to the field. Generally speaking, this is based on the type of data. For date fields and categorical/ordinal fields (usually keyword type), you can use `['resultscount', 'termfrequency']`. For text fields, you can use `['wordcloud', 'ngram']`. Note that the ngram visualisation can only be used if the corpus also has a date field.

If a field includes the `'resultscount'` and/or `'termfrequency'` visualisations and it is not a date field, you can also specify `visualisation_sort`, which determines how to sort the x-axis of the graph. Default is `'value'`, where categories are sorted based on the y-axis value (i.e., frequency). You may specify that they should be sorted on `'key'`, so that categories are sorted alphabetically (for keywords) or small-to-large (for numbers).

Expand Down
3 changes: 3 additions & 0 deletions frontend/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
FROM node:14-alpine

RUN apk update && apk add --no-cache --virtual .gyp python3 make g++
# Install Chromium and point CHROME_BIN at it
RUN apk add chromium
ENV CHROME_BIN='/usr/bin/chromium-browser'

# create directory frontend on container
WORKDIR /frontend
Expand Down
1 change: 1 addition & 0 deletions frontend/karma.conf.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ module.exports = function (config) {
// '--disable-gpu', this might not be needed http://cvuorinen.net/2017/05/running-angular-tests-in-headless-chrome/
// Without a remote debugging port, Google Chrome exits immediately.
'--remote-debugging-port=9222',
'--no-sandbox'
],
}
}
Expand Down
37 changes: 37 additions & 0 deletions frontend/src/app/models/visualization.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import { NgramParameters } from './visualization';

describe('NgramParameters', () => {
    let ngramParameters: NgramParameters;

    // Every spec starts from the same known parameter set.
    beforeEach(() => {
        ngramParameters = new NgramParameters(2, 'any', false, 'none', 50, 10, 'date');
    });

    it('should convert itself to a param string', () => {
        const expected = 's:2,p:any,c:false,a:none,m:50,n:10,f:date';
        expect(ngramParameters.toRouteParam()).toEqual(expected);
    });

    it('should set itself from a param string', () => {
        const routeParam = 's:3,p:first,c:true,a:none,m:50,n:20,f:date';
        ngramParameters.fromRouteParam(routeParam);

        // Each abbreviated setting should land on its own property.
        const {
            size, positions, freqCompensation, analysis,
            maxDocuments, numberOfNgrams, dateField,
        } = ngramParameters;
        expect(size).toEqual(3);
        expect(positions).toEqual('first');
        expect(freqCompensation).toEqual(true);
        expect(analysis).toEqual('none');
        expect(maxDocuments).toEqual(50);
        expect(numberOfNgrams).toEqual(20);
        expect(dateField).toEqual('date');
    });
});
44 changes: 42 additions & 2 deletions frontend/src/app/models/visualization.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import { AggregateResult, DateResult } from '.';
import { EsQuery, EsQuerySorted } from './elasticsearch';
import { QueryParameters } from './search-requests';

export interface TermFrequencyResult {
Expand Down Expand Up @@ -111,14 +110,55 @@ export type NGramRequestParameters = {
date_field: string;
} & QueryParameters;

export interface NgramParameters {
/**
 * Settings for the ngram visualisation, with conversion to and from a single
 * compact route parameter of the form `s:2,p:any,c:false,a:none,m:50,n:10,f:date`.
 *
 * Abbreviations: s = size, p = positions, c = freqCompensation, a = analysis,
 * m = maxDocuments, n = numberOfNgrams, f = dateField.
 */
export class NgramParameters {
    size: number;
    positions: string;
    freqCompensation: boolean;
    analysis: string;
    maxDocuments: number;
    numberOfNgrams: number;
    dateField: string;

    // The `key:value` parts of the route parameter parsed most recently.
    ngramSettings: string [];

    constructor(size: number,
        positions: string,
        freqCompensation: boolean,
        analysis: string,
        maxDocuments: number,
        numberOfNgrams: number,
        dateField: string
    ) {
        this.size = size;
        this.positions = positions;
        this.freqCompensation = freqCompensation;
        this.analysis = analysis;
        this.maxDocuments = maxDocuments;
        this.numberOfNgrams = numberOfNgrams;
        this.dateField = dateField;
    }

    /** Serialise all settings into one comma-separated `key:value` string. */
    toRouteParam(): string {
        return [`s:${this.size}`,`p:${this.positions}`,`c:${this.freqCompensation}`,
            `a:${this.analysis}`,`m:${this.maxDocuments}`,`n:${this.numberOfNgrams}`,
            `f:${this.dateField}`].join(',');
    }

    /**
     * Update the settings from a string produced by `toRouteParam`.
     *
     * Settings that are absent from `paramString`, or numeric settings that
     * cannot be parsed, keep their current value instead of throwing or
     * becoming `NaN` / `undefined`.
     */
    fromRouteParam(paramString: string) {
        this.ngramSettings = paramString.split(',');
        this.size = this.findInteger('s', this.size);
        this.positions = this.findString('p', this.positions);
        const compensation = this.findSetting('c');
        if (compensation !== undefined) {
            this.freqCompensation = compensation === 'true';
        }
        this.analysis = this.findString('a', this.analysis);
        this.maxDocuments = this.findInteger('m', this.maxDocuments);
        this.numberOfNgrams = this.findInteger('n', this.numberOfNgrams);
        this.dateField = this.findString('f', this.dateField);
    }

    /**
     * Look up the raw value for an abbreviation in the parsed settings.
     *
     * @returns everything after the first `:` of the matching part, or
     * `undefined` when no part starts with `<abbreviation>:`.
     */
    findSetting(abbreviation: string): string | undefined {
        const prefix = `${abbreviation}:`;
        // ngramSettings is unset until fromRouteParam has run at least once.
        const setting = (this.ngramSettings || []).find(s => s.startsWith(prefix));
        // Slice rather than split(':')[1] so values containing ':' survive.
        return setting === undefined ? undefined : setting.slice(prefix.length);
    }

    /** String value of a setting, or `fallback` when the setting is absent. */
    private findString(abbreviation: string, fallback: string): string {
        const value = this.findSetting(abbreviation);
        return value === undefined ? fallback : value;
    }

    /** Integer value of a setting, or `fallback` when absent or not a number. */
    private findInteger(abbreviation: string, fallback: number): number {
        const raw = this.findSetting(abbreviation);
        const value = raw === undefined ? NaN : parseInt(raw, 10);
        return Number.isNaN(value) ? fallback : value;
    }
}

export interface FieldCoverage {
Expand Down
8 changes: 0 additions & 8 deletions frontend/src/app/visualization/ngram/ngram.component.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,4 @@ describe('NgramComponent', () => {
expect(component).toBeTruthy();
});

it('should set the currentParameters with the right type', () => {
const params = convertToParamMap({size: '5'});
component.setParameters(params);
expect(component.currentParameters.size).toEqual(5);
const newParams = convertToParamMap({size: '2'});
component.setParameters(newParams);
expect(component.currentParameters.size).toEqual(2);
});
});
45 changes: 21 additions & 24 deletions frontend/src/app/visualization/ngram/ngram.component.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,6 @@ export class NgramComponent extends ParamDirective implements OnChanges {

currentResults: NgramResults;



// options
sizeOptions = [{label: 'bigrams', value: 2}, {label: 'trigrams', value: 3}, {label: 'fourgrams', value: 4}];
positionsOptions = ['any', 'first', 'second'].map(n => ({label: `${n}`, value: n}));
Expand All @@ -50,11 +48,12 @@ export class NgramComponent extends ParamDirective implements OnChanges {
currentParameters: NgramParameters;
lastParameters: NgramParameters;
parametersChanged = false;
ngramSettings: string[];

faCheck = faCheck;
faTimes = faTimes;

nullableParameters = ['size', 'position', 'freqCompensation', 'analysis', 'maxDocuments', 'numberOfNgrams', 'dateField'];
nullableParameters = ['ngramSettings'];

constructor(
private apiService: ApiService,
Expand All @@ -64,6 +63,15 @@ export class NgramComponent extends ParamDirective implements OnChanges {
paramService: ParamService
) {
super(route, router, paramService);
this.currentParameters = new NgramParameters(
this.sizeOptions[0].value,
this.positionsOptions[0].value,
this.freqCompensationOptions[0].value,
'none',
this.maxDocumentsOptions[0].value,
this.numberOfNgramsOptions[0].value,
'date'
);
}

initialize() {}
Expand All @@ -82,6 +90,7 @@ export class NgramComponent extends ParamDirective implements OnChanges {
this.resultsCache = {};
this.allDateFields = this.corpus.fields.filter(field => field.mappingType === 'date');
this.dateField = this.allDateFields[0];
this.currentParameters.dateField = this.dateField.name;
if (this.visualizedField.multiFields) {
this.analysisOptions = [{label: 'None', value: 'none'}]
.concat(this.visualizedField.multiFields.map(subfield => {
Expand All @@ -99,17 +108,10 @@ export class NgramComponent extends ParamDirective implements OnChanges {
}

setParameters(params: Params) {
this.currentParameters = {
size: parseInt(params.get('size'), 10) || this.sizeOptions[0].value,
positions: params.get('positions') || this.positionsOptions[0].value,
freqCompensation: params.get('freqCompensation') !== undefined ?
params.get('freqCompensation') === 'true' :
this.freqCompensationOptions[0].value,
analysis: params.get('analysis') || 'none',
maxDocuments: parseInt(params.get('maxDocuments'), 10) || 50,
numberOfNgrams: parseInt(params.get('numberOfNgrams'), 10) || 10,
dateField: params.get('dateField') || 'date',
};
const ngramSettings = params.get('ngramSettings');
if (ngramSettings) {
this.currentParameters.fromRouteParam(ngramSettings);
}
}

loadGraph() {
Expand Down Expand Up @@ -158,22 +160,17 @@ export class NgramComponent extends ParamDirective implements OnChanges {
}

cacheResult(result: any, params: NgramParameters): void {
const key = this.parametersKey(params);
const key = params.toRouteParam();
this.resultsCache[key] = result;
}

getCachedResult(params: NgramParameters): any {
const key = this.parametersKey(params);
const key = params.toRouteParam();
if (_.has(this.resultsCache, key)) {
return this.resultsCache[key];
}
}

parametersKey(params: NgramParameters): string {
    // Join all parameter values with '/' to form a single key string.
    return _.join(_.values(params), '/');
}

setPositionsOptions(size) {
// set positions dropdown options and reset its value
this.positionsOptions = ['any'].concat(['first', 'second', 'third', 'fourth'].slice(0, size)).map(
Expand All @@ -186,8 +183,8 @@ export class NgramComponent extends ParamDirective implements OnChanges {
this.currentParameters[parameter] = value;

if (parameter === 'size' && value) {
this.setPositionsOptions(value);
}
this.setPositionsOptions(value);
}

this.parametersChanged = true;
}
Expand All @@ -200,7 +197,7 @@ export class NgramComponent extends ParamDirective implements OnChanges {

confirmChanges() {
this.parametersChanged = false;
this.setParams(this.currentParameters);
this.setParams({ ngramSettings: this.currentParameters.toRouteParam() });
}

get currentSizeOption() {
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "i-analyzer",
"version": "4.3.0",
"version": "5.0.0",
"license": "MIT",
"scripts": {
"postinstall": "yarn install-back && yarn install-front",
Expand Down

0 comments on commit c5ef6b5

Please sign in to comment.