Skip to content

Commit

Permalink
Merge pull request #1637 from UUDigitalHumanitieslab/feature/entity-d…
Browse files Browse the repository at this point in the history
…ocumentation

Feature/entity documentation
  • Loading branch information
BeritJanssen authored Sep 19, 2024
2 parents 04a27ff + 9801ddd commit a6ac966
Show file tree
Hide file tree
Showing 25 changed files with 256 additions and 56 deletions.
2 changes: 2 additions & 0 deletions backend/corpora/troonredes/description/troonredes.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@ Troonredes (throne speeches) are the speeches from the throne that formally mark
Missing years: in 1940-1944 no speech was written.

The transcripts are provided by [troonredes.nl](https://www.troonredes.nl).

The transcripts were enriched with named entities using the [TextMiNER library](https://github.com/CentreForDigitalHumanities/TextMiNER).
25 changes: 25 additions & 0 deletions documentation/Named-entities.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Named Entities
I-Analyzer has the capacity to display named entities.

## Prerequisites
In order to display a corpus enriched with named entities, install the Annotated Text plugin of Elasticsearch, following the instructions [here](https://www.elastic.co/guide/en/elasticsearch/plugins/8.6/mapper-annotated-text.html).

### Named entity fields
To determine whether named entities are available for a given corpus, the application checks if a given corpus contains fields ending with `:ner`.

If the main content field is called `speech`, the field containing named entity annotations should be called `speech:ner`. This field should have the following Elasticsearch mapping:
```python
{
'type': 'annotated_text'
}
```

Moreover, an enriched corpus should contain the following keyword fields:
- `ner:person`
- `ner:location`
- `ner:organization`
- `ner:miscellaneous`

These fields are intended for searching and filtering on named entities (not yet implemented).

## Enriching a corpus with named entities
To enrich a corpus with named entities, we recommend using the [TextMiNER](https://github.com/CentreForDigitalHumanities/TextMiNER) library. This library will read from an existing index and a specified field name. The content of the field is analyzed with the BERT-based models for named entity recognition provided by [flair](https://github.com/flairNLP/flair). The library then adds named entities to the `annotated_text` field and the keyword fields, as outlined above.
4 changes: 2 additions & 2 deletions frontend/karma.conf.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ module.exports = function (config) {
dir: require('path').join(__dirname, 'coverage'), reports: [ 'html', 'lcovonly' ],
fixWebpackSourcePaths: true
},

reporters: ['progress', 'kjhtml'],
port: 9876,
colors: true,
Expand All @@ -32,7 +32,7 @@ module.exports = function (config) {
base: 'Chrome',
flags: [
'--headless',
// '--disable-gpu', this might not be needed http://cvuorinen.net/2017/05/running-angular-tests-in-headless-chrome/
'--disable-gpu',
// Without a remote debugging port, Google Chrome exits immediately.
'--remote-debugging-port=9222',
'--no-sandbox'
Expand Down
5 changes: 3 additions & 2 deletions frontend/src/app/document-page/document-page.component.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
<div class="section">
<div class="container is-readable">
<div *ngIf="documentFound; else notFound" [ngClass]="{'is-loading': !document}">
<em *ngIf="showNEROption" iaBalloon="Show named entities for this document">Show named entities<ia-toggle (toggled)="toggleNER($event)"></ia-toggle></em>
<div class="level">
<div class="level-left"></div>
<div class="level-left">
<ia-entity-toggle *ngIf="showNEROption" (toggleNER)="toggleNER($event)"></ia-entity-toggle>
</div>
<div class="level-right">
<div class="level-item" *ngIf="document?.hasContext">
<a [routerLink]="contextLink" [queryParams]="contextQueryParams" iaBalloon="view all documents from this {{contextDisplayName}}">
Expand Down
12 changes: 2 additions & 10 deletions frontend/src/app/document-view/document-view.component.html
Original file line number Diff line number Diff line change
Expand Up @@ -45,16 +45,8 @@
<ng-template iaTabPanel *ngFor="let field of contentFields" [id]="field.name" [title]="field.displayName" [icon]="documentIcons.text">
<div *ngIf="showEntities && document.entityAnnotations$ | async as annotations ; else showHighlights" class="content"
[attr.lang]="document.language(field)">
<div *ngIf="annotations[field.name]; else plainText">
<span *ngFor="let textSegment of annotations[field.name]">
<mark *ngIf="textSegment.entity !== 'flat'; else unannotatedText" [className]="'entity-'+textSegment.entity">
{{textSegment.text}}
<fa-icon [icon]="entityIcons[textSegment.entity]" [title]="textSegment.entity" class="entity-icon"></fa-icon>
</mark>
<ng-template #unannotatedText>
{{textSegment.text | paragraph}}
</ng-template>
</span>
<div *ngIf="annotations[field.name]; else plainText"
[innerHtml]="annotations[field.name] | entity | paragraph">
</div>
<ng-template #plainText>
<div [innerHtml]="document.fieldValues[field.name] | paragraph"></div>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
[responsive]="true" [maximizable]="true" [dismissableMask]="true" [draggable]="true" [resizable]="false" [blockScroll]="true">
<ng-template pTemplate="header">
<span>Document {{document.position}} of {{page.total}}</span>
<em *ngIf="showNEROption" iaBalloon="Show named entities for this document">Show named entities<ia-toggle (toggled)="toggleNER($event)"></ia-toggle></em>
<ia-entity-toggle *ngIf="showNEROption" (toggleNER)="toggleNER($event)"></ia-entity-toggle>
</ng-template>

<ia-document-view [document]="document" [queryModel]="queryModel" [corpus]="document.corpus" [view]="view" [showEntities]="showNamedEntities"></ia-document-view>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { ComponentFixture, TestBed, fakeAsync, tick, waitForAsync } from '@angular/core/testing';
import { ComponentFixture, TestBed, waitForAsync } from '@angular/core/testing';
import { By } from '@angular/platform-browser';

import { DocumentPopupComponent } from './document-popup.component';
Expand All @@ -7,7 +7,6 @@ import { makeDocument } from '../../../mock-data/constructor-helpers';
import { mockCorpus, mockCorpus2, mockField } from '../../../mock-data/corpus';
import { DocumentPage } from '@models/document-page';
import { QueryModel } from '@models';
import { query } from '@angular/animations';


describe('DocumentPopupComponent', () => {
Expand All @@ -33,7 +32,7 @@ describe('DocumentPopupComponent', () => {
});

it('does not show the NER toggle for corpora without named entities', () => {
expect(fixture.debugElement.query(By.css('ia-toggle'))).toBeFalsy();
expect(fixture.debugElement.query(By.css('ia-entity-toggle'))).toBeFalsy();
});

it('shows the NER toggle for corpora with named entities', () => {
Expand All @@ -42,6 +41,6 @@ describe('DocumentPopupComponent', () => {
component.queryModel = queryModel;
component.ngOnChanges({queryModel: {previousValue: setModel, currentValue: queryModel, firstChange: false, isFirstChange: null}});
fixture.detectChanges();
expect(fixture.debugElement.query(By.css('ia-toggle'))).toBeTruthy();
expect(fixture.debugElement.query(By.css('ia-entity-toggle'))).toBeTruthy();
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import { takeUntil } from 'rxjs/operators';
import * as _ from 'lodash';
import { FoundDocument, QueryModel } from '@models';
import { Subject } from 'rxjs';
import { documentIcons, actionIcons, corpusIcons } from '@shared/icons';
import { actionIcons, documentIcons } from '../../shared/icons';

@Component({
selector: 'ia-document-popup',
Expand Down
11 changes: 5 additions & 6 deletions frontend/src/app/document/document.module.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,8 @@ import { DocumentPopupComponent } from './document-popup/document-popup.componen
import { DialogModule } from 'primeng/dialog';
import { DocumentPreviewComponent } from './document-preview/document-preview.component';
import { EntityLegendComponent } from './entity-legend/entity-legend.component';
import {
ElasticsearchHighlightPipe,
GeoDataPipe,
ParagraphPipe,
SnippetPipe,
} from '@shared/pipes';
import { EntityToggleComponent } from './entity-toggle/entity-toggle.component';
import { ElasticsearchHighlightPipe, EntityPipe, GeoDataPipe, ParagraphPipe, SnippetPipe } from '../shared/pipes';

@NgModule({
declarations: [
Expand All @@ -25,7 +21,9 @@ import {
DocumentPopupComponent,
DocumentPreviewComponent,
EntityLegendComponent,
EntityToggleComponent,
ElasticsearchHighlightPipe,
EntityPipe,
GeoDataPipe,
ParagraphPipe,
SnippetPipe
Expand All @@ -42,6 +40,7 @@ import {
DocumentPageComponent,
DocumentPopupComponent,
EntityLegendComponent,
EntityToggleComponent,
SearchRelevanceComponent,
]
})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@
</li>
</ul>
<ng-template #noEntities>
<em class="no-entities-message">No named entities were found in this text.</em>
<em class="no-entities-message">No named entities found.</em>
</ng-template>
</footer>
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<button class="button" type="button" (click)="showNamedEntityDocumentation()">
<span class="icon">
<fa-icon [icon]="actionIcons.helpAlt" aria-label="help"></fa-icon>
</span>
</button>
<em [id]="toggleLabel">Show named entities<ia-toggle (toggled)="toggleNER.emit($event)" [toggleLabel]="toggleLabel"></ia-toggle></em>
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
button {
float: left;
margin-right: 1em;
}
em {
position: absolute;
margin-top: .3em;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import { ComponentFixture, TestBed } from '@angular/core/testing';

import { commonTestBed } from '../../common-test-bed';
import { EntityToggleComponent } from './entity-toggle.component';

// Smoke test: the entity toggle can be instantiated inside the shared test bed.
describe('EntityToggleComponent', () => {
    let fixture: ComponentFixture<EntityToggleComponent>;
    let component: EntityToggleComponent;

    beforeEach(async () => {
        // Compile the shared testing module before creating the component.
        const { testingModule } = commonTestBed();
        await testingModule.compileComponents();

        fixture = TestBed.createComponent(EntityToggleComponent);
        component = fixture.componentInstance;
        // Run the initial change detection so the template is rendered.
        fixture.detectChanges();
    });

    it('should create', () => {
        expect(component).toBeTruthy();
    });
});
24 changes: 24 additions & 0 deletions frontend/src/app/document/entity-toggle/entity-toggle.component.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import { Component, output } from '@angular/core';

import { actionIcons } from '../../shared/icons';
import { DialogService } from '../../services';

/**
 * Toggle for showing or hiding named entities in a document, combined with a
 * help button that opens the manual page about named entities.
 */
@Component({
    selector: 'ia-entity-toggle',
    imports: [],
    templateUrl: './entity-toggle.component.html',
    styleUrl: './entity-toggle.component.scss'
})
export class EntityToggleComponent {
    /** Icons made available to the template (used for the help button). */
    readonly actionIcons = actionIcons;

    /**
     * Re-emits the value of the inner `ia-toggle`'s `toggled` event.
     * Typed with the primitive `boolean` rather than the boxed `Boolean` wrapper.
     */
    readonly toggleNER = output<boolean>();

    /**
     * Used in the template both as the `id` of the visible label and as the
     * `toggleLabel` input of `ia-toggle`.
     */
    toggleLabel = 'ner-toggle';

    constructor(private readonly dialogService: DialogService) {}

    /** Opens the manual page that documents named entities. */
    public showNamedEntityDocumentation(): void {
        this.dialogService.showManualPage('namedentities');
    }
}
36 changes: 36 additions & 0 deletions frontend/src/app/shared/pipes/entity.pipe.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import { FieldEntities } from '../../models';
import { EntityPipe } from './entity.pipe';

// Unit tests for EntityPipe: rendering of annotated and plain ("flat") text segments.
describe('EntityPipe', () => {
    const mockInput: Array<FieldEntities> = [
        {text: 'Nobody expects the ', entity: 'flat'},
        {text: 'Spanish Inquisition', entity: 'organization'},
        {text: '!', entity: 'flat'}
    ];

    let pipe: EntityPipe;

    beforeEach(() => {
        // A fresh pipe per test; the pipe has no constructor dependencies.
        pipe = new EntityPipe();
    });

    it('creates an instance', () => {
        expect(pipe).toBeTruthy();
    });

    it('adds mark tags to named entity annotations', () => {
        const output = pipe.transform(mockInput.slice(1, 2));
        expect(output).toContain('<mark ');
        expect(output).toContain('</mark>');
        expect(output).toContain('<svg ');
        expect(output).toContain('</svg>');
    });

    it('does not change Field Entities of `flat` type', () => {
        const output = pipe.transform(mockInput.slice(0, 1));
        expect(output).toEqual(mockInput[0].text);
    });

    it('concatenates highlighted and non-annotated text', () => {
        const output = pipe.transform(mockInput);
        expect(typeof output).toBe('string');
        // The segments must appear in their original order: flat text,
        // then the marked entity, then the trailing flat text.
        expect(output.startsWith(mockInput[0].text + '<mark ')).toBe(true);
        expect(output).toContain(mockInput[1].text);
        expect(output.endsWith('</mark>' + mockInput[2].text)).toBe(true);
    });
});
33 changes: 33 additions & 0 deletions frontend/src/app/shared/pipes/entity.pipe.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import { Pipe, PipeTransform } from '@angular/core';
import { icon } from '@fortawesome/fontawesome-svg-core';

import { entityIcons } from '../icons';
import { FieldEntities } from '../../models';

@Pipe({
    name: 'entity'
})
export class EntityPipe implements PipeTransform {
    /**
     * Transforms a list of FieldEntities into one string of flat text and
     * entities wrapped in <mark> tags, with an icon indicating the type of
     * named entity.
     *
     * Note that this pipe needs to be followed by the | paragraph or | safeHtml
     * pipe; otherwise, the icons will be removed due to sanitization.
     *
     * NOTE(review): segment text is inserted into the markup verbatim (not
     * HTML-escaped). Confirm that the annotated text is trusted or escaped
     * upstream before sanitization is bypassed downstream.
     *
     * @param entityArray list of FieldEntities; `null`/`undefined` yields ''
     * @returns string of mixed text and html
     */
    transform(entityArray: Array<FieldEntities> | null | undefined): string {
        if (!entityArray) {
            // Pipes are frequently evaluated before their input is available.
            return '';
        }
        return entityArray.map(segment => this.renderSegment(segment)).join('');
    }

    /** Renders one segment: flat text is returned unchanged, entities are wrapped in <mark>. */
    private renderSegment(segment: FieldEntities): string {
        if (segment.entity === 'flat') {
            return segment.text;
        }
        const iconDefinition = entityIcons[segment.entity];
        // `html` is an array of markup strings; join it explicitly rather than
        // relying on array-to-string coercion (which would insert commas).
        // Fall back to no icon if none can be resolved for this entity type,
        // instead of throwing while rendering the document.
        // The `any` cast is retained from the original: the declared type of
        // `entityIcons` is not visible from this file.
        const iconHtml = icon(iconDefinition as any)?.html.join('') ?? '';
        return `<mark class="entity-${segment.entity}" title="Named Entity ${segment.entity}">${segment.text} ${iconHtml}</mark>`;
    }

}
5 changes: 1 addition & 4 deletions frontend/src/app/shared/pipes/geo-data.pipe.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
import { Pipe, PipeTransform } from '@angular/core';
import { DomSanitizer } from '@angular/platform-browser';
import { CorpusField, FoundDocument } from '@models';
import { CorpusField, FoundDocument } from '../../models';
@Pipe({
name: 'geoData'
})
export class GeoDataPipe implements PipeTransform {
constructor(private sanitizer: DomSanitizer) {
}

/**
* Transforms GeoJSON data
Expand Down
1 change: 1 addition & 0 deletions frontend/src/app/shared/pipes/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
export * from './elasticsearch-highlight.pipe';
export * from './entity.pipe';
export * from './geo-data.pipe';
export * from './paragraph.pipe';
export * from './regex-highlight.pipe';
Expand Down
47 changes: 43 additions & 4 deletions frontend/src/app/shared/pipes/paragraph.pipe.spec.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,47 @@
import { TestBed } from '@angular/core/testing';
import { DomSanitizer } from '@angular/platform-browser';


import { ParagraphPipe } from './paragraph.pipe';

describe('ParagraphPipe', () => {
it('create an instance', () => {
const pipe = new ParagraphPipe();
expect(pipe).toBeTruthy();
});
let pipe: ParagraphPipe;

beforeEach(() => {
TestBed.configureTestingModule({
providers: [
ParagraphPipe,
{ provide: DomSanitizer, useValue: {
bypassSecurityTrustHtml: (input) => input
}
}
]
});
pipe = TestBed.inject(ParagraphPipe);
})

it('creates an instance', () => {
expect(pipe).toBeTruthy();
});

it('does not alter text without linebreaks', () => {
const input = 'Some text. And some more text. And even more.';
const output = pipe.transform(input);
expect(output).toEqual(input);
});

it('wraps text with linebreaks in paragraph tags', () => {
const input = 'Some text.\nAnd some more text.\nAnd even more.';
const output = pipe.transform(input);
const expected = '<p>Some text.</p><p>And some more text.</p><p>And even more.</p>'
expect(output).toEqual(expected);
});

it('ignores multiple linebreaks', () => {
const input = '\nSome text.\n\n\nAnd some more text.\n\n';
const output = pipe.transform(input);
const expected = '<p>Some text.</p><p>And some more text.</p>'
expect(output).toEqual(expected);
});

});
Loading

0 comments on commit a6ac966

Please sign in to comment.