Skip to content

Commit

Permalink
Merge pull request #64 from LD4P/diacritic-insensitivity
Browse files Browse the repository at this point in the history
configure the indices to use the asciifolding filter for analysis
  • Loading branch information
justinlittman authored Oct 11, 2019
2 parents f491abf + 221d964 commit a5f659f
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 3 deletions.
33 changes: 31 additions & 2 deletions __tests__/Pipeline.integration.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ describe('integration tests', () => {
log: 'warning'
})
const resourceSlug = 'stanford12345'
const resourceTitle = 'A cool title'
const resourceTitle = 'A cøol tītlé'
const nonRdfSlug = 'resourceTemplate:foo123:Something:Excellent'
const nonRdfBody = { foo: 'bar', baz: 'quux' }
const sleep = ms => new Promise(resolve => setTimeout(resolve, ms))
Expand All @@ -32,6 +32,8 @@ describe('integration tests', () => {
})
})

jest.setTimeout(7500)

test('resource index is clear of test document', () => {
return client.search({
index: config.get('resourceIndexName'),
Expand Down Expand Up @@ -79,7 +81,7 @@ describe('integration tests', () => {
// Give the pipeline a chance to run
await sleep(4900)

return client.search({
await client.search({
index: config.get('resourceIndexName'),
type: config.get('indexType'),
body: {
Expand All @@ -96,6 +98,33 @@ describe('integration tests', () => {
const firstHit = response.hits.hits[0]
expect(firstHit._source.title[0]).toEqual(resourceTitle)
})

// Pairs of [search phrase, expected total hits] run against the title field
// of the resource index, covering diacritic, case and partial-phrase variants.
const expectedHitsByPhrase = [
  [resourceTitle, 1],
  ['cøol tītlé', 1],
  ['cool title', 1],
  ['cöôl title', 1],
  ['COOL title', 1],
  ['cool', 1],
  ['title', 1],
  ['coooool tiiiitle', 0]
]
for (const [searchPhrase, expectedTotal] of expectedHitsByPhrase) {
  const titleQuery = { match: { title: searchPhrase } }
  const searchResponse = await client.search({
    index: config.get('resourceIndexName'),
    type: config.get('indexType'),
    body: { query: titleQuery }
  })
  // The phrase is part of both sides of the comparison so that a failure
  // message identifies which phrase produced the unexpected hit count.
  expect([searchPhrase, searchResponse.hits.total]).toEqual([searchPhrase, expectedTotal])
}
})

test('new Trellis resource template is not indexed', async () => {
Expand Down
19 changes: 18 additions & 1 deletion src/Indexer.js
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,20 @@ export default class Indexer {
})
}

// https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-asciifolding-tokenfilter.html
indexSettings() {
return {
analysis : {
analyzer : {
default : {
tokenizer : 'standard',
filter : ['asciifolding']
}
}
}
}
}

/**
* Create indices, if needed, and add field mappings
* @returns {null}
Expand All @@ -79,7 +93,10 @@ export default class Indexer {
const indexExists = await this.client.indices.exists({ index: index })

if (!indexExists) {
await this.client.indices.create({ index: index })
// analysis and filter settings must be provided at index creation time; alternatively, the index can be closed, configured, and reopened.
// otherwise, an error is thrown along the lines of "error setting up indices: [illegal_argument_exception] Can't update non dynamic settings"
// https://www.elastic.co/guide/en/elasticsearch/client/javascript-api/6.x/api-reference.html#_indices_create
await this.client.indices.create({ index: index, body: { settings: this.indexSettings() } })
}

await this.client.indices.putMapping({
Expand Down

0 comments on commit a5f659f

Please sign in to comment.