From b9199abdb77d2010286a0ec4864f7822e33fde0c Mon Sep 17 00:00:00 2001 From: Ulysses Alvarez Date: Thu, 20 Feb 2025 09:28:00 +0000 Subject: [PATCH] fixed issues with postcode multiple entries and a few edge cases for location names. --- db-1740041461001.json | 126 ++++++++++++++++++ package-lock.json | 12 +- package.json | 2 +- src/config/index.js | 2 +- src/server/locations/cy/middleware-cy.js | 8 ++ .../locations/helpers/convert-string.js | 49 ++++++- .../locations/helpers/middleware-helpers.js | 26 +++- src/server/locations/middleware.js | 2 + 8 files changed, 211 insertions(+), 16 deletions(-) create mode 100644 db-1740041461001.json diff --git a/db-1740041461001.json b/db-1740041461001.json new file mode 100644 index 0000000..19eb508 --- /dev/null +++ b/db-1740041461001.json @@ -0,0 +1,126 @@ +{ + "results": [ + { + "addressLine": "ROYAL MAIL, 20, DONEGALL QUAY, BELFAST, BT1 1AA", + "subBuildingName": "ROYAL MAIL", + "buildingNumber": "20", + "street": "DONEGALL QUAY", + "town": "BELFAST", + "administrativeArea": "BELFAST", + "historicCounty": "COUNTY ANTRIM", + "ceremonialCounty": "COUNTY ANTRIM", + "postcode": "BT1 1AA", + "country": "NORTHERN IRELAND", + "xCoordinate": 146778, + "yCoordinate": 530104, + "uprn": "185870402", + "match": "1", + "matchDescription": "EXACT", + "language": "EN" + }, + { + "addressLine": "ROYAL MAIL, 20, DONEGALL QUAY, BELFAST, BT1 1AA", + "subBuildingName": "ROYAL MAIL", + "buildingNumber": "20", + "street": "DONEGALL QUAY", + "town": "BELFAST", + "administrativeArea": "BELFAST", + "historicCounty": "COUNTY ANTRIM", + "ceremonialCounty": "COUNTY ANTRIM", + "postcode": "BT1 1AA", + "country": "NORTHERN IRELAND", + "xCoordinate": 146778, + "yCoordinate": 530104, + "uprn": "185870402", + "match": "1", + "matchDescription": "EXACT", + "language": "EN" + }, + { + "addressLine": "24, MACNEAN PARK, BELCOO EAST***", + "buildingNumber": "24", + "street": "MACNEAN PARK", + "locality": "BELCOO", + "town": "ENNISKILLEN", + "administrativeArea": "FERMANAGH AND OMAGH", + "historicCounty": "COUNTY FERMANAGH", + "ceremonialCounty": "COUNTY FERMANAGH", + "postcode": "BT93 5EZ", + "country": "NORTHERN IRELAND", + "xCoordinate": 18204, + "yCoordinate": 505047, + "uprn": "185718202", + "match": "1", + "matchDescription": "EXACT", + "language": "EN" + }, + { + "addressLine": "47, ELAINE STREET, BELFAST, BT9 5AR", + "buildingNumber": "47", + "street": "ELAINE STREET", + "town": "BELFAST", + "administrativeArea": "BELFAST", + "historicCounty": "COUNTY ANTRIM", + "ceremonialCounty": "COUNTY ANTRIM", + "postcode": "BT9 5AR", + "country": "NORTHERN IRELAND", + "xCoordinate": 145927, + "yCoordinate": 527774, + "uprn": "185070987", + "match": "1", + "matchDescription": "EXACT", + "language": "EN" + }, + { + "addressLine": "***", + "buildingNumber": "70", + "street": "CLIFTONVILLE ROAD", + "town": "BELFAST", + "administrativeArea": "BELFAST", + "historicCounty": "COUNTY ANTRIM", + "ceremonialCounty": "COUNTY ANTRIM", + "postcode": "BT14 6JZ", + "country": "NORTHERN IRELAND", + "xCoordinate": 145561, + "yCoordinate": 531625, + "uprn": "185759380", + "match": "1", + "matchDescription": "EXACT", + "language": "EN" + }, + { + "addressLine": "1 HARTINGTON COURT, HARTINGTON ROAD, LONDON, W4 3TT", + "buildingName": "1 HARTINGTON COURT", + "street": "HARTINGTON ROAD", + "town": "LONDON", + "administrativeArea": "HOUNSLOW", + "historicCounty": "MIDDLESEX", + "ceremonialCounty": "GREATER LONDON", + "postcode": "W4 3TT", + "country": "ENGLAND", + "xCoordinate": 519925, + "yCoordinate": 177202, + "uprn": "100021585737", + "match": "1", + "matchDescription": "EXACT", + "language": "EN" + }, + { + "addressLine": "13 GARTH COURT, ELLESMERE ROAD, LONDON, W4 4QL", + "buildingName": "13 GARTH COURT", + "street": "ELLESMERE ROAD", + "town": "LONDON", + "administrativeArea": "HOUNSLOW", + "historicCounty": "MIDDLESEX", + "ceremonialCounty": "GREATER LONDON", + "postcode": "W4 4QL", + "country": "ENGLAND", + "xCoordinate": 520534, + "yCoordinate": 177980, + "uprn": "100023491749", + "match": "1", + "matchDescription": "EXACT", + "language": "EN" + } + ] +} diff --git a/package-lock.json b/package-lock.json index 2ea69b8..a50ba07 100644 --- a/package-lock.json +++ b/package-lock.json @@ -35,7 +35,7 @@ "nunjucks": "3.2.4", "pino": "8.20.0", "proj4": "2.9.0", - "undici": "6.12.0" + "undici": "6.21.1" }, "devDependencies": { "@babel/cli": "7.24.1", @@ -11063,6 +11063,7 @@ "resolved": "https://registry.npmjs.org/json-server/-/json-server-0.17.4.tgz", "integrity": "sha512-bGBb0WtFuAKbgI7JV3A864irWnMZSvBYRJbohaOuatHwKSRFUfqtQlrYMrB6WbalXy/cJabyjlb7JkHli6dYjQ==", "dev": true, + "license": "MIT", "dependencies": { "body-parser": "^1.19.0", "chalk": "^4.1.2", @@ -15426,11 +15427,12 @@ "dev": true }, "node_modules/undici": { - "version": "6.12.0", - "resolved": "https://registry.npmjs.org/undici/-/undici-6.12.0.tgz", - "integrity": "sha512-d87yk8lqSFUYtR5fTFe2frpkMIrUEz+lgoJmhcL+J3StVl+8fj8ytE4lLnJOTPCE12YbumNGzf4LYsQyusdV5g==", + "version": "6.21.1", + "resolved": "https://registry.npmjs.org/undici/-/undici-6.21.1.tgz", + "integrity": "sha512-q/1rj5D0/zayJB2FraXdaWxbhWiNKDvu8naDT2dl1yTlvJp4BLtOcp2a5BvgGNQpYYJzau7tf1WgKv3b+7mqpQ==", + "license": "MIT", "engines": { - "node": ">=18.0" + "node": ">=18.17" } }, "node_modules/undici-types": { diff --git a/package.json b/package.json index 1dd598a..1ffcc9c 100644 --- a/package.json +++ b/package.json @@ -60,7 +60,7 @@ "nunjucks": "3.2.4", "pino": "8.20.0", "proj4": "2.9.0", - "undici": "6.12.0" + "undici": "6.21.1" }, "devDependencies": { "@babel/cli": "7.24.1", diff --git a/src/config/index.js b/src/config/index.js index 3c04d9b..80fac6e 100644 --- a/src/config/index.js +++ b/src/config/index.js @@ -56,7 +56,7 @@ const config = convict({ enabledMock: { doc: 'Enabled Mock Data for Northern Ireland Names API', format: Boolean, - default: false + default: true }, logLevel: { doc: 'Logging level', diff --git a/src/server/locations/cy/middleware-cy.js b/src/server/locations/cy/middleware-cy.js index c7bc577..315c7f3 100644 --- a/src/server/locations/cy/middleware-cy.js +++ b/src/server/locations/cy/middleware-cy.js @@ -219,6 +219,14 @@ const searchMiddlewareCy = async (request, h) => { locationNameOrPostcode, lang ) + if ( + !getNIPlaces?.results || + getNIPlaces?.results.length === 0 || + getNIPlaces === 'wrong postcode' + ) { + request.yar.set('locationDataNotFound', { locationNameOrPostcode, lang }) + return h.redirect('/lleoliad-heb-ei-ganfod/cy').takeover() + } logger.info(`::::::LOCATION_TYPE_NI-CY::::::: , ${getNIPlaces?.results[0]}`) nearestLocationsRangeEnglish = getNearestLocation( diff --git a/src/server/locations/helpers/convert-string.js b/src/server/locations/helpers/convert-string.js index e51b388..237ce23 100644 --- a/src/server/locations/helpers/convert-string.js +++ b/src/server/locations/helpers/convert-string.js @@ -50,7 +50,7 @@ function extractAndFormatUKPostcode(headerTitle) { if (match) { // Check if a postcode is found const postcode = match[0] // Extract the matched postcode - // postcode = postcode.replace(/[-_]/g, ' '); // Replace hyphens and underscores with spaces in the postc + // postcode = postcode.replace(/[-_]/g, ' ') // Replace hyphens and underscores with spaces in the postc return postcode // Return the formatted postcode } return null // Return null if no postcode is found @@ -64,7 +64,7 @@ function removeAllWordsAfterUnderscore(str) { function isValidPartialPostcode(postcode) { // Define a function to validate if a string is a partial postcode - const partialPostcodeRegex = /\b(?!BT)(?:[A-Z]{1,2}\d{1,2}|EN1|EN8|N8)\b/i // Define a regular expression to match UK partial postcodes' return partialPostcodeRegex.test(postcode); // Test the string against the regular expression '' + const partialPostcodeRegex = /\b(?!BT)(?:[A-Z]{1,2}\d{1,2}|EN1|EN8|N8)\b/i // Define a regular expression to match UK partial postcodes' return partialPostcodeRegex.test(postcode) // Test the string against the regular expression '' return partialPostcodeRegex.test(postcode) // Test the string against the regular expression } @@ -85,6 +85,47 @@ function formatUKPostcode(postcode) { return postcode.toUpperCase() // Return the original postcode in uppercase if it doesn't match the regex } +function splitAndCheckSpecificWords(sourceString, targetString) { + // Define a function to split a string and check if another string contains the exact first two words together or the exact last word + const words = sourceString.split(' ') // Split the source string into an array of words + if (words.length === 2) { + // Check if the source string contains exactly two words + const [firstWord, secondWord] = words // Destructure the array to get the first two words + const firstTwoWords = `${firstWord} ${secondWord}` // Combine the first two words + const exactLastWord = new RegExp(`\\b${secondWord}\\b`) // Create a regex to match the exact last word + const joinedWords = words.join(' ') // Join the words with a space + return ( + targetString.includes(firstTwoWords) || + exactLastWord.test(targetString) || + joinedWords + ) // Check if the target string contains the exact first two words together or the exact last word + } else if (words.length === 3) { + // Check if the source string contains exactly three words + const [firstWord, secondWord, lastWord] = words // Destructure the array to get the first two and the last word + const firstTwoWords = `${firstWord} ${secondWord}` // Combine the first two words + const exactLastWord = new RegExp(`\\b${lastWord}\\b`) // Create a regex to match the exact last word + const firstArray = [firstWord, secondWord] + const joinedWords = firstArray.join('') + const exactJoinedWord = new RegExp(`\\b${joinedWords}\\b`) + return ( + targetString.includes(firstTwoWords) || + exactLastWord.test(targetString) || + exactJoinedWord.test(joinedWords) + ) // Check if the target string contains the exact first two words together or the exact last word + } + return false // Return false if the source string does not contain exactly two or three words +} + +function splitAndCheckExactWords(sourceString, targetString) { + // Define a function to split a string and check if another string contains exactly any of the three words + const words = sourceString.split(' ') // Split the source string into an array of words + if (words.length >= 3) { + // Check if the source string contains exactly three words + return words.some((word) => new RegExp(`\\b${word}\\b`).test(targetString)) // Check if the target string contains exactly any of the three words + } + return false // Return false if the source string does not contain exactly three words +} + export { removeAllWordsAfterUnderscore, convertStringToHyphenatedLowercaseWords, @@ -95,5 +136,7 @@ export { isValidPartialPostcode, splitAndKeepFirstWord, formatUKPostcode, - isValidFullPostcode + isValidFullPostcode, + splitAndCheckSpecificWords, + splitAndCheckExactWords } diff --git a/src/server/locations/helpers/middleware-helpers.js b/src/server/locations/helpers/middleware-helpers.js index 81304ab..ebf775f 100644 --- a/src/server/locations/helpers/middleware-helpers.js +++ b/src/server/locations/helpers/middleware-helpers.js @@ -8,7 +8,8 @@ import { splitAndKeepFirstWord, removeLastWordAndAddHyphens, isValidFullPostcode, - formatUKPostcode + formatUKPostcode, + splitAndCheckSpecificWords } from '~/src/server/locations/helpers/convert-string' import { LANG_EN, LANG_CY } from '~/src/server/data/constants' import { createLogger } from '~/src/server/common/helpers/logging/logger' @@ -146,6 +147,7 @@ const processMatches = ( searchTerms, secondSearchTerm ) => { + const fullPostcodePattern = /\b([A-Z]{1,2}\d[A-Z\d]?)\s?(\d[A-Z]{2})\b/i const partialPostcodePattern = /\b(?!BT)(?:[A-Z]{1,2}\d{1,2}|EN1|EN8|N8)\b/i let newMatches = matches.filter((item) => { const name1 = item?.GAZETTEER_ENTRY.NAME1.toUpperCase().replace(/\s+/g, '') @@ -219,24 +221,34 @@ const processMatches = ( logger.info(`name1 default in middleware NI ${name1}`) logger.info(`userLocation default in middleware NI ${userLocation}`) logger.info(`name2 default in middleware NI ${name2}`) + const checkWords = splitAndCheckSpecificWords(userLocation, name1) return ( + checkWords || name1.includes(userLocation.replace(/\s+/g, '')) || userLocation.includes(name1) || userLocation.includes(name2) ) }) if ( - (partialPostcodePattern.test(locationNameOrPostcode.toUpperCase()) && + newMatches.length > 3 && + !fullPostcodePattern.test(locationNameOrPostcode.toUpperCase()) && + !partialPostcodePattern.test(locationNameOrPostcode.toUpperCase()) + ) { + newMatches = newMatches.slice(0, 2) + } + if ( + (fullPostcodePattern.test(locationNameOrPostcode.toUpperCase()) && newMatches.length > 0) || - (!partialPostcodePattern.test(locationNameOrPostcode.toUpperCase()) && - newMatches.length === 2) + (fullPostcodePattern.test(locationNameOrPostcode.toUpperCase()) && + newMatches.length === 2) || + newMatches.length > 3 ) { if (newMatches[0].GAZETTEER_ENTRY.NAME2) { newMatches[0].GAZETTEER_ENTRY.NAME1 = newMatches[0].GAZETTEER_ENTRY.NAME2 } else { newMatches[0].GAZETTEER_ENTRY.NAME1 = locationNameOrPostcode.toUpperCase() // Set the name to the partial postcode } - newMatches = [newMatches[0]] + const urlRoute = `${newMatches[0].GAZETTEER_ENTRY.NAME1}_${newMatches[0].GAZETTEER_ENTRY.DISTRICT_BOROUGH}` let headerTitle = convertStringToHyphenatedLowercaseWords(urlRoute) headerTitle = headerTitle.replace(/-/g, ' ') @@ -246,6 +258,8 @@ const processMatches = ( ? splitAndKeepFirstWord(headerTitle) : removeLastWordAndAddHyphens(headerTitle) newMatches[0].GAZETTEER_ENTRY.ID = finalHeaderTitle + newMatches = [newMatches[0]] + return newMatches } @@ -297,7 +311,7 @@ const getTitleAndHeaderTitle = (locationDetails, locationNameOrPostcode) => { } else { title = `${locationNameOrPostcode}, ${locationDetails[0].GAZETTEER_ENTRY.COUNTY_UNITARY} - ${home.pageTitle}` headerTitle = `${locationNameOrPostcode}, ${locationDetails[0].GAZETTEER_ENTRY.COUNTY_UNITARY}` - urlRoute = `${locationNameOrPostcode}_${locationDetails[0].GAZETTEER_ENTRY.COUNTY_UNITARY}` + urlRoute = `${locationDetails[0].GAZETTEER_ENTRY.NAME1}_${locationDetails[0].GAZETTEER_ENTRY.COUNTY_UNITARY}` term1 = locationNameOrPostcode } } diff --git a/src/server/locations/middleware.js b/src/server/locations/middleware.js index 2b5277b..8916c8d 100644 --- a/src/server/locations/middleware.js +++ b/src/server/locations/middleware.js @@ -222,6 +222,8 @@ const searchMiddleware = async (request, h) => { backlink, cookieBanner, calendarWelsh, + headerTitle, + title, month, welshDate, englishDate,