From 15bc61306ec7a7895d75c8ed128f4a76492bc1c7 Mon Sep 17 00:00:00 2001 From: Reece Dunham Date: Fri, 19 Apr 2024 12:30:17 -0400 Subject: [PATCH] Remove obsolete lunr indexing code --- docusaurus.config.mjs | 1 - src/search/html-to-doc.mjs | 137 ------------------------------------ src/search/plugin.mjs | 139 ------------------------------------- src/search/utils.mjs | 100 -------------------------- 4 files changed, 377 deletions(-) delete mode 100644 src/search/html-to-doc.mjs delete mode 100644 src/search/plugin.mjs delete mode 100644 src/search/utils.mjs diff --git a/docusaurus.config.mjs b/docusaurus.config.mjs index ff7f9c9..318f8a3 100644 --- a/docusaurus.config.mjs +++ b/docusaurus.config.mjs @@ -191,7 +191,6 @@ export default { } ], "@docusaurus/plugin-content-pages", - // require.resolve("./src/search/plugin.mjs"), [ "@docusaurus/plugin-sitemap", { diff --git a/src/search/html-to-doc.mjs b/src/search/html-to-doc.mjs deleted file mode 100644 index 5f1d487..0000000 --- a/src/search/html-to-doc.mjs +++ /dev/null @@ -1,137 +0,0 @@ -import { parentPort } from "worker_threads" - -// unified imports -import {unified} from "unified" -import parse from "rehype-parse" -import {select} from "hast-util-select" -import {selectAll} from "hast-util-select" -import {toText} from "hast-util-to-text" -import {is} from "unist-util-is" -import {readSync as toVFile_readSync} from "to-vfile" - -const sectionHeaderTest = ({ tagName }) => ["h2", "h3"].includes(tagName) - -// Build search data for a html -function* scanDocuments({ path, url }) { - let vfile - try { - vfile = toVFile_readSync(path) - } catch (e) { - if (e.code !== "ENOENT") { - console.error( - `docusaurus-lunr-search:: unable to read file ${path}` - ) - console.error(e) - } - return - } - - const hast = unified().use(parse, { emitParseErrors: false }).parse(vfile) - - const article = select("article", hast) - if (!article) { - return - } - const markdown = select(".markdown", article) - if (!markdown) { - return - } - - const pageTitleElement = select("h1", article) - if (!pageTitleElement) { - return - } - const pageTitle = toText(pageTitleElement) - const sectionHeaders = getSectionHeaders(markdown) - - const keywords = selectAll('meta[name="keywords"]', hast) - .reduce((acc, metaNode) => { - if (metaNode.properties.content) { - return acc.concat( - metaNode.properties.content.replace(/,/g, " ") - ) - } - return acc - }, []) - .join(" ") - - yield { - title: pageTitle, - type: 0, - sectionRef: "#", - url, - // If there is no sections then push the complete content under page title - content: sectionHeaders.length === 0 ? getContent(markdown) : "", - keywords, - } - - for (const sectionDesc of sectionHeaders) { - const { title, content, ref } = sectionDesc - yield { - title, - type: 1, - pageTitle, - url: `${url}#${ref}`, - content, - } - } -} - -function getContent(element) { - return toText(element) - .replace(/\s\s+/g, " ") - .replace(/(\r\n|\n|\r)/gm, " ") - .replace(/&/g, "&") - .replace(//g, ">") - .replace(/"/g, """) -} - -function getSectionHeaders(markdown) { - let currentSection = null - const result = [] - let contentsAcc = "" - const emitCurrent = () => { - const ref = select(".anchor", currentSection) - result.push({ - title: toText(currentSection).replace(/^#+/, "").replace(/#$/, ""), - ref: ref ? ref.properties.id : "#", - content: contentsAcc, - }) - contentsAcc = "" - currentSection = null - } - - for (const node of markdown.children) { - if (is(node, sectionHeaderTest)) { - if (currentSection) { - emitCurrent() - } - currentSection = node - } else if (currentSection) { - contentsAcc += getContent(node) + " " - } - } - if (currentSection) { - emitCurrent() - } - - return result -} - -function processFile(file) { - let scanned = 0 - for (const doc of scanDocuments(file)) { - scanned = 1 - parentPort.postMessage([true, doc]) - } - parentPort.postMessage([null, scanned]) -} - -parentPort.on("message", (maybeFile) => { - if (maybeFile) { - processFile(maybeFile) - } else { - parentPort.close() - } -}) diff --git a/src/search/plugin.mjs b/src/search/plugin.mjs deleted file mode 100644 index 5f96411..0000000 --- a/src/search/plugin.mjs +++ /dev/null @@ -1,139 +0,0 @@ -import fs from "fs" -import os from "os" -import path from "path" -import lunr from "lunr" -import { Worker } from "worker_threads" - -// local imports -const utils = require("./utils.mjs") - -export default (context, options) => { - options = options || {} - let languages - - const guid = String(Date.now()) - const fileNames = { - searchDoc: `search-doc-${guid}.json`, - lunrIndex: `lunr-index-${guid}.json`, - } - - return { - name: "docusaurus-lunr-search", - configureWebpack(config) { - // Multilingual issue fix - const generatedFilesDir = config.resolve.alias["@generated"] - languages = utils.generateLunrClientJS( - generatedFilesDir, - options.languages - ) - return {} - }, - async contentLoaded({ actions }) { - actions.setGlobalData({ fileNames: fileNames }) - }, - async postBuild({ routesPaths = [], outDir, baseUrl }) { - const [files] = utils.getFilePaths( - routesPaths, - outDir, - baseUrl, - options - ) - const searchDocuments = [] - const lunrBuilder = lunr(function (builder) { - if (languages) { - this.use(languages) - } - this.ref("id") - this.field("title", { boost: 200 }) - this.field("content", { boost: 2 }) - this.field("keywords", { boost: 100 }) - this.metadataWhitelist = ["position"] - - const { build } = builder - builder.build = () => { - builder.build = build - return builder - } - }) - - const addToSearchData = (d) => { - lunrBuilder.add({ - id: searchDocuments.length, - title: d.title, - content: d.content, - keywords: d.keywords, - }) - searchDocuments.push(d) - } - - await buildSearchData(files, addToSearchData) - const lunrIndex = lunrBuilder.build() - - const searchDocFileContents = JSON.stringify(searchDocuments) - fs.writeFileSync( - path.join(outDir, fileNames.searchDoc), - searchDocFileContents - ) - - const lunrIndexFileContents = JSON.stringify(lunrIndex) - fs.writeFileSync( - path.join(outDir, fileNames.lunrIndex), - lunrIndexFileContents - ) - }, - } -} - -function buildSearchData(files, addToSearchData) { - if (!files.length) { - return Promise.resolve() - } - let activeWorkersCount = 0 - const workerCount = Math.max(2, os.cpus().length) - - let indexedDocuments = 0 // Documents that have added at least one value to the index - - return new Promise((resolve, reject) => { - let nextIndex = 0 - - const handleMessage = ([isDoc, payload], worker) => { - if (isDoc) { - addToSearchData(payload) - } else { - indexedDocuments += payload - - if (nextIndex < files.length) { - worker.postMessage(files[nextIndex++]) - } else { - worker.postMessage(null) - } - } - } - - for (let i = 0; i < workerCount; i++) { - if (nextIndex >= files.length) { - break - } - const worker = new Worker(path.join(__dirname, "html-to-doc.mjs")) - worker.on("error", reject) - worker.on("message", (message) => { - handleMessage(message, worker) - }) - worker.on("exit", (code) => { - if (code !== 0) { - reject(new Error(`Scanner stopped with exit code ${code}`)) - } else { - // Worker #${i} completed their work in worker pool - activeWorkersCount-- - if (activeWorkersCount <= 0) { - // No active workers left, we are done - resolve(indexedDocuments) - } - } - }) - - activeWorkersCount++ - worker.postMessage(files[nextIndex++]) - } - }) -} diff --git a/src/search/utils.mjs b/src/search/utils.mjs deleted file mode 100644 index 43a3fa8..0000000 --- a/src/search/utils.mjs +++ /dev/null @@ -1,100 +0,0 @@ -import path from "path" -import fs from "fs" -import lunr from "lunr" -import { minimatch } from "minimatch" -import { createRequire } from "module" - -const require = createRequire(import.meta.url) - -/** - * Based on code from https://github.com/cmfcmf/docusaurus-search-local/ - * by Christian Flach, licensed under the MIT license. - */ -function generateLunrClientJS(outDir, language = "en") { - if (Array.isArray(language) && language.length === 1) { - language = language[0] - } - let lunrClient = - "// THIS FILE IS AUTOGENERATED\n" + - "// DO NOT EDIT THIS FILE!\n\n" + - "import * as lunr from \"lunr\";\n" - - if (language !== "en") { - require("lunr-languages/lunr.stemmer.support")(lunr) - lunrClient += "require(\"lunr-languages/lunr.stemmer.support\")(lunr);\n" - if (Array.isArray(language)) { - language - .filter((code) => code !== "en") - .forEach((code) => { - require(`lunr-languages/lunr.${code}`)(lunr) - lunrClient += `require("lunr-languages/lunr.${code}")(lunr);\n` - }) - require("lunr-languages/lunr.multi")(lunr) - lunrClient += `require("lunr-languages/lunr.multi")(lunr);\n` - } else { - require(`lunr-languages/lunr.${language}`)(lunr) - lunrClient += `require("lunr-languages/lunr.${language}")(lunr);\n` - } - } - lunrClient += `export default lunr;\n` - - const lunrClientPath = path.join(outDir, "lunr.client.js") - fs.writeFileSync(lunrClientPath, lunrClient) - - if (language !== "en") { - if (Array.isArray(language)) { - return lunr.multiLanguage(...language) - } else { - return lunr[language] - } - } - return null -} - -function getFilePaths(routesPaths, outDir, baseUrl, options = {}) { - const files = [] - const addedFiles = new Set() - const { excludeRoutes = [], indexBaseUrl = false } = options - const meta = { - excludedCount: 0 - } - - routesPaths.forEach((route) => { - if ( - (!indexBaseUrl && route === baseUrl) || - route === `${baseUrl}404.html` - ) { - return - } - - route = route.substring(baseUrl.length) - const filePath = path.join(outDir, route, "index.html") - - // In case docs only mode routesPaths has baseUrl twice - if (addedFiles.has(filePath)) { - return - } - - if ( - excludeRoutes.some((excludePattern) => - minimatch(route, excludePattern) - ) - ) { - meta.excludedCount++ - return - } - - files.push({ - path: filePath, - url: route - }) - addedFiles.add(filePath) - }) - - return [files, meta] -} - -export { - generateLunrClientJS, - getFilePaths -}