From 15bc61306ec7a7895d75c8ed128f4a76492bc1c7 Mon Sep 17 00:00:00 2001
From: Reece Dunham <me@rdil.rocks>
Date: Fri, 19 Apr 2024 12:30:17 -0400
Subject: [PATCH] Remove obsolete lunr indexing code

---
 docusaurus.config.mjs      |   1 -
 src/search/html-to-doc.mjs | 137 ------------------------------------
 src/search/plugin.mjs      | 139 -------------------------------------
 src/search/utils.mjs       | 100 --------------------------
 4 files changed, 377 deletions(-)
 delete mode 100644 src/search/html-to-doc.mjs
 delete mode 100644 src/search/plugin.mjs
 delete mode 100644 src/search/utils.mjs

diff --git a/docusaurus.config.mjs b/docusaurus.config.mjs
index ff7f9c9..318f8a3 100644
--- a/docusaurus.config.mjs
+++ b/docusaurus.config.mjs
@@ -191,7 +191,6 @@ export default {
             }
         ],
         "@docusaurus/plugin-content-pages",
-        // require.resolve("./src/search/plugin.mjs"),
         [
             "@docusaurus/plugin-sitemap",
             {
diff --git a/src/search/html-to-doc.mjs b/src/search/html-to-doc.mjs
deleted file mode 100644
index 5f1d487..0000000
--- a/src/search/html-to-doc.mjs
+++ /dev/null
@@ -1,137 +0,0 @@
-import { parentPort } from "worker_threads"
-
-// unified imports
-import {unified} from "unified"
-import parse from "rehype-parse"
-import {select} from "hast-util-select"
-import {selectAll} from "hast-util-select"
-import {toText} from "hast-util-to-text"
-import {is} from "unist-util-is"
-import {readSync as toVFile_readSync} from "to-vfile"
-
-const sectionHeaderTest = ({ tagName }) => ["h2", "h3"].includes(tagName)
-
-// Build search data for a html
-function* scanDocuments({ path, url }) {
-    let vfile
-    try {
-        vfile = toVFile_readSync(path)
-    } catch (e) {
-        if (e.code !== "ENOENT") {
-            console.error(
-                `docusaurus-lunr-search:: unable to read file ${path}`
-            )
-            console.error(e)
-        }
-        return
-    }
-
-    const hast = unified().use(parse, { emitParseErrors: false }).parse(vfile)
-
-    const article = select("article", hast)
-    if (!article) {
-        return
-    }
-    const markdown = select(".markdown", article)
-    if (!markdown) {
-        return
-    }
-
-    const pageTitleElement = select("h1", article)
-    if (!pageTitleElement) {
-        return
-    }
-    const pageTitle = toText(pageTitleElement)
-    const sectionHeaders = getSectionHeaders(markdown)
-
-    const keywords = selectAll('meta[name="keywords"]', hast)
-        .reduce((acc, metaNode) => {
-            if (metaNode.properties.content) {
-                return acc.concat(
-                    metaNode.properties.content.replace(/,/g, " ")
-                )
-            }
-            return acc
-        }, [])
-        .join(" ")
-
-    yield {
-        title: pageTitle,
-        type: 0,
-        sectionRef: "#",
-        url,
-        // If there is no sections then push the complete content under page title
-        content: sectionHeaders.length === 0 ? getContent(markdown) : "",
-        keywords,
-    }
-
-    for (const sectionDesc of sectionHeaders) {
-        const { title, content, ref } = sectionDesc
-        yield {
-            title,
-            type: 1,
-            pageTitle,
-            url: `${url}#${ref}`,
-            content,
-        }
-    }
-}
-
-function getContent(element) {
-    return toText(element)
-        .replace(/\s\s+/g, " ")
-        .replace(/(\r\n|\n|\r)/gm, " ")
-        .replace(/&/g, "&amp;")
-        .replace(/</g, "&lt;")
-        .replace(/>/g, "&gt;")
-        .replace(/"/g, "&quot;")
-}
-
-function getSectionHeaders(markdown) {
-    let currentSection = null
-    const result = []
-    let contentsAcc = ""
-    const emitCurrent = () => {
-        const ref = select(".anchor", currentSection)
-        result.push({
-            title: toText(currentSection).replace(/^#+/, "").replace(/#$/, ""),
-            ref: ref ? ref.properties.id : "#",
-            content: contentsAcc,
-        })
-        contentsAcc = ""
-        currentSection = null
-    }
-
-    for (const node of markdown.children) {
-        if (is(node, sectionHeaderTest)) {
-            if (currentSection) {
-                emitCurrent()
-            }
-            currentSection = node
-        } else if (currentSection) {
-            contentsAcc += getContent(node) + " "
-        }
-    }
-    if (currentSection) {
-        emitCurrent()
-    }
-
-    return result
-}
-
-function processFile(file) {
-    let scanned = 0
-    for (const doc of scanDocuments(file)) {
-        scanned = 1
-        parentPort.postMessage([true, doc])
-    }
-    parentPort.postMessage([null, scanned])
-}
-
-parentPort.on("message", (maybeFile) => {
-    if (maybeFile) {
-        processFile(maybeFile)
-    } else {
-        parentPort.close()
-    }
-})
diff --git a/src/search/plugin.mjs b/src/search/plugin.mjs
deleted file mode 100644
index 5f96411..0000000
--- a/src/search/plugin.mjs
+++ /dev/null
@@ -1,139 +0,0 @@
-import fs from "fs"
-import os from "os"
-import path from "path"
-import lunr from "lunr"
-import { Worker } from "worker_threads"
-
-// local imports
-const utils = require("./utils.mjs")
-
-export default (context, options) => {
-    options = options || {}
-    let languages
-
-    const guid = String(Date.now())
-    const fileNames = {
-        searchDoc: `search-doc-${guid}.json`,
-        lunrIndex: `lunr-index-${guid}.json`,
-    }
-
-    return {
-        name: "docusaurus-lunr-search",
-        configureWebpack(config) {
-            // Multilingual issue fix
-            const generatedFilesDir = config.resolve.alias["@generated"]
-            languages = utils.generateLunrClientJS(
-                generatedFilesDir,
-                options.languages
-            )
-            return {}
-        },
-        async contentLoaded({ actions }) {
-            actions.setGlobalData({ fileNames: fileNames })
-        },
-        async postBuild({ routesPaths = [], outDir, baseUrl }) {
-            const [files] = utils.getFilePaths(
-                routesPaths,
-                outDir,
-                baseUrl,
-                options
-            )
-            const searchDocuments = []
-            const lunrBuilder = lunr(function (builder) {
-                if (languages) {
-                    this.use(languages)
-                }
-                this.ref("id")
-                this.field("title", { boost: 200 })
-                this.field("content", { boost: 2 })
-                this.field("keywords", { boost: 100 })
-                this.metadataWhitelist = ["position"]
-
-                const { build } = builder
-                builder.build = () => {
-                    builder.build = build
-                    return builder
-                }
-            })
-
-            const addToSearchData = (d) => {
-                lunrBuilder.add({
-                    id: searchDocuments.length,
-                    title: d.title,
-                    content: d.content,
-                    keywords: d.keywords,
-                })
-                searchDocuments.push(d)
-            }
-
-            await buildSearchData(files, addToSearchData)
-            const lunrIndex = lunrBuilder.build()
-
-            const searchDocFileContents = JSON.stringify(searchDocuments)
-            fs.writeFileSync(
-                path.join(outDir, fileNames.searchDoc),
-                searchDocFileContents
-            )
-
-            const lunrIndexFileContents = JSON.stringify(lunrIndex)
-            fs.writeFileSync(
-                path.join(outDir, fileNames.lunrIndex),
-                lunrIndexFileContents
-            )
-        },
-    }
-}
-
-function buildSearchData(files, addToSearchData) {
-    if (!files.length) {
-        return Promise.resolve()
-    }
-    let activeWorkersCount = 0
-    const workerCount = Math.max(2, os.cpus().length)
-
-    let indexedDocuments = 0 // Documents that have added at least one value to the index
-
-    return new Promise((resolve, reject) => {
-        let nextIndex = 0
-
-        const handleMessage = ([isDoc, payload], worker) => {
-            if (isDoc) {
-                addToSearchData(payload)
-            } else {
-                indexedDocuments += payload
-
-                if (nextIndex < files.length) {
-                    worker.postMessage(files[nextIndex++])
-                } else {
-                    worker.postMessage(null)
-                }
-            }
-        }
-
-        for (let i = 0; i < workerCount; i++) {
-            if (nextIndex >= files.length) {
-                break
-            }
-            const worker = new Worker(path.join(__dirname, "html-to-doc.mjs"))
-            worker.on("error", reject)
-            worker.on("message", (message) => {
-                handleMessage(message, worker)
-            })
-            worker.on("exit", (code) => {
-                if (code !== 0) {
-                    reject(new Error(`Scanner stopped with exit code ${code}`))
-                } else {
-                    // Worker #${i} completed their work in worker pool
-                    activeWorkersCount--
-                    if (activeWorkersCount <= 0) {
-                        // No active workers left, we are done
-                        resolve(indexedDocuments)
-                    }
-                }
-            })
-
-            activeWorkersCount++
-            worker.postMessage(files[nextIndex++])
-        }
-    })
-}
diff --git a/src/search/utils.mjs b/src/search/utils.mjs
deleted file mode 100644
index 43a3fa8..0000000
--- a/src/search/utils.mjs
+++ /dev/null
@@ -1,100 +0,0 @@
-import path from "path"
-import fs from "fs"
-import lunr from "lunr"
-import { minimatch } from "minimatch"
-import { createRequire } from "module"
-
-const require = createRequire(import.meta.url)
-
-/**
- * Based on code from https://github.com/cmfcmf/docusaurus-search-local/
- * by Christian Flach, licensed under the MIT license.
- */
-function generateLunrClientJS(outDir, language = "en") {
-    if (Array.isArray(language) && language.length === 1) {
-        language = language[0]
-    }
-    let lunrClient =
-        "// THIS FILE IS AUTOGENERATED\n" +
-        "// DO NOT EDIT THIS FILE!\n\n" +
-        "import * as lunr from \"lunr\";\n"
-
-    if (language !== "en") {
-        require("lunr-languages/lunr.stemmer.support")(lunr)
-        lunrClient += "require(\"lunr-languages/lunr.stemmer.support\")(lunr);\n"
-        if (Array.isArray(language)) {
-            language
-                .filter((code) => code !== "en")
-                .forEach((code) => {
-                    require(`lunr-languages/lunr.${code}`)(lunr)
-                    lunrClient += `require("lunr-languages/lunr.${code}")(lunr);\n`
-                })
-            require("lunr-languages/lunr.multi")(lunr)
-            lunrClient += `require("lunr-languages/lunr.multi")(lunr);\n`
-        } else {
-            require(`lunr-languages/lunr.${language}`)(lunr)
-            lunrClient += `require("lunr-languages/lunr.${language}")(lunr);\n`
-        }
-    }
-    lunrClient += `export default lunr;\n`
-
-    const lunrClientPath = path.join(outDir, "lunr.client.js")
-    fs.writeFileSync(lunrClientPath, lunrClient)
-
-    if (language !== "en") {
-        if (Array.isArray(language)) {
-            return lunr.multiLanguage(...language)
-        } else {
-            return lunr[language]
-        }
-    }
-    return null
-}
-
-function getFilePaths(routesPaths, outDir, baseUrl, options = {}) {
-    const files = []
-    const addedFiles = new Set()
-    const { excludeRoutes = [], indexBaseUrl = false } = options
-    const meta = {
-        excludedCount: 0
-    }
-
-    routesPaths.forEach((route) => {
-        if (
-            (!indexBaseUrl && route === baseUrl) ||
-            route === `${baseUrl}404.html`
-        ) {
-            return
-        }
-
-        route = route.substring(baseUrl.length)
-        const filePath = path.join(outDir, route, "index.html")
-
-        // In case docs only mode routesPaths has baseUrl twice
-        if (addedFiles.has(filePath)) {
-            return
-        }
-
-        if (
-            excludeRoutes.some((excludePattern) =>
-                minimatch(route, excludePattern)
-            )
-        ) {
-            meta.excludedCount++
-            return
-        }
-
-        files.push({
-            path: filePath,
-            url: route
-        })
-        addedFiles.add(filePath)
-    })
-
-    return [files, meta]
-}
-
-export {
-    generateLunrClientJS,
-    getFilePaths
-}