From 3b6c1baa2869fe7d77553966e2a17b7d2eba5683 Mon Sep 17 00:00:00 2001 From: Martin Richtarsky Date: Tue, 13 Feb 2024 16:16:24 +0100 Subject: [PATCH] Look for an HTML article element for content Quite a few sites use this, but fail the current heuristic. --- src/utils/get-core-content-text.mjs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/utils/get-core-content-text.mjs b/src/utils/get-core-content-text.mjs index f179bde8..bef4dc8f 100644 --- a/src/utils/get-core-content-text.mjs +++ b/src/utils/get-core-content-text.mjs @@ -43,23 +43,28 @@ function findLargestElement(e) { } export function getCoreContentText() { + function getTextFrom(e) { + return e.innerText || e.textContent + } + for (const [siteName, selectors] of Object.entries(adapters)) { if (location.hostname.includes(siteName)) { const element = getPossibleElementByQuerySelector(selectors) - if (element) return element.innerText || element.textContent + if (element) return getTextFrom(element) break } } + const element = document.querySelector('article') + if (element) { + return getTextFrom(element) + } + const largestElement = findLargestElement(document.body) const secondLargestElement = findLargestElement(largestElement) console.log(largestElement) console.log(secondLargestElement) - function getTextFrom(e) { - return e.innerText || e.textContent - } - let ret if (!largestElement) { ret = getTextFrom(document.body)