Skip to content

Commit

Permalink
Site-specific parsers & streamline images
Browse files Browse the repository at this point in the history
  • Loading branch information
TetraTsunami committed Jun 13, 2024
1 parent 2f8b114 commit 15a8824
Show file tree
Hide file tree
Showing 6 changed files with 93 additions and 84 deletions.
75 changes: 26 additions & 49 deletions src/background/parsing.ts
Original file line number Diff line number Diff line change
@@ -1,32 +1,25 @@
import { Readability } from "@mozilla/readability";
import parseSiteSpecific from "./services";

export interface Meta {
title: string,
description?: string,
image?: {
url: string,
width?: string,
height?: string
},
imageUrl?: string
}

const metaFromHTML = (doc: Document, url: string) => {
const parseHTMLMeta = (doc: Document, url: string) => {
addBaseElement(doc, url);
const title = (doc.querySelector('meta[property="og:title"]') as HTMLMetaElement)?.content ||
doc.querySelector('title')?.textContent;
const description = (doc.querySelector('meta[property="og:description"]') as HTMLMetaElement)?.content ||
(doc.querySelector('meta[name="description"]') as HTMLMetaElement)?.content;
const ogImage = {
url: (doc.querySelector('meta[property="og:image"]') as HTMLMetaElement)?.content ||
(doc.querySelector('meta[property="og:image:url"]') as HTMLMetaElement)?.content,
width: (doc.querySelector('meta[property="og:image:width"]') as HTMLMetaElement)?.content,
height: (doc.querySelector('meta[property="og:image:height"]') as HTMLMetaElement)?.content
}
const image = ogImage.url ? ogImage : getFirstImage(doc, url);
const imageUrl = (doc.querySelector('meta[property="og:image"]') as HTMLMetaElement)?.content ||
(doc.querySelector('meta[property="og:image:url"]') as HTMLMetaElement)?.content ||
getFirstImage(doc, url);
return {
title,
description,
image,
imageUrl
} as Meta
}

Expand All @@ -46,47 +39,31 @@ const getFirstImage = (doc: Document, url: string) => {
const src = (img as HTMLImageElement).src;
// The src might be relative, so we need to convert it to an absolute URL
if (src && src.startsWith('http')) {
imgObj.url = src;
return src;
} else {
imgObj.url = new URL(src, url).href;
return new URL(src, url).href;
}

// Set image width and height properties if respective attributes exist
if ((img as HTMLImageElement).width) imgObj.width = (img as HTMLImageElement).width.toString();
if ((img as HTMLImageElement).height) imgObj.height = (img as HTMLImageElement).height.toString();
return imgObj;
}
}

const readabilityParse = async (doc: Document) => {
const reader = new Readability(doc)
return reader.parse()
const parseReadability = async (doc: Document) => {
const documentClone = doc.cloneNode(true);
// @ts-ignore - Readability wants a document and we're giving it a node, but it doesn't actually matter
return new Readability(documentClone).parse()
}

const mergeMeta = (tags: Meta, parsed?: any) => {
// There's some overlap, so we'll return a merged object with only the keys we need
if (!parsed) {
return {
title: tags.title,
description: tags.description,
image: tags.image,
body: "",
siteName: "",
};
}
return {
title: tags.title || parsed.title,
description: tags.description || parsed.excerpt,
image: tags.image,
body: parsed.textContent || "",
siteName: parsed.siteName || "",
};
};



// Main entry point for parsing
export const parseAndReply = async (doc: Document, url: string, res: (response?: any) => void) => {
const meta = metaFromHTML(doc, url)
const readability = await readabilityParse(doc)
res(mergeMeta(meta, readability))
const meta = parseHTMLMeta(doc, url)
const readability = await parseReadability(doc)
const siteSpecific = await parseSiteSpecific(doc, url)
const data = {
title: meta.title || readability?.title,
description: meta.description || readability?.excerpt,
imageUrl: meta.imageUrl,
body: siteSpecific.body || readability?.textContent,
siteName: readability?.siteName,
...siteSpecific
}
res(data)
}
24 changes: 24 additions & 0 deletions src/background/services/github.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import type { Parser } from "."

/**
 * Site-specific parser for GitHub repository pages.
 *
 * Instead of relying on the rendered page, it asks the GitHub REST API for the
 * repository README and uses the decoded text as the page body.
 */
const GithubParser: Parser = {
  /**
   * Reports whether `url` points at `https://github.com/<owner>/<repo>[/...]`.
   *
   * @param doc - The page document (unused; the decision is made from the URL alone).
   * @param url - The URL of the page being parsed.
   * @returns `true` only when the host is exactly github.com and both the
   *          owner and repo path segments are non-empty.
   */
  matches: async (doc: Document, url: string) => {
    let parsed: URL
    try {
      parsed = new URL(url)
    } catch {
      // Not a parseable absolute URL, so it cannot be a GitHub repository page.
      return false
    }
    // Compare the parsed host rather than pattern-matching the raw string, so
    // look-alike hosts such as "githubxcom" or "github.com.evil.com" are rejected.
    if (parsed.protocol !== "https:" || parsed.hostname !== "github.com") return false
    const [, owner, repo] = parsed.pathname.split("/") // ["", "user", "repo", ...]
    return Boolean(owner && repo)
  },

  /**
   * Fetches and decodes the repository README.
   *
   * @param node - A clone of the page document (unused by this parser).
   * @param url - The URL of the repository page.
   * @returns The body text (page URL plus README contents) and the site name.
   * @throws Error when the API request fails or the response carries no README content.
   */
  parse: async (node: Node, url: string) => {
    const [, owner, repo] = new URL(url).pathname.split("/") // ["", "user", "repo", ...]
    const res = await fetch(`https://api.github.com/repos/${owner}/${repo}/readme`)
    if (!res.ok) {
      throw new Error(`GitHub README request failed with status ${res.status}`)
    }
    const data = await res.json()
    if (typeof data?.content !== "string") {
      throw new Error("GitHub README response did not include any content")
    }
    // The API returns base64 with embedded line breaks. atob() alone produces a
    // byte-per-character string, so the bytes are decoded as UTF-8 explicitly to
    // keep non-ASCII README text intact.
    const binary = atob(data.content.replace(/\s/g, ""))
    const bytes = Uint8Array.from(binary, (ch) => ch.charCodeAt(0))
    const decoded = new TextDecoder().decode(bytes)
    return {
      body: `URL: ${url}\nREADME: ${decoded}`,
      siteName: "Github",
    }
  }
}

export default GithubParser
23 changes: 23 additions & 0 deletions src/background/services/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import github from './github';
// Registry of site-specific parsers; parseSiteSpecific walks this array in order.
const parsers = [github];

/**
 * Contract implemented by each site-specific parser listed in `parsers`.
 */
export interface Parser {
/** Resolves to true when this parser should handle the given document/URL. */
matches: (doc: Document, url: string) => Promise<boolean>,
/**
 * Extracts page data from the given node and URL. Every field of the result
 * is optional, so a parser may supply only the pieces it knows about.
 */
parse: (node: Node, url: string) => Promise<{ title?: string, description?: string, imageUrl?: string, body?: string, siteName?: string }>
}

/**
 * Runs the first registered parser that claims the page.
 *
 * Parsers are consulted in registry order. The chosen parser receives a deep
 * clone of the document rather than the original. If a parser throws, whether
 * while matching or while parsing, the error is logged and the next parser is
 * tried.
 *
 * @param doc - The document for the page being parsed.
 * @param url - The URL of that page.
 * @returns The chosen parser's result, or an empty object when no parser
 *          produced one.
 */
const parseSiteSpecific = async (doc: Document, url: string) => {
  for (let index = 0; index < parsers.length; index++) {
    const candidate = parsers[index];
    try {
      const applies = await candidate.matches(doc, url);
      if (!applies) {
        continue;
      }
      const snapshot = doc.cloneNode(true);
      return await candidate.parse(snapshot, url);
    } catch (err) {
      console.error(err);
    }
  }
  return {};
}

export default parseSiteSpecific;
19 changes: 4 additions & 15 deletions src/contents/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,6 @@ export const getStyle = () => {
return style
}

interface Meta {
title: string,
description?: string,
image?: {
url: string,
width?: string,
height?: string
},
}

const settings = new Storage()

const getConfig = async () => {
Expand All @@ -47,7 +37,6 @@ const getConfig = async () => {

chrome.runtime.onMessage.addListener((msg, sender, response) => { // Get HTML of current page
if (msg.name === "DOMInfo") {
console.log("Received DOMInfo request")
try {
response({ html: document.documentElement.outerHTML })
} catch (err) {
Expand Down Expand Up @@ -191,15 +180,15 @@ const ContentPopup = () => {
}
}

const renderTagPopup = (tagData: { title: any; description: string; image: any; siteName: string }) => {
const renderTagPopup = (tagData: { title: any; description: string; imageUrl: string; siteName: string }) => {
if (!tagData.title && !tagData.description) {
throw new Error("No data found")
}
setTitle(tagData.title)
setPublisher(tagData.siteName)
setImageUrl(tagData.image.url || "")
setImageUrl(tagData.imageUrl)
setDescription(tagData.description)
if (!tagData.image) {
if (!tagData.imageUrl) {
imageLoaded()
} else {
setTimeout(() => {
Expand Down Expand Up @@ -339,7 +328,7 @@ const ContentPopup = () => {
display: animationState == "closed" ? "none" : "block"
}}>
{animationState == "opening" && <div className="loader" />}
<div className={`flex flex-col overflow-y-auto overscroll-none ${animationState != "opening" ? "inner-popup" : "none" }`}
<div className={`flex flex-col overflow-y-auto max-w-full overscroll-none ${animationState != "opening" ? "inner-popup" : "none" }`}
style={{"--maxHeight": `${maxHeight}px`} as React.CSSProperties}>
<img // In Firefox, CSP may block the image if the img tag is created with a src attribute. We can't do {imageUrl && ...} nonsense here.
onLoad={imageLoaded}
Expand Down
35 changes: 16 additions & 19 deletions src/popup/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -97,17 +97,17 @@ const Popup = () => {
const renderTagPopup = (tagData: {
title: any
description: string
image: any
imageUrl: string
siteName: string
}) => {
if (!tagData.title && !tagData.description) {
throw new Error("No data found")
}
setTitle(tagData.title)
setPublisher(tagData.siteName)
setImageUrl(tagData.image.url || "")
setImageUrl(tagData.imageUrl || "")
setDescription(tagData.description)
if (!tagData.image) {
if (!tagData.imageUrl) {
imageLoaded()
} else {
setTimeout(() => {
Expand All @@ -117,9 +117,9 @@ const Popup = () => {
}

const updatePopup = async () => {
chrome.tabs.query({ active: true, currentWindow: true }, (tabs) => {
chrome.tabs.sendMessage(tabs[0].id, { name: "DOMInfo" }, (resp) => {
try {
try {
chrome.tabs.query({ active: true, currentWindow: true }, (tabs) => {
chrome.tabs.sendMessage(tabs[0].id, { name: "DOMInfo" }, (resp) => {
if (!resp) {
window.close()
return
Expand All @@ -142,24 +142,21 @@ const Popup = () => {
}
}
)
} catch (e) {
setDescription(e)
setIsDoneLoading(true)
}
})
})
})
} catch (e) {
setDescription(e)
setIsDoneLoading(true)
}
}

// If it's got transparency, we don't want to cut it off (could be icon or logo) = use contain. Otherwise, it looks prettier to use cover
const getImageType = () => {
if (!imageUrl) {
return
}
if (!imageUrl) {return}
if (imageRef && imageRef.current) {
if (Math.abs(imageRef.current.width / imageRef.current.height - 1) < 0.1)
return "image-contain"
if (imageRef.current.width < 100 || imageRef.current.height < 100)
return "image-contain"
const height = imageRef.current.naturalHeight
const width = imageRef.current.naturalWidth
if (Math.abs(width / height - 1) < 0.1 || width < 100 || height < 100) return "image-contain"
}
return /svg|gif/.test(imageUrl) ? "image-contain" : "image-cover"
}
Expand All @@ -173,7 +170,7 @@ const Popup = () => {
className={`popup flex flex-col items-center overflow-clip bg-acorn-bg-1 text-base text-white`}>
{!isDoneLoading && <div className="loader" />}
<div
className={`flex flex-col overflow-y-auto overscroll-none ${!isDoneLoading && "hidden"}`}
className={`flex flex-col overflow-y-auto max-w-full overscroll-none ${!isDoneLoading && "hidden"}`}
style={{ "--maxHeight": `700px` } as React.CSSProperties}>
<img // In Firefox, CSP may block the image if the img tag is created with a src attribute. We can't do {imageUrl && ...} nonsense here.
onLoad={imageLoaded}
Expand Down
1 change: 0 additions & 1 deletion src/popup/styles.css
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
.popup {
max-height: 600px;
width: 350px;
overscroll-behavior: contain;
}

body {
Expand Down

0 comments on commit 15a8824

Please sign in to comment.