-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.ts
76 lines (59 loc) · 2.16 KB
/
index.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import { Window } from 'happy-dom';
/**
 * Downloads a page and extracts the inner HTML of its `#doc` element.
 *
 * The response body is assigned to the `<body>` of a happy-dom `Window`
 * created for the same URL, and the document is then queried for `#doc`.
 *
 * @param url - Address of the page to download.
 * @returns The `innerHTML` of the first element matching `#doc`, or an empty
 *   string when no such element exists.
 */
async function fetchMarkdown(url: string) {
  const dom = new Window({ url });

  const response = await fetch(url);
  const pageSource = await response.text();
  dom.document.body.innerHTML = pageSource;

  const docElement = dom.document.querySelector('#doc');
  return docElement?.innerHTML ?? '';
}
/** Origin prepended to menu links whose URL starts with `/`. */
const HOSTNAME = 'https://g0v.hackmd.io';

/**
 * Collects the bullet-list links (`- [title](url)`) found in a markdown string.
 *
 * @param markdown - Markdown source to scan.
 * @returns One entry per bullet link, in document order. `text` is the raw
 *   `[title](url)` snippet, `title` is the bracketed part, and `url` is the
 *   link target, prefixed with `HOSTNAME` when it starts with `/`. An empty
 *   array is returned when the markdown contains no bullet links.
 */
async function getMenuLinks(markdown: string) {
  const bulletLinkPattern = /- \[(.*?)\]\((.*?)\)/g;

  return Array.from(markdown.matchAll(bulletLinkPattern), ([bullet, title, target]) => ({
    // Drop the leading "- " so only the `[title](url)` part remains.
    text: bullet.slice(2),
    title,
    url: target.startsWith('/') ? `${HOSTNAME}${target}` : target,
  }));
}
/**
 * Runs the given async functions with at most `batchSize` of them in flight
 * at the same time.
 *
 * Up to `batchSize` workers are started immediately. Each worker pulls the
 * next not-yet-started function from a shared cursor as soon as its previous
 * one resolves, so functions are started in array order. When a function
 * fails, its worker stops and the returned promise rejects with that error;
 * the remaining workers keep draining the list.
 *
 * @param asyncFns - Functions to invoke.
 * @param batchSize - Maximum number of functions running concurrently. Must be
 *   at least 1; fractional values are rounded down.
 * @returns A promise that resolves once every function has completed.
 * @throws RangeError (as a rejection) when `batchSize` is NaN or below 1,
 *   instead of silently running nothing.
 */
async function batchExecPromises(asyncFns: (() => Promise<unknown>)[], batchSize: number = 5): Promise<unknown[]> {
  // Written as a negated comparison so that NaN is rejected as well.
  if (!(batchSize >= 1)) {
    throw new RangeError(`batchSize must be at least 1, got ${batchSize}`);
  }

  let fnIdx = 0;

  // A worker keeps taking the next pending function until none are left.
  // Being `async`, it also turns a synchronous throw into a rejection.
  async function worker(): Promise<void> {
    for (;;) {
      const nextAsyncFn = asyncFns[fnIdx++];
      if (nextAsyncFn === undefined) return;
      await nextAsyncFn();
    }
  }

  const workerCount = Math.min(asyncFns.length, Math.floor(batchSize));
  return Promise.all(Array.from({ length: workerCount }, () => worker()));
}
/**
 * Grab all markdowns listed in a HackMD book mode menu and save each one to
 * a file.
 *
 * The menu page is fetched first, its bullet links are extracted, and every
 * linked page is then fetched and written to `${to}/${title}.md` through
 * `batchExecPromises`.
 *
 * @param menuUrl - URL of the book-mode menu page that lists the documents.
 * @param to - Directory path prefix for the written files.
 * @returns A promise that resolves after every linked document has been
 *   fetched and written, and rejects if fetching or writing fails.
 */
async function saveAllMarkdowns(menuUrl = 'https://g0v.hackmd.io/@cofacts/meetings/', to='./data') {
  const menuMd = await fetchMarkdown(menuUrl);
  const links = await getMenuLinks(menuMd);
  console.info(`[saveAllMarkdowns] Found ${links.length} links.`);

  // Map each link to an async fn that fetches the markdown and saves it.
  const asyncFns = links.map(({ title, url }) => async () => {
    console.info(`[saveAllMarkdowns] Processing ${url}...`);
    const markdown = await fetchMarkdown(url);

    // Remove html tags from title, and replace all other `/` with `-`
    // so the title cannot introduce extra path segments.
    const sanitizedTitle = title.replace(/<.*?>/g, '').replace(/\//g, '-');
    const fileName = `${to}/${sanitizedTitle}.md`;

    console.info(`[saveAllMarkdowns] Saving ${fileName}...`);
    await Bun.write(fileName, markdown);
  });

  // Await the batch so callers only continue once all files are written and
  // so that download/write failures propagate instead of going unhandled.
  await batchExecPromises(asyncFns);
}
/** Script entry point: runs `saveAllMarkdowns` with its default arguments. */
async function main() {
  await saveAllMarkdowns();
}

// Handle rejection explicitly so a failure is logged and reflected in the
// process exit code rather than left as a floating promise.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});