forked from zuchka/remove-markdown
-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.js
99 lines (92 loc) · 3.97 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
module.exports = function(md, options) {
options = options || {};
options.listUnicodeChar = options.hasOwnProperty('listUnicodeChar') ? options.listUnicodeChar : false;
options.stripListLeaders = options.hasOwnProperty('stripListLeaders') ? options.stripListLeaders : true;
options.gfm = options.hasOwnProperty('gfm') ? options.gfm : true;
options.useImgAltText = options.hasOwnProperty('useImgAltText') ? options.useImgAltText : true;
options.abbr = options.hasOwnProperty('abbr') ? options.abbr : false;
options.replaceLinksWithURL = options.hasOwnProperty('replaceLinksWithURL') ? options.replaceLinksWithURL : false;
options.htmlTagsToSkip = options.hasOwnProperty('htmlTagsToSkip') ? options.htmlTagsToSkip : [];
options.throwError = options.hasOwnProperty('throwError') ? options.throwError : false;
var output = md || '';
// Remove horizontal rules (stripListHeaders conflict with this rule, which is why it has been moved to the top)
output = output.replace(/^(-\s*?|\*\s*?|_\s*?){3,}\s*/gm, '');
try {
if (options.stripListLeaders) {
if (options.listUnicodeChar)
output = output.replace(/^([\s\t]*)([\*\-\+]|\d+\.)\s+/gm, options.listUnicodeChar + ' $1');
else
output = output.replace(/^([\s\t]*)([\*\-\+]|\d+\.)\s+/gm, '$1');
}
if (options.gfm) {
output = output
// Header
.replace(/\n={2,}/g, '\n')
// Fenced codeblocks
.replace(/~{3}.*\n/g, '')
// Strikethrough
.replace(/~~/g, '')
// Fenced codeblocks
.replace(/`{3}.*\n/g, '');
}
if (options.abbr) {
// Remove abbreviations
output = output.replace(/\*\[.*\]:.*\n/, '');
}
output = output
// Remove HTML tags
.replace(/<[^>]*>/g, '')
var htmlReplaceRegex = new RegExp('<[^>]*>', 'g');
if (options.htmlTagsToSkip.length > 0) {
// Using negative lookahead. Eg. (?!sup|sub) will not match 'sup' and 'sub' tags.
var joinedHtmlTagsToSkip = '(?!' + options.htmlTagsToSkip.join("|") + ')';
// Adding the lookahead literal with the default regex for html. Eg./<(?!sup|sub)[^>]*>/ig
htmlReplaceRegex = new RegExp(
'<' +
joinedHtmlTagsToSkip +
'[^>]*>',
'ig'
);
}
output = output
// Remove HTML tags
.replace(htmlReplaceRegex, '')
// Remove setext-style headers
.replace(/^[=\-]{2,}\s*$/g, '')
// Remove footnotes?
.replace(/\[\^.+?\](\: .*?$)?/g, '')
.replace(/\s{0,2}\[.*?\]: .*?$/g, '')
// Remove images
.replace(/\!\[(.*?)\][\[\(].*?[\]\)]/g, options.useImgAltText ? '$1' : '')
// Remove inline links
.replace(/\[([^\]]*?)\][\[\(].*?[\]\)]/g, options.replaceLinksWithURL ? '$2' : '$1')
// Remove blockquotes
.replace(/^(\n)?\s{0,3}>\s?/gm, '$1')
// .replace(/(^|\n)\s{0,3}>\s?/g, '\n\n')
// Remove reference-style links?
.replace(/^\s{1,2}\[(.*?)\]: (\S+)( ".*?")?\s*$/g, '')
// Remove atx-style headers
.replace(/^(\n)?\s{0,}#{1,6}\s*( (.+))? +#+$|^(\n)?\s{0,}#{1,6}\s*( (.+))?$/gm, '$1$3$4$6')
// Remove * emphasis
.replace(/([\*]+)(\S)(.*?\S)??\1/g, '$2$3')
// Remove _ emphasis. Unlike *, _ emphasis gets rendered only if
// 1. Either there is a whitespace character before opening _ and after closing _.
// 2. Or _ is at the start/end of the string.
.replace(/(^|\W)([_]+)(\S)(.*?\S)??\2($|\W)/g, '$1$3$4$5')
// Remove code blocks
.replace(/(`{3,})(.*?)\1/gm, '$2')
// Remove inline code
.replace(/`(.+?)`/g, '$1')
// // Replace two or more newlines with exactly two? Not entirely sure this belongs here...
// .replace(/\n{2,}/g, '\n\n')
// // Remove newlines in a paragraph
// .replace(/(\S+)\n\s*(\S+)/g, '$1 $2')
// Replace strike through
.replace(/~(.*?)~/g, '$1');
} catch(e) {
if (options.throwError) throw e;
console.error("remove-markdown encountered error: %s", e);
return md;
}
return output;
};