diff --git a/README.md b/README.md index 075e1ba..aebf82a 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,8 @@ If `PATH` is a directory, the tool reads all the `.md` files recursively. - `--headings` - `--links` - `--tables` +- `--notes` - converts notes like `
...` to Kramdown and adds `markdown=1` argument. +**Cution:** If the note is already in the valid Kramdown format and doesn't contain HTML, the tool still converts it and can break the valid formatting. ### Example diff --git a/html-to-markdown.xcodeproj/project.pbxproj b/html-to-markdown.xcodeproj/project.pbxproj deleted file mode 100644 index c2c9a45..0000000 --- a/html-to-markdown.xcodeproj/project.pbxproj +++ /dev/null @@ -1,72 +0,0 @@ -// !$*UTF8*$! -{ - archiveVersion = 1; - classes = { - }; - objectVersion = 50; - objects = { - -/* Begin PBXFileReference section */ - F3F6366F210E94180030FC7D /* test.rb */ = {isa = PBXFileReference; lastKnownFileType = text.script.ruby; path = test.rb; sourceTree = ""; }; -/* End PBXFileReference section */ - -/* Begin PBXGroup section */ - F3F63668210E93A10030FC7D = { - isa = PBXGroup; - children = ( - F3F6366F210E94180030FC7D /* test.rb */, - ); - sourceTree = ""; - }; -/* End PBXGroup section */ - -/* Begin PBXProject section */ - F3F63669210E93A10030FC7D /* Project object */ = { - isa = PBXProject; - attributes = { - LastUpgradeCheck = 0940; - }; - buildConfigurationList = F3F6366C210E93A10030FC7D /* Build configuration list for PBXProject "html-to-markdown" */; - compatibilityVersion = "Xcode 9.3"; - developmentRegion = en; - hasScannedForEncodings = 0; - knownRegions = ( - en, - ); - mainGroup = F3F63668210E93A10030FC7D; - projectDirPath = ""; - projectRoot = ""; - targets = ( - ); - }; -/* End PBXProject section */ - -/* Begin XCBuildConfiguration section */ - F3F6366D210E93A10030FC7D /* Debug */ = { - isa = XCBuildConfiguration; - buildSettings = { - }; - name = Debug; - }; - F3F6366E210E93A10030FC7D /* Release */ = { - isa = XCBuildConfiguration; - buildSettings = { - }; - name = Release; - }; -/* End XCBuildConfiguration section */ - -/* Begin XCConfigurationList section */ - F3F6366C210E93A10030FC7D /* Build configuration list for PBXProject "html-to-markdown" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - F3F6366D210E93A10030FC7D /* Debug */, - F3F6366E210E93A10030FC7D /* Release */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Release; - }; -/* End XCConfigurationList section */ - }; - rootObject = F3F63669210E93A10030FC7D /* Project object */; -} diff --git a/html-to-markdown.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/html-to-markdown.xcodeproj/project.xcworkspace/contents.xcworkspacedata deleted file mode 100644 index b6d51ae..0000000 --- a/html-to-markdown.xcodeproj/project.xcworkspace/contents.xcworkspacedata +++ /dev/null @@ -1,7 +0,0 @@ - - - - - diff --git a/html-to-markdown.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist b/html-to-markdown.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist deleted file mode 100644 index 18d9810..0000000 --- a/html-to-markdown.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +++ /dev/null @@ -1,8 +0,0 @@ - - - - - IDEDidComputeMac32BitWarning - - - diff --git a/lib/converter.rb b/lib/converter.rb index bf09cd1..9b92a9d 100644 --- a/lib/converter.rb +++ b/lib/converter.rb @@ -3,7 +3,7 @@ module HtmlToKramdown # Converts input HTML to kramdown class Converter def default_options - { html_to_native: true, line_width: 1000 } + { html_to_native: false, line_width: 1000, input: 'html' } end def to_kramdown(string, options = {}) @@ -11,4 +11,4 @@ def to_kramdown(string, options = {}) document.to_kramdown end end -end \ No newline at end of file +end diff --git a/lib/converters/kramdown.rb b/lib/converters/kramdown.rb index 4fab828..6b56722 100644 --- a/lib/converters/kramdown.rb +++ b/lib/converters/kramdown.rb @@ -1,17 +1,16 @@ require 'kramdown' -# Override the 'converter/kramdown' to disable link definitions +# Disabling some functionality commenting it out module Kramdown module Converter # Converts an element tree to the kramdown format. class Kramdown - - ESCAPED_CHAR_RE = /(\$\$|])|^[ ]{0,3}(:)/ + #ESCAPED_CHAR_RE = /(\$\$)|^[ ]{0,3}(:)/ # Remove the links definitions conversion def convert_a(el, opts) if el.attr['href'].empty? "[#{inner(el, opts)}]()" - #elsif el.attr['href'] =~ /^(?:http|ftp)/ || el.attr['href'].count("()") > 0 + # elsif el.attr['href'] =~ /^(?:http|ftp)/ || el.attr['href'].count("()") > 0 # index = if link_el = @linkrefs.find {|c| c.attr['href'] == el.attr['href']} # @linkrefs.index(link_el) + 1 # else @@ -25,24 +24,52 @@ def convert_a(el, opts) end end - # Disabling addition of an extra new line - def convert(el, opts = {:indent => 0}) + # Disabling addition of an extra new line and excaping + def convert(el, opts = { indent: 0 }) res = send("convert_#{el.type}", el, opts) - if ![:html_element, :li, :dt, :dd, :td].include?(el.type) && (ial = ial_for_element(el)) + if !%i[html_element li dt dd td].include?(el.type) && (ial = ial_for_element(el)) res << ial res << "\n\n" if Element.category(el) == :block - elsif [:ul, :dl, :ol, :codeblock].include?(el.type) && opts[:next] && - ([el.type, :codeblock].include?(opts[:next].type) || - (opts[:next].type == :blank && opts[:nnext] && [el.type, :codeblock].include?(opts[:nnext].type))) + elsif %i[ul dl ol codeblock].include?(el.type) && opts[:next] && + ([el.type, :codeblock].include?(opts[:next].type) || + (opts[:next].type == :blank && opts[:nnext] && [el.type, :codeblock].include?(opts[:nnext].type))) res << "^\n\n" - #elsif Element.category(el) == :block && - # ![:li, :dd, :dt, :td, :th, :tr, :thead, :tbody, :tfoot, :blank].include?(el.type) && - # (el.type != :html_element || @stack.last.type != :html_element) && - # (el.type != :p || !el.options[:transparent]) - # res << "\n" + # elsif Element.category(el) == :block && + # ![:li, :dd, :dt, :td, :th, :tr, :thead, :tbody, :tfoot, :blank].include?(el.type) && + # (el.type != :html_element || @stack.last.type != :html_element) && + # (el.type != :p || !el.options[:transparent]) + # res << "\n" end res end + + # Disable escaping for special characters in text + def convert_text(el, opts) + if opts[:raw_text] + el.value + else + el.value.gsub(/\A\n/) do + opts[:prev] && opts[:prev].type == :br ? '' : "\n" + end.gsub(/\s+/, ' ')#.gsub(ESCAPED_CHAR_RE) { "\\#{$1 || $2}" } + end + end + + # Disable escaping for special characters in alt text of images + def convert_img(el, opts) + alt_text = el.attr['alt'].to_s#.gsub(ESCAPED_CHAR_RE) { $1 ? "\\#{$1}" : $2 } + src = el.attr['src'].to_s + if src.empty? + "![#{alt_text}]()" + else + title = parse_title(el.attr['title']) + link = if src.count("()") > 0 + "<#{src}>" + else + src + end + "![#{alt_text}](#{link}#{title})" + end + end end end end diff --git a/lib/crawler.rb b/lib/crawler.rb index 9eadd64..f0a5391 100644 --- a/lib/crawler.rb +++ b/lib/crawler.rb @@ -18,7 +18,11 @@ def images_to_kramdown(content) end def tables_to_kramdown(content) - content.gsub(tables, &replace) + content.gsub(tables, &replace) + end + + def notes_to_kramdown(content) + content.gsub(notes, &replace) end def replace @@ -52,5 +56,9 @@ def links def tables filter.tables end + + def notes + filter.notes + end end end diff --git a/lib/filter.rb b/lib/filter.rb index 6c79133..9fcac95 100644 --- a/lib/filter.rb +++ b/lib/filter.rb @@ -6,19 +6,20 @@ def headings end def links - %r{()} + %r{} end def tables - %r{(]*>(?:.|\n)*?<\/table>)} - end + %r{]*>(?:.|\n)*?<\/table>} + end def images /(^)/ end - # TODO - def notes; end + def notes + %r{