Skip to content

Commit

Permalink
Add an option to convert notes
Browse files Browse the repository at this point in the history
  • Loading branch information
dshevtsov committed Sep 4, 2018
1 parent 83c7b2d commit ff46fb3
Show file tree
Hide file tree
Showing 12 changed files with 280 additions and 161 deletions.
10 changes: 6 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@ If `PATH` is a directory, the tool reads all the `.md` files recursively.
### Available options

- `--images`
- `--headings` - adds a blank line after a heading.
- `--links` - converts to inline links.
- `--tables` - adds two blank lines after a table.
- `--headings`
- `--links`
- `--tables`
- `--notes` - converts notes like `<div class="bs-callout bs-callout-xxx">...` to Kramdown and adds the `markdown=1` attribute.
**Caution:** If the note is already in valid Kramdown format and doesn't contain HTML, the tool still converts it and can break the valid formatting.

### Example

Expand All @@ -30,7 +32,7 @@ $ gem install kramdown
## Precautions

Note that the Kramdown parser doesn't recognize Kramdown elements inside HTML blocks by default.
To make it work, provide additional parameter to tell Kramdown to parse kramdown inside HTML: `markdown="1"`, or `markdown="span"`, or `markdown="block"`
To make it work, provide an additional parameter to enable Kramdown parsing within HTML: `markdown="1"`, or `markdown="span"`, or `markdown="block"`

Breaking example:
```html
Expand Down
72 changes: 0 additions & 72 deletions html-to-markdown.xcodeproj/project.pbxproj

This file was deleted.

This file was deleted.

This file was deleted.

5 changes: 2 additions & 3 deletions lib/converter.rb
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
require_relative 'converters/kramdown.rb'
require 'pandoc-ruby'
module HtmlToKramdown
# Converts input HTML to kramdown
class Converter
def default_options
{ html_to_native: true, line_width: 1000 }
{ html_to_native: false, line_width: 1000, input: 'html' }
end

def to_kramdown(string, options = {})
document = Kramdown::Document.new(string, default_options.merge(options))
document.to_kramdown
end
end
end
end
60 changes: 59 additions & 1 deletion lib/converters/kramdown.rb
Original file line number Diff line number Diff line change
@@ -1,17 +1,75 @@
require 'kramdown'

# Disables some of the stock converter's functionality by commenting it out
module Kramdown
module Converter
# Converts an element tree to the kramdown format.
class Kramdown
#ESCAPED_CHAR_RE = /(\$\$)|^[ ]{0,3}(:)/

# Converts a link element to an inline kramdown link: "[text](href "title")".
#
# The reference-style conversion is intentionally disabled: the original
# branch that turned http/ftp links and links containing parentheses into
# numbered "[text][n]" references (collected in @linkrefs) is not used, so
# every link stays inline.
#
# el   - the link element; reads el.attr['href'] and el.attr['title'].
# opts - conversion options, passed through to `inner`.
#
# Returns the kramdown link as a String. A missing or empty href yields
# "[text]()" instead of raising on nil.
def convert_a(el, opts)
  href = el.attr['href'].to_s
  return "[#{inner(el, opts)}]()" if href.empty?

  title = parse_title(el.attr['title'])
  "[#{inner(el, opts)}](#{href}#{title})"
end

# Converts a single element by dispatching to the matching convert_<type>
# method, then appends trailing markup where needed.
#
# Compared with the stock converter, the branch that added an extra new line
# after block-level elements is disabled (kept below, commented out), and
# escaping is disabled in the type-specific converters.
#
# el   - the element to convert; el.type selects the convert_<type> method.
# opts - conversion options; :next and :nnext are read here to inspect the
#        following elements. Defaults to { indent: 0 }.
#
# Returns the converted String (the string produced by convert_<type>,
# possibly appended to in place).
def convert(el, opts = { indent: 0 })
res = send("convert_#{el.type}", el, opts)
# Elements other than html_element/li/dt/dd/td get their IAL appended when
# ial_for_element returns one; block-level elements are then terminated with
# a blank line.
if !%i[html_element li dt dd td].include?(el.type) && (ial = ial_for_element(el))
res << ial
res << "\n\n" if Element.category(el) == :block
# A list or code block that is followed (directly, or after one blank
# element) by an element of the same type or by a code block gets a "^" line,
# kramdown's end-of-block marker, so the two blocks are not merged.
elsif %i[ul dl ol codeblock].include?(el.type) && opts[:next] &&
([el.type, :codeblock].include?(opts[:next].type) ||
(opts[:next].type == :blank && opts[:nnext] && [el.type, :codeblock].include?(opts[:nnext].type)))
res << "^\n\n"
# Disabled: appended a new line after most block-level elements.
# elsif Element.category(el) == :block &&
# ![:li, :dd, :dt, :td, :th, :tr, :thead, :tbody, :tfoot, :blank].include?(el.type) &&
# (el.type != :html_element || @stack.last.type != :html_element) &&
# (el.type != :p || !el.options[:transparent])
# res << "\n"
end
res
end

# Renders a text element without escaping special characters (the
# ESCAPED_CHAR_RE substitution of the stock converter is not applied).
#
# el   - the text element; el.value holds the text.
# opts - conversion options: with :raw_text the value is returned untouched;
#        :prev is consulted to see whether the text follows a line break.
#
# Returns the text with runs of whitespace collapsed to single spaces. A
# leading newline is dropped when the previous element is a :br.
def convert_text(el, opts)
  return el.value if opts[:raw_text]

  # The pattern is anchored to the start of the string, so at most the first
  # character is replaced.
  without_break_newline = el.value.sub(/\A\n/) do
    after_br = opts[:prev] && opts[:prev].type == :br
    after_br ? '' : "\n"
  end
  without_break_newline.gsub(/\s+/, ' ')
end

# Renders an image element as "![alt](src "title")" without escaping special
# characters in the alt text (the stock ESCAPED_CHAR_RE substitution is not
# applied).
#
# el   - the image element; reads el.attr['alt'], ['src'] and ['title'].
# opts - conversion options (not used here).
#
# Returns the kramdown image String. An empty or missing src yields
# "![alt]()"; a src containing parentheses is wrapped in angle brackets.
def convert_img(el, opts)
  alt_text = el.attr['alt'].to_s
  src = el.attr['src'].to_s
  return "![#{alt_text}]()" if src.empty?

  title = parse_title(el.attr['title'])
  target = src.count('()').zero? ? src : "<#{src}>"
  "![#{alt_text}](#{target}#{title})"
end
end
end
end
18 changes: 9 additions & 9 deletions lib/crawler.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,35 +6,31 @@ module HtmlToKramdown
# convert them to Kramdown, and return the updated content
class Crawler
def links_to_kramdown(content)
content.gsub(links, &replace).strip
content.gsub(links, &replace)
end

def headings_to_kramdown(content)
content.gsub(headings, &replace)
end

def images_to_kramdown(content)
content.gsub(images, &replace).strip
content.gsub(images, &replace)
end

def tables_to_kramdown(content)
content.gsub(tables, &replace)
end

def substitute
convert_to_kramdown(matcher)
def notes_to_kramdown(content)
content.gsub(notes, &replace)
end

def replace
->(s) { convert_to_kramdown(s) }
end

def convert_to_kramdown(string, options = {})
converter.to_kramdown(string, options)
end

def matcher
Regexp.last_match(1).to_s
converter.to_kramdown(string, options).chomp
end

def converter
Expand All @@ -60,5 +56,9 @@ def links
def tables
filter.tables
end

# Pattern used to locate HTML notes in the content; delegated to the filter.
def notes
filter.notes
end
end
end
11 changes: 6 additions & 5 deletions lib/filter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,20 @@ def headings
end

def links
%r{(<a href=.+</a>)}
%r{<a href=.+</a>}
end

def tables
%r{(<table[^>]*>(?:.|\n)*?<\/table>)}
end
%r{<table[^>]*>(?:.|\n)*?<\/table>}
end

def images
/(^<img.+>)/
end

# TODO
def notes; end
# Regexp matching an HTML note: a `<div class="bs-callout bs-callout...">`
# opening tag, its content (which may span lines), and the first closing
# `</div>` that follows.
#
# The rest of the opening tag is matched with `[^>]*` so it stops at the
# tag's own `>`. A greedy `.+>` would run to the last `>` on the line and
# make a one-line note swallow everything up to a later `</div>`.
#
# NOTE: the content is matched lazily, so a note containing a nested `<div>`
# ends at the inner `</div>`.
def notes
  %r{<div class="bs-callout bs-callout[^>]*>(?:.|\n)*?</div>}
end

# TODO
def lists; end
Expand Down
12 changes: 10 additions & 2 deletions lib/options.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,17 @@
module HtmlToKramdown
# CLI option parser
class Options
VERSION = '3'.freeze
VERSION = '5'.freeze
# CLI options initialization
class ScriptOptions
attr_accessor :links, :tables, :images, :headings
attr_accessor :links, :tables, :images, :headings, :notes, :help

def initialize
self.links = false
self.tables = false
self.images = false
self.headings = false
self.notes = false
end
end

Expand All @@ -39,6 +40,7 @@ def self.option_parser
headings_option parser
images_option parser
tables_option parser
notes_option parser

parser.separator ''
parser.separator 'Common options:'
Expand Down Expand Up @@ -79,5 +81,11 @@ def self.tables_option(parser)
@options.tables = t
end
end

# Registers the -n / --notes switch on the given option parser.
# When the switch is present, the parsed value is stored in @options.notes.
#
# parser - the option parser to register the switch on.
def self.notes_option(parser)
  description = 'Convert content of HTML notes in the .md files in the given path recursively.'
  parser.on('-n', '--notes', description) { |enabled| @options.notes = enabled }
end
end
end
10 changes: 8 additions & 2 deletions lib/runner.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,19 @@ class Runner
attr_reader :path, :args

def initialize(args)
@args = args
@args = args # return --help if no options were provided
end

EXTENSIONS = ['.md'].freeze

def run
# Parse CLI options
@options = Options.parse(args)
# After the parsing only the ARGV[0] (firtst argument in the CLI) remains in the arrray.
# After parsing, only ARGV[0] (the first argument in the CLI) remains in the array.
# Convert the path to String to use with Find.find

path = args.join

Find.find(path) do |item|
puts "Starting with #{item} ..."
if FileTest.file?(item)
Expand Down Expand Up @@ -56,6 +58,10 @@ def go(file)
@content = reader.all(file)
converted_content = crawler.headings_to_kramdown(@content)
write(file, converted_content)
elsif @options.notes
@content = reader.all(file)
converted_content = crawler.notes_to_kramdown(@content)
write(file, converted_content)
end
end

Expand Down
Loading

0 comments on commit ff46fb3

Please sign in to comment.