Skip to content

Commit

Permalink
parsinglists not working
Browse files Browse the repository at this point in the history
  • Loading branch information
juangpc committed Jul 23, 2021
1 parent 1eeeb22 commit 4b3f914
Show file tree
Hide file tree
Showing 8 changed files with 198 additions and 46 deletions.
16 changes: 8 additions & 8 deletions tools/python/demo_recursive_folder_process.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
import mne_cpp.core
import mne_cpp.core as mne
# import mne_cpp.pdf_doc
from os import stat

mne_cpp.core.version()
mne.version()

projectFolder = mne_cpp.core.baseFolder()
projectFolder = mne.baseFolder()

# Recursively list all the files in a directory and order by size and print results.
listOfFiles = []
mne_cpp.core.recursiveFolderProcess(projectFolder + 'doc/gh-pages', lambda f: \
listOfFiles.append((f, stat(f).st_size)) \
if f.name.endswith('.md') \
else None )
mne.recursiveFolderProcess(projectFolder + 'doc/gh-pages', lambda f: \
listOfFiles.append((f, stat(f).st_size)) \
if f.name.endswith('.md') \
else None )
listOfFiles.sort(reverse=True, key=lambda f:f[1])
for f in listOfFiles:
print('File: ' + f[0].path + ' - (' + mne_cpp.core.sizeHumanReadable(f[1]) + ')')
print('File: ' + f[0].path + ' - (' + mne.sizeHumanReadable(f[1]) + ')')

90 changes: 79 additions & 11 deletions tools/python/documentation_pdf_generator.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,93 @@
import mne_cpp.core
import mne_cpp.pdf_doc
import mne_cpp.pdf_doc as mnepdf

projectFolder = mne_cpp.core.baseFolder()
webBaseFolder = projectFolder + 'doc/gh-pages'

# webDocuments = mne_cpp.pdf_doc.scanFolder(webBaseFolder)
# webDocuments = mnepdf.scanFolder(webBaseFolder)
# print(webDocuments)

# web = mne_cpp.pdf_doc.buildWebStructure(webDocuments)
# web = mnepdf.buildWebStructure(webDocuments)
# print('Printing Web Structure:')
# print(web)

# (pathLabel, filePath, fileName, fileExt, fullPath) = mne_cpp.core.extractFilePaths('../../doc/gh-pages/pages/documentation/anonymize.md')
(pathLabel, filePath, fileName, fileExt, fullPath) = mne_cpp.core.extractFilePaths('../../doc/gh-pages/pages/contact.md')
# (pathLabel, filePath, fileName, fileExt, fullPath) = mne_cpp.core.extractFilePaths('../../doc/gh-pages/pages/contact.md')

# inFile = open(fullPath, mode = 'r', encoding = 'utf8')
# inText = inFile.read()
# inFile.close()

# outText = mnepdf.parseUnorderedList(inText)
# outFile = open(pathLabel + filePath + fileName + '.PROCESSED' + '.' + fileExt, mode = 'w', encoding = 'utf8')
# outFile.write(outText)
# outFile.close()

inText = r'''
---
layout: default
title: Markdown kitchen sink
nav_order: 99
---
(\n(( *[-*] *)|(\s*\d+\.\s*))[^\-*\n ].+)+
(\n(( *[-*] *)|( *\d+\. *))[^\-*\n ].+)+
(\n(( *[-*] *)|( *\d+\. *))[^\-*\n ].+)+
Text can be **bold**, _italic_, or ~~strikethrough~~.
[Link to another page](another-page).
There should be whitespace between paragraphs.
There should be whitespace between paragraphs. We recommend including a README, or a file with information about your project.
# [](#header-1)Header 1
This is a normal paragraph following a header. GitHub is a code hosting platform for version control and collaboration. It lets you and others work together on projects from anywhere.
## [](#header-2)Header 2
> This is a blockquote following a header.
>
> When something is important enough, you do it even if the odds are not in your favor.
### [](#header-3)Header 3
```js
// Javascript code with syntax highlighting.
var fun = function lang(l) {
dateformat.i18n = require('./lang/' + l)
return true;
}
```
```ruby
# Ruby code with syntax highlighting
GitHubPages::Dependencies.gems.each do |gem, version|
s.add_dependency(gem, "= #{version}")
end
```
- level 1 item
- level 2 item
- level 2 item
- level 3 item
- level 3 item
- level 1 item
- level 2 item
- level 2 item
- level 2 item
- level 1 item
- level 2 item
- level 2 item
- level 1 item
'''

outText = mnepdf.parseLists(inText)





a = 3

inFile = open(fullPath, mode = 'r', encoding = 'utf8')
inText = inFile.read()
inFile.close()

outText = mne_cpp.pdf_doc.parseUnorderedList(inText)
outFile = open(pathLabel + filePath + fileName + '.PROCESSED' + '.' + fileExt, mode = 'w', encoding = 'utf8')
outFile.write(outText)
outFile.close()

15 changes: 15 additions & 0 deletions tools/python/list_trash_text1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
\begin{itemize}
\item level 1 item
- level 2 item
- level 2 item
- level 3 item
- level 3 item
\item level 1 item
- level 2 item
- level 2 item
- level 2 item
\item level 1 item
- level 2 item
- level 2 item
\item level 1 item
\end{itemize}
26 changes: 26 additions & 0 deletions tools/python/list_trash_text2.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
\begin{itemize}
\item level 1 item
\begin{itemize}
\item level 2 item
\item level 2 item
\begin{itemize}
\item level 3 item
\item level 3 item
\end{itemize}
\end{itemize}
\item level 1 item
\begin{itemize}
\item level 2 item
\item level 2 item
\item level 2 item
\end{itemize}
\item level 1 item
\begin{itemize}
\item level 2 item
\item level 2 item
\end{itemize}
\item level 1 item
\end{itemize}



Binary file modified tools/python/mne_cpp/__pycache__/core.cpython-39.pyc
Binary file not shown.
Binary file modified tools/python/mne_cpp/__pycache__/pdf_doc.cpython-39.pyc
Binary file not shown.
2 changes: 1 addition & 1 deletion tools/python/mne_cpp/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,8 @@ def parseInputArguments(argsToParse, **opts):
options[arg_adapted] = argsToParse[arg]
return (v for k, v in options.items())

_suffixes = ['bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
def sizeHumanReadable(size):
_suffixes = ['bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
# determine binary order in steps of size 10
# (coerce to int, // still returns a float)
order = int(log2(size) / 10) if size else 0
Expand Down
95 changes: 69 additions & 26 deletions tools/python/mne_cpp/pdf_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,8 +190,14 @@ def parseMarkDownFile(file, **inputArgs):
else:
with open(file.fullPath, 'r', encoding='utf8') as markDownFile, \
open(texFile,'a+') as texFile:
# The order here is relevant. Some of the regexes depend on not having conflicting patterns.
# e.g. empty lines can sometimes interfere with some list patterns.
# e.g. the horizontal line pattern (\n* * *) can sometimes be mistaken for a list.
# I've tried to minimize these conflicts but I'm not 100% sure, so any change should be tested...
inText = markDownFile.read()
inText = stripEmptyLines(inText)
inText = deleteJustTheDocsHeader(inText)
inText = parseHorizontalLines(inText)
inText = parseInlineItalicText(inText)
inText = parseInlineBoldText(inText)
inText = parseUnorderedList(inText)
Expand All @@ -211,25 +217,11 @@ def parseInlineItalicText(inText):
def parseInlineBoldText(inText):
    """Convert markdown bold spans (``**text**`` or ``__text__``) to ``\\textbf{text}``.

    The bold text may contain only word characters and spaces. The closing
    delimiter must be the same as the opening one, and the span must not be
    glued to a word character on either side, so identifiers such as
    ``snake__case__name`` are left alone.

    Args:
        inText: markdown text to convert.

    Returns:
        inText with every bold span replaced by its LaTeX equivalent.
    """
    # (?P=delim) forces the closer to equal the opener; an unterminated '__'
    # or a mixed '**text__' pair is therefore not converted.
    # The lazy quantifier stops at the first closer, which matters for '__'
    # because '_' is itself a word character.
    # Negative lookarounds (instead of requiring a neighbouring non-word
    # character) also accept a span at the very start or end of the text.
    boldPattern = r'(?<!\w)(?P<delim>\*\*|__)(?P<btext>[\w ]+?)(?P=delim)(?!\w)'
    return re.sub(boldPattern, r'\\textbf{\g<btext>}', inText)

def parseUnorderedList(inText):
match = re.search(r'(\n\s?\*\s?.+)(\n\s?\*\s?(.+))*', inText)
if match:
outList = '\n\\begin{itemize}\n'
pattern2 = re.compile(r'\n*\s*\*\s*(?P<item>.+)(?=\n)?')
itemList = pattern2.finditer(match.group(0))
for item in itemList:
outList += '\t\\item ' + item.group('item') + '\n'
outList += '\\end{itemize}'
outText = inText[:match.start(0)] + outList + inText[match.end(0):]
return parseUnorderedList(outText)
else:
return inText

def parseInlineImages(inText):
match = re.search(r'!\[(?P<alt_text>[^]]+)\]\((?P<imgFilePath>[^)]+)\)', inText)
if match:
imgPath = mne_cpp.core.none_if_empty(match.group('imgFilePath'))
imgAltText = mne_cpp.core.none_if_empty(match.group('alt_text'))
imgPath = mne_cpp.core.noneIfEmpty(match.group('imgFilePath'))
imgAltText = mne_cpp.core.noneIfEmpty(match.group('alt_text'))
figText = '\n\\begin{wrapfigure}{r}{0.5\\textwidth}'
figText += '\n\t\\begin{center}'
figText += '\n\t\t\\includegraphics[width=0.4\\textwidth]{ ' + imgPath + '}'
Expand All @@ -244,7 +236,7 @@ def parseInlineImages(inText):
def parseInlineHTMLImages(inText):
match = re.search(r'<\s*img\s*src\s*=\s*"(?P<imgPath>[^"]+)".*>', inText)
if match:
imgPath = mne_cpp.core.none_if_empty(match.group('imgFilePath'))
imgPath = mne_cpp.core.noneIfEmpty(match.group('imgFilePath'))
figText = '\n\\begin{wrapfigure}{r}{0.5\\textwidth}'
figText += '\n\t\\begin{center}'
figText += '\n\t\t\\includegraphics[width=0.4\\textwidth]{ ' + imgPath + '}'
Expand All @@ -256,7 +248,7 @@ def parseInlineHTMLImages(inText):
else:
return inText

def parseTableMd(inText)
def parseTableMd(inText):
match = re.search(r'(?<=\n)\|([^|\n]+\|)+', inText)
if match:
tableText = inText[match.start(0):match.end(0)]
Expand Down Expand Up @@ -312,14 +304,67 @@ def parseHeaders(inText):
else:
return inText

def parseHorizontalLine(inText):
def parseHorizontalLines(inText):
    """Replace markdown horizontal rules with a LaTeX rule.

    A horizontal rule is a line made of three asterisks separated by single
    whitespace characters (``* * *``). It is only recognised when a newline
    both precedes and follows it; those newlines are kept.

    Args:
        inText: markdown text to convert.

    Returns:
        inText with each rule line replaced by
        ``\\noindent\\rule{15cm}{0.5pt}``.
    """
    latexRule = '\\noindent\\rule{15cm}{0.5pt}'
    # A callable replacement is inserted verbatim. Passing the string itself
    # would make re.sub treat it as a template, turning the '\n' of
    # '\noindent' into a line feed and the '\r' of '\rule' into a carriage
    # return.
    return re.sub(r'(?<=\n)\*\s\*\s\*(?=\n)', lambda match: latexRule, inText)

# parse horizontal line
# \n\* \* \*
def stripHorizontalLines(inText):
    """Delete markdown horizontal rules (``* * *`` lines) from inText.

    Only the three asterisks and the whitespace between them are removed; the
    newline before and the newline after the rule must both be present and
    are left in place.

    Args:
        inText: markdown text to clean.

    Returns:
        inText without the horizontal rule markers.
    """
    ruleLine = re.compile(r'(?<=\n)\*\s\*\s\*(?=\n)')
    return ruleLine.sub('', inText)

def stripEmptyLines(inText):
    """Collapse runs of consecutive newlines in inText into a single newline.

    Every newline that directly follows another newline is dropped, which
    removes completely empty lines. Lines holding only spaces or tabs are not
    touched.

    Args:
        inText: text to clean.

    Returns:
        inText without empty lines.
    """
    repeatedNewline = re.compile(r'((?<=\n)\n)')
    return repeatedNewline.sub('', inText)

# def parseUnorderedList(inText):
# match = re.search(r'(\n\s?\*\s?.+)(\n\s?\*\s?(.+))*', inText)
# if match:
# outList = '\n\\begin{itemize}\n'
# pattern2 = re.compile(r'\n*\s*\*\s*(?P<item>.+)(?=\n)?')
# itemList = pattern2.finditer(match.group(0))
# for item in itemList:
# outList += '\t\\item ' + item.group('item') + '\n'
# outList += '\\end{itemize}'
# outText = inText[:match.start(0)] + outList + inText[match.end(0):]
# return parseUnorderedList(outText)
# else:
# return inText
def parseUnorderedList(inText, i=0):
    """Convert the unordered markdown lists in inText into LaTeX itemize lists.

    A list is a run of consecutive lines that start with a ``-`` or ``*``
    bullet. Its first line must be indented by exactly i spaces; the bullet
    lines that follow may be indented deeper and then become nested lists.

    Args:
        inText: text to convert. A list line is only recognised when a
            newline precedes it.
        i: number of spaces in front of the top-level bullets. Defaults to 0
            so callers can keep passing the text alone.

    Returns:
        inText with every such list replaced by (nested) itemize
        environments.
    """
    # The first character of the item text may not be '-', '*' or a space, so
    # horizontal rules ('* * *', '---') are not mistaken for list items.
    item = r'[-*] *[^\-*\n ].*'
    pattern = r'\n {%d}%s(?:\n {%d,}%s)*' % (i, item, i, item)
    return re.sub(pattern, lambda match: parseOneList(match.group()), inText)


def parseOneList(inList):
    """Convert the lines of one markdown list into nested LaTeX environments.

    Bullet items (``-`` or ``*``) go into ``itemize`` and numbered items
    (``1.``) into ``enumerate``. The nesting level follows the number of
    leading spaces of each item: a deeper indentation opens a new
    environment, a shallower one closes the environments opened since.

    Args:
        inList: the text of a single list, one item per line. A leading
            newline (as captured by the list patterns) is preserved.

    Returns:
        The LaTeX text of the list. Environments and items are indented with
        one tab per nesting level; there is no trailing newline.
    """
    itemPattern = re.compile(r'(?P<indent> *)(?P<marker>[-*]|\d+\.) *(?P<itemText>.*)')
    outLines = []
    openLists = []  # stack of (indentation, environment name), innermost last

    def closeInnermostList():
        _, environment = openLists.pop()
        outLines.append('\t' * len(openLists) + '\\end{' + environment + '}')

    for line in inList.split('\n'):
        match = itemPattern.fullmatch(line)
        if match is None:
            # Not a list item: drop empty lines (such as the one in front of
            # the leading newline) and keep anything else untouched.
            if line:
                outLines.append(line)
            continue
        indent = len(match.group('indent'))
        environment = 'itemize' if match.group('marker') in ('-', '*') else 'enumerate'
        # Leaving one or more nested levels.
        while openLists and openLists[-1][0] > indent:
            closeInnermostList()
        # Same level but the kind of list changed: start a new environment.
        if openLists and openLists[-1][0] == indent and openLists[-1][1] != environment:
            closeInnermostList()
        if not openLists or openLists[-1][0] < indent:
            outLines.append('\t' * len(openLists) + '\\begin{' + environment + '}')
            openLists.append((indent, environment))
        outLines.append('\t' * len(openLists) + '\\item ' + match.group('itemText'))
    while openLists:
        closeInnermostList()
    prefix = '\n' if inList.startswith('\n') else ''
    return prefix + '\n'.join(outLines)


def parseLists(inText):
    """Convert every markdown list in inText, ordered or unordered, into LaTeX.

    A list is a run of consecutive lines that each start with optional
    spaces, a bullet (``-`` or ``*``) or a number followed by a dot, and the
    item text. Each run is converted by parseOneList.

    Args:
        inText: text to convert. A list line is only recognised when a
            newline precedes it.

    Returns:
        inText with every list replaced by itemize/enumerate environments;
        text without lists is returned unchanged.
    """
    # '.*' (not '.+') after the first text character so that items made of a
    # single character are recognised too.
    pattern = r'(?:\n *(?:[-*]|\d+\.) *[^\-*\n ].*)+'
    return re.sub(pattern, lambda match: parseOneList(match.group()), inText)


# for spaces in range(2:2:6):
# pattern =

# matches4ord = re.finditer(r'(\n( {2}(\d+\.) *)([^-\n ].*))+', text[match.start(0):match.end(0)])
# for match4ord in matches4ord:
# outText = '\n\\begin{enumerate}\n'

# ((\n {2}\d+\. *)(?P<item>.*))

# parse all lists with (\n((\s*[-*]\s*)|(\s*\d+\.\s*)).+)+
# see https://regex101.com/r/2uKqPB/1/
# https://regex101.com/r/idzIo5/1/
# https://regex101.com/r/Iu3hKt/1

# after this parse
# ordered lists of level 4
Expand All @@ -335,18 +380,16 @@ def parseHorizontalLine(inText):
# https://tex.stackexchange.com/questions/247681/how-to-create-checkbox-todo-list




# still missing:
# ordered and unordered lists parsing
# inbound links vs outbound links
# parse inline code
# preamble and ending file
# parse multiple terms description/definition

# header tags up to 6 #s

def processImage(imageFile):
_, _, _, _, fileExt = mne_cpp.core.parseFilePathNameExt(imageFile)
_, _, _, _, fileExt = mne_cpp.core.parseFilePathNameExt(imageFile)
if fileExt == "jpg" or fileExt == "jpeg":
jpg2png(imageFile)
if fileExt == "svg2":
Expand Down

0 comments on commit 4b3f914

Please sign in to comment.