From fe626d71e74dec5acff04db5366d98534d171d87 Mon Sep 17 00:00:00 2001 From: kasperosterbye Date: Thu, 20 Oct 2022 11:23:28 +0200 Subject: [PATCH 01/40] intermediate --- src/Microdown/MicAbstractDelimiter.class.st | 34 ++++ src/Microdown/MicInlineParser2.class.st | 186 ++++++++++++++++++++ src/Microdown/MicInlineParserToken.class.st | 48 +++++ src/Microdown/String.extension.st | 10 ++ 4 files changed, 278 insertions(+) create mode 100644 src/Microdown/MicInlineParser2.class.st create mode 100644 src/Microdown/MicInlineParserToken.class.st diff --git a/src/Microdown/MicAbstractDelimiter.class.st b/src/Microdown/MicAbstractDelimiter.class.st index a011d0f0..2849d6e3 100644 --- a/src/Microdown/MicAbstractDelimiter.class.st +++ b/src/Microdown/MicAbstractDelimiter.class.st @@ -32,6 +32,25 @@ MicAbstractDelimiter class >> allActive [ ^ self allSubclasses select: [ :subclass | subclass isActive ] ] +{ #category : #'as yet unclassified' } +MicAbstractDelimiter class >> allRegex [ + ^ ((self allActive collect: [ :del | |str| + str := WriteStream on: ''. + (del associatedInlineBlock + ifNil: [true] ifNotNil: [:block | block isEvaluated]) + ifTrue: [ + del markup do: [:char| str nextPut: $\;nextPut: char] ] + ifFalse: [ + del markup do: [:char| str nextPut: $\;nextPut: char]. + del associatedInlineBlock new closingDelimiter in: [ :other | + str nextPutAll: (self regExNot: other). + other do: [:char| str nextPut: $\;nextPut: char]. + ] + ]. + str contents]) joinUsing: '|') asRegex + +] + { #category : #accessing } MicAbstractDelimiter class >> associatedInlineBlock [ ^ self subclassResponsibility @@ -83,6 +102,21 @@ MicAbstractDelimiter class >> markup [ ^ self subclassResponsibility ] +{ #category : #'as yet unclassified' } +MicAbstractDelimiter class >> regExNot: markup [ + "return a regular expression (string), which is recognizing anything but markup" + | str prefix| + str := WriteStream on: ''. + str nextPut: $(. + prefix := ''. + 1 to: markup size do: [ :idx | + str nextPutAll: prefix; nextPut: $[;nextPut: $^; nextPut: (markup at: idx); nextPut: $]. + prefix := '|', (markup copyFrom: 1 to: idx) escapeAll. + ]. + str nextPut: $);nextPut: $*. + ^ str contents +] + { #category : #accessing } MicAbstractDelimiter class >> size [ ^ self markup size diff --git a/src/Microdown/MicInlineParser2.class.st b/src/Microdown/MicInlineParser2.class.st new file mode 100644 index 00000000..14107a07 --- /dev/null +++ b/src/Microdown/MicInlineParser2.class.st @@ -0,0 +1,186 @@ +Class { + #name : #MicInlineParser2, + #superclass : #Object, + #instVars : [ + 'delimiterRegEx', + 'delimiterDictionary' + ], + #pools : [ + 'MicMicrodownSharedPool' + ], + #category : #'Microdown-Parser' +} + +{ #category : #'escape character' } +MicInlineParser2 class >> escapeDecode: aString [ + "I convert all encoded chars back to their original (without the leading escape character)" + "My sister method escapeEncode encodes into the format I decode from" + | inStream outStream char special | + aString ifEmpty: [ ^aString ]. + special := [ :c | c asInteger between: self magicCharacter and: self magicCharacter + 65536 ]. + inStream := ReadStream on: aString. + outStream := WriteStream on: String new. + [ inStream atEnd ] whileFalse: [ + char := inStream next. + (special value: char) + ifTrue: [ char := (char asInteger - self magicCharacter ) asCharacter ]. + outStream nextPut: char + ]. + ^ outStream contents + +] + +{ #category : #'escape character' } +MicInlineParser2 class >> escapeEncode: aString [ + "I convert all escaped characters (eg '\`' or '\\') into special characters which are not used in Microdown" + "My sister method escapeDecode reverts back" + | inStream outStream char | + aString size <= 1 ifTrue: [ ^aString ]. + inStream := ReadStream on: aString. + outStream := WriteStream on: String new. + [ inStream atEnd ] whileFalse: [ + char := inStream next. + (char = $\ and: [ inStream atEnd not ]) + ifTrue: [ char := (inStream next asInteger + self magicCharacter) asCharacter ]. + outStream nextPut: char + ]. + ^ outStream contents + +] + +{ #category : #'escape character' } +MicInlineParser2 class >> escapeReescape: aString except: keep [ + "I convert all encoded back to escaped chars, except the characters in keep" + "My sister method escapeEncode encodes into the format I decode from" + | inStream outStream char | + aString ifEmpty: [ ^aString ]. + inStream := ReadStream on: aString. + outStream := WriteStream on: String new. + [ inStream atEnd ] whileFalse: [ + char := inStream next. + (char asInteger between: self magicCharacter and: self magicCharacter + 65536) + ifTrue: [ + char := (char asInteger - self magicCharacter ) asCharacter. + (keep includes: char) ifFalse: [outStream nextPut: $\]] . + outStream nextPut: char + ]. + ^ outStream contents + +] + +{ #category : #'escape character' } +MicInlineParser2 class >> magicCharacter [ + "All escaped characters are moved out of range. + The unicode range Private Use Area is used, + see https://en.wikipedia.org/wiki/Private_Use_Areas " + ^ 16r100000 "Private Use Area-B" +] + +{ #category : #parsing } +MicInlineParser2 >> compactRaws: tokenStream [ + "compact tokenStream so raw/unevaluated is single tokens" + + +] + +{ #category : #'as yet unclassified' } +MicInlineParser2 >> createInlineBlock: token [ + "token is an opening delimiter" + | closer | + closer := (delimiterDictionary at: token) associatedInlineBlock closingDelimiter + + +] + +{ #category : #'as yet unclassified' } +MicInlineParser2 >> createInlineBlock: openingDelimiter inStream: aReadStream [ + | pos closer bodyStream newBlock| + pos := aReadStream position. + closer := openingDelimiter associatedInlineBlock new closingDelimiter. + (self skipTo: closer inStream: aReadStream) + ifFalse: [ + aReadStream position: pos. + ^ self createTextBlock: openingDelimiter markup]. + bodyStream := ReadStream + on: aReadStream contents + from: pos + to: aReadStream position - 1. + newBlock := openingDelimiter associatedInlineBlock new + substring: (aReadStream contents copyFrom: pos to: aReadStream position -1); + children: (self eagerParse: bodyStream ). + ^ newBlock + + +] + +{ #category : #'as yet unclassified' } +MicInlineParser2 >> createTextBlock: token [ + ^ MicTextBlock new + substring: token; + children: #() +] + +{ #category : #'as yet unclassified' } +MicInlineParser2 >> eagerParse: tokenStream [ + "return an array of blocks from parsing tokenStream" + | children child | + children := OrderedCollection new. + [ tokenStream atEnd ] + whileFalse: [ | token delType | + token := tokenStream next. + delType := delimiterDictionary at: token ifAbsent: [ nil ]. + child := (delType + ifNil: [ self createTextBlock: token ] + ifNotNil: [ + delType isOpener + ifTrue: [self createInlineBlock: delType inStream: tokenStream] + ifFalse: [ self createTextBlock: token] ]). + children add: child. + ]. + ^ children +] + +{ #category : #'as yet unclassified' } +MicInlineParser2 >> initialize [ + delimiterRegEx := MicAbstractDelimiter allRegex. + delimiterDictionary := (MicAbstractDelimiter allActive collect: [ :del | del markup -> del ]) asDictionary . +] + +{ #category : #parsing } +MicInlineParser2 >> parse: aString [ + "I return an array of inline blocks" + | escapedStream tokenStream | + escapedStream := self class escapeEncode: aString. + tokenStream := self tokenize: escapedStream. + ^ self eagerParse: tokenStream +] + +{ #category : #'as yet unclassified' } +MicInlineParser2 >> skipTo: closer inStream: tokenStream [ + "skip tokenStream to closer, ignore closers in non-evaluated. return true if we found it" + | startPos| + startPos := tokenStream position. + [ tokenStream atEnd] + whileFalse: [ | token| + token := tokenStream next. + token = closer ifTrue: [ ^true ] + ]. + ^ false + +] + +{ #category : #parsing } +MicInlineParser2 >> tokenize: escapedStream [ + | splits tokens from| + splits := delimiterRegEx matchingRangesIn: escapedStream. + tokens := OrderedCollection new. + from := 1. + splits do: [ :delMatch | + tokens add: (escapedStream copyFrom: from to: delMatch first - 1). + tokens add: (escapedStream copyFrom: delMatch first to: delMatch last). + from := delMatch last + 1 + ]. + from <= escapedStream size ifTrue: [ tokens add: (escapedStream copyFrom: from to: escapedStream size) ]. + ^ ReadStream on: tokens + +] diff --git a/src/Microdown/MicInlineParserToken.class.st b/src/Microdown/MicInlineParserToken.class.st new file mode 100644 index 00000000..4f371cf3 --- /dev/null +++ b/src/Microdown/MicInlineParserToken.class.st @@ -0,0 +1,48 @@ +Class { + #name : #MicInlineParserToken, + #superclass : #Object, + #instVars : [ + 'string', + 'delimiter' + ], + #pools : [ + 'MicMicrodownSharedPool' + ], + #category : #'Microdown-Parser' +} + +{ #category : #'instance creation' } +MicInlineParserToken class >> on: aString [ + ^ self new + string: aString; + delimiter: 88 +] + +{ #category : #accessing } +MicInlineParserToken >> delimiter [ + + ^ delimiter +] + +{ #category : #accessing } +MicInlineParserToken >> delimiter: anObject [ + + delimiter := anObject +] + +{ #category : #'instance creation' } +MicInlineParserToken >> on: aString [ + +] + +{ #category : #accessing } +MicInlineParserToken >> string [ + + ^ string +] + +{ #category : #accessing } +MicInlineParserToken >> string: anObject [ + + string := anObject +] diff --git a/src/Microdown/String.extension.st b/src/Microdown/String.extension.st index 8c8aebf2..0713e3a5 100644 --- a/src/Microdown/String.extension.st +++ b/src/Microdown/String.extension.st @@ -5,6 +5,16 @@ String >> asMicResourceReference [ ^ MicResourceReference fromUri: self ] +{ #category : #'*Microdown' } +String >> escapeAll [ + | escaped | + escaped := String new: self size * 2. + 1 to: self size do: [ :idx | + escaped at: idx*2-1 put: $\. + escaped at: idx*2 put: (self at: idx)]. + ^ escaped +] + { #category : #'*Microdown' } String >> resolveDocument: document [ ^ self asMicResourceReference resolveDocument: document. From 2b9cf9767726577a13200c0843634b56a560a968 Mon Sep 17 00:00:00 2001 From: kasperosterbye Date: Thu, 20 Oct 2022 12:42:30 +0200 Subject: [PATCH 02/40] inline parser go 1 --- src/Microdown/MicAbstractDelimiter.class.st | 5 ++ src/Microdown/MicInlineParser2.class.st | 96 +++++++++++++-------- 2 files changed, 65 insertions(+), 36 deletions(-) diff --git a/src/Microdown/MicAbstractDelimiter.class.st b/src/Microdown/MicAbstractDelimiter.class.st index 2849d6e3..87814576 100644 --- a/src/Microdown/MicAbstractDelimiter.class.st +++ b/src/Microdown/MicAbstractDelimiter.class.st @@ -102,6 +102,11 @@ MicAbstractDelimiter class >> markup [ ^ self subclassResponsibility ] +{ #category : #'as yet unclassified' } +MicAbstractDelimiter class >> rawStyleOpeningMarkups [ + ^ self allActive select: [ :del | del isOpener and: [ del associatedInlineBlock isEvaluated not ] ] +] + { #category : #'as yet unclassified' } MicAbstractDelimiter class >> regExNot: markup [ "return a regular expression (string), which is recognizing anything but markup" diff --git a/src/Microdown/MicInlineParser2.class.st b/src/Microdown/MicInlineParser2.class.st index 14107a07..b8cff3fe 100644 --- a/src/Microdown/MicInlineParser2.class.st +++ b/src/Microdown/MicInlineParser2.class.st @@ -76,22 +76,6 @@ MicInlineParser2 class >> magicCharacter [ ^ 16r100000 "Private Use Area-B" ] -{ #category : #parsing } -MicInlineParser2 >> compactRaws: tokenStream [ - "compact tokenStream so raw/unevaluated is single tokens" - - -] - -{ #category : #'as yet unclassified' } -MicInlineParser2 >> createInlineBlock: token [ - "token is an opening delimiter" - | closer | - closer := (delimiterDictionary at: token) associatedInlineBlock closingDelimiter - - -] - { #category : #'as yet unclassified' } MicInlineParser2 >> createInlineBlock: openingDelimiter inStream: aReadStream [ | pos closer bodyStream newBlock| @@ -103,16 +87,33 @@ MicInlineParser2 >> createInlineBlock: openingDelimiter inStream: aReadStream [ ^ self createTextBlock: openingDelimiter markup]. bodyStream := ReadStream on: aReadStream contents - from: pos + from: pos + 1 to: aReadStream position - 1. newBlock := openingDelimiter associatedInlineBlock new substring: (aReadStream contents copyFrom: pos to: aReadStream position -1); - children: (self eagerParse: bodyStream ). + children: (self parseChildrenIn: bodyStream ). ^ newBlock ] +{ #category : #'as yet unclassified' } +MicInlineParser2 >> createRawStyleBlock: token [ + | opener block substring| + opener := (MicAbstractDelimiter rawStyleOpeningMarkups) + detect: [ :o | token beginsWith: o markup] + ifNone: [ nil ]. + opener ifNil: [ ^ self createTextBlock: token ]. + block := opener associatedInlineBlock new. + substring := token + copyFrom: opener markup size + 1 + to: token size - block closingDelimiter size. + ^ block + substring: substring; + children: { self createTextBlock: substring } + +] + { #category : #'as yet unclassified' } MicInlineParser2 >> createTextBlock: token [ ^ MicTextBlock new @@ -121,7 +122,45 @@ MicInlineParser2 >> createTextBlock: token [ ] { #category : #'as yet unclassified' } -MicInlineParser2 >> eagerParse: tokenStream [ +MicInlineParser2 >> initialize [ + delimiterRegEx := MicAbstractDelimiter allRegex. + delimiterDictionary := (MicAbstractDelimiter allActive collect: [ :del | del markup -> del ]) asDictionary . +] + +{ #category : #'as yet unclassified' } +MicInlineParser2 >> joinTextNodesOf: children [ + "children might have several text blocks following each other - join them and remove empty Text blocks" + | outStream bufferText | + outStream := WriteStream on: OrderedCollection new. + bufferText := nil. + children do: [ :child | + child class = MicTextBlock + ifTrue: [ bufferText + ifNil: [ bufferText := child ] + ifNotNil: [ bufferText substring: (bufferText substring , child substring) ] ] + ifFalse: [ bufferText + ifNil: [ outStream nextPut: child ] + ifNotNil: [ + bufferText substring ifNotEmpty: [outStream nextPut: bufferText]. + outStream nextPut: child. bufferText := nil ] + ] + ]. + (bufferText isNotNil and: [ bufferText substring isNotEmpty ]) + ifTrue: [ outStream nextPut: bufferText ]. + ^ outStream contents +] + +{ #category : #parsing } +MicInlineParser2 >> parse: aString [ + "I return an array of inline blocks" + | escapedStream tokenStream | + escapedStream := self class escapeEncode: aString. + tokenStream := self tokenize: escapedStream. + ^ self parseChildrenIn: tokenStream +] + +{ #category : #'as yet unclassified' } +MicInlineParser2 >> parseChildrenIn: tokenStream [ "return an array of blocks from parsing tokenStream" | children child | children := OrderedCollection new. @@ -130,29 +169,14 @@ MicInlineParser2 >> eagerParse: tokenStream [ token := tokenStream next. delType := delimiterDictionary at: token ifAbsent: [ nil ]. child := (delType - ifNil: [ self createTextBlock: token ] + ifNil: [ self createRawStyleBlock: token ] ifNotNil: [ delType isOpener ifTrue: [self createInlineBlock: delType inStream: tokenStream] ifFalse: [ self createTextBlock: token] ]). children add: child. ]. - ^ children -] - -{ #category : #'as yet unclassified' } -MicInlineParser2 >> initialize [ - delimiterRegEx := MicAbstractDelimiter allRegex. - delimiterDictionary := (MicAbstractDelimiter allActive collect: [ :del | del markup -> del ]) asDictionary . -] - -{ #category : #parsing } -MicInlineParser2 >> parse: aString [ - "I return an array of inline blocks" - | escapedStream tokenStream | - escapedStream := self class escapeEncode: aString. - tokenStream := self tokenize: escapedStream. - ^ self eagerParse: tokenStream + ^ self joinTextNodesOf: children ] { #category : #'as yet unclassified' } From bb3192677b6a5f37ece347925bcd500cb2bd50a6 Mon Sep 17 00:00:00 2001 From: kasperosterbye Date: Sat, 22 Oct 2022 17:59:38 +0200 Subject: [PATCH 03/40] added new inline delimiter class and other stuff --- .../MicInlineTokenStreamTest.class.st | 8 ++ src/Microdown/MicInlineDelimiter.class.st | 101 ++++++++++++++++++ src/Microdown/MicInlineParser2.class.st | 29 ++--- src/Microdown/MicInlineParserToken.class.st | 48 --------- src/Microdown/MicInlineToken.class.st | 43 ++++++++ src/Microdown/MicInlineTokenStream.class.st | 64 +++++++++++ 6 files changed, 223 insertions(+), 70 deletions(-) create mode 100644 src/Microdown-Tests/MicInlineTokenStreamTest.class.st create mode 100644 src/Microdown/MicInlineDelimiter.class.st delete mode 100644 src/Microdown/MicInlineParserToken.class.st create mode 100644 src/Microdown/MicInlineToken.class.st create mode 100644 src/Microdown/MicInlineTokenStream.class.st diff --git a/src/Microdown-Tests/MicInlineTokenStreamTest.class.st b/src/Microdown-Tests/MicInlineTokenStreamTest.class.st new file mode 100644 index 00000000..34cf8204 --- /dev/null +++ b/src/Microdown-Tests/MicInlineTokenStreamTest.class.st @@ -0,0 +1,8 @@ +" +A MicInlineTokenStreamTest is a test class for testing the behavior of MicInlineTokenStream +" +Class { + #name : #MicInlineTokenStreamTest, + #superclass : #TestCase, + #category : #'Microdown-Tests-InlineParser' +} diff --git a/src/Microdown/MicInlineDelimiter.class.st b/src/Microdown/MicInlineDelimiter.class.st new file mode 100644 index 00000000..5af7be84 --- /dev/null +++ b/src/Microdown/MicInlineDelimiter.class.st @@ -0,0 +1,101 @@ +" +I am a class representing a inline delimiter (for example `**` for bold). + +My class side manages the set of delimiters. The delimiters are defined and set up in class side `initializeDelimiters` +" +Class { + #name : #MicInlineDelimiter, + #superclass : #Object, + #instVars : [ + 'markup', + 'blockClass', + 'closer', + 'opener' + ], + #classVars : [ + 'DelimiterMap' + ], + #pools : [ + 'MicMicrodownSharedPool' + ], + #category : #'Microdown-InlineParser' +} + +{ #category : #initialization } +MicInlineDelimiter class >> initialize [ +