Skip to content

Commit

Permalink
Introduce new events like leftParenthesis()/rightParenthesis() as…
Browse files Browse the repository at this point in the history
… a replacement for `openGroup()` and `closeGroup()`.

It is more efficient and allows writing clearer code.
  • Loading branch information
carlosame committed Apr 12, 2023
1 parent 7d2d9b3 commit 6a60cb5
Show file tree
Hide file tree
Showing 8 changed files with 233 additions and 26 deletions.
7 changes: 7 additions & 0 deletions UPGRADING.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Upgrading from 1.x

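Tokenproducer 2.0 replaces the `TokenHandler` methods `openGroup()` and
`closeGroup()` with one event method per bracket: `leftParenthesis()`/`rightParenthesis()`,
`leftSquareBracket()`/`rightSquareBracket()` and `leftCurlyBracket()`/`rightCurlyBracket()`.
Other start and end punctuation is reported through `startPunctuation()` and
`endPunctuation()`. Using the new event methods may allow you to write clearer
code. Alternatively, your old `TokenHandler` implementation can implement the new
`io.sf.carte.uparser.util.LegacyTokenHandler` interface instead, which is
compatible with the 1.x API and provides a simpler upgrade path.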
28 changes: 24 additions & 4 deletions src/main/java/io/sf/carte/uparser/CommentRemovalHandler.java
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,33 @@ public void quotedNewlineChar(int index, int codePoint) {
}

@Override
public void openGroup(int index, int codePoint) {
buffer.appendCodePoint(codePoint);
public void leftParenthesis(int index) {
buffer.append('(');
}

@Override
public void closeGroup(int index, int codePoint) {
buffer.appendCodePoint(codePoint);
public void leftSquareBracket(int index) {
buffer.append('[');
}

/**
* Appends a <code>{</code> character to {@code buffer}.
*
* @param index
* the index at which the codepoint was found (not used by this method).
*/
@Override
public void leftCurlyBracket(int index) {
buffer.append('{');
}

/**
* Appends a {@code )} character to {@code buffer}.
*
* @param index
* the index at which the codepoint was found (not used by this method).
*/
@Override
public void rightParenthesis(int index) {
buffer.append(')');
}

/**
* Appends a {@code ]} character to {@code buffer}.
*
* @param index
* the index at which the codepoint was found (not used by this method).
*/
@Override
public void rightSquareBracket(int index) {
buffer.append(']');
}

/**
* Appends a <code>}</code> character to {@code buffer}.
*
* @param index
* the index at which the codepoint was found (not used by this method).
*/
@Override
public void rightCurlyBracket(int index) {
buffer.append('}');
}

@Override
Expand Down
64 changes: 60 additions & 4 deletions src/main/java/io/sf/carte/uparser/TokenHandler.java
Original file line number Diff line number Diff line change
Expand Up @@ -81,24 +81,80 @@ public interface TokenHandler {
void quotedNewlineChar(int index, int codePoint);

/**
* Called when one of these codepoints is found: (, [, {
* Called when the {@code (} codepoint is found.
*
* @param index
* the index at which the codepoint was found.
*/
void leftParenthesis(int index);

/**
* Called when the {@code [} codepoint (left square bracket, U+005B) is found.
*
* @param index
* the index at which the codepoint was found.
*/
void leftSquareBracket(int index);

/**
* Called when the <code>{</code> codepoint (left curly bracket, U+007B) is
* found.
*
* @param index
* the index at which the codepoint was found.
*/
void leftCurlyBracket(int index);

/**
* Called when the {@code )} codepoint (right parenthesis, U+0029) is found.
*
* @param index
* the index at which the codepoint was found.
*/
void rightParenthesis(int index);

/**
* Called when the {@code ]} codepoint (right square bracket, U+005D) is found.
*
* @param index
* the index at which the codepoint was found.
*/
void rightSquareBracket(int index);

/**
* Called when the <code>}</code> codepoint (right curly bracket, U+007D) is
* found.
*
* @param index
* the index at which the codepoint was found.
*/
void rightCurlyBracket(int index);

/**
* Called when start punctuation (Ps) codepoints are found (except characters
* handled by {@link #leftCurlyBracket(int)}, {@link #leftParenthesis(int)} and
* {@link #leftSquareBracket(int)}).
*
* @param index
* the index at which the codepoint was found.
* @param codePoint
* the found codepoint.
*/
void openGroup(int index, int codePoint);
default void startPunctuation(int index, int codePoint) {
character(index, codePoint);
}

/**
* Called when one of these codepoints is found: ), ], }
* Called when end punctuation (Pe) codepoints are found (except characters
* handled by {@link #rightCurlyBracket(int)}, {@link #rightParenthesis(int)}
* and {@link #rightSquareBracket(int)}).
*
* @param index
* the index at which the codepoint was found.
* @param codePoint
* the found codepoint.
*/
void closeGroup(int index, int codePoint);
default void endPunctuation(int index, int codePoint) {
character(index, codePoint);
}

/**
* Other characters including punctuation (excluding connector punctuation) and symbols
Expand Down
44 changes: 34 additions & 10 deletions src/main/java/io/sf/carte/uparser/TokenProducer.java
Original file line number Diff line number Diff line change
Expand Up @@ -588,31 +588,55 @@ int processCodePoint(int cp, boolean nocomment) throws IOException {
break;
}
checkPreviousWord();
if (cp == 40 || cp == 91 || cp == 123) {
// ([{
handler.openGroup(rootIndex, cp);
// ([{
switch (cp) {
case 40:
handler.leftParenthesis(rootIndex);
previdx = rootIndex + 1;
prevtype = Character.OTHER_PUNCTUATION;
break;
case 91:
handler.leftSquareBracket(rootIndex);
previdx = rootIndex + 1;
prevtype = Character.OTHER_PUNCTUATION;
break;
case 123:
handler.leftCurlyBracket(rootIndex);
previdx = rootIndex + 1;
prevtype = Character.OTHER_PUNCTUATION;
break;
default:
handler.startPunctuation(rootIndex, cp);
updatePrev(cp);
}
handleCharacter(cp);
updatePrev(cp);
break;
case Character.END_PUNCTUATION:
if (charCheck.isAllowedCharacter(cp, this)) {
prevtype = Character.UPPERCASE_LETTER;
break;
}
checkPreviousWord();
if (cp == 41 || cp == 93 || cp == 125) {
// )]}
handler.closeGroup(rootIndex, cp);
// )]}
switch (cp) {
case 41:
handler.rightParenthesis(rootIndex);
previdx = rootIndex + 1;
prevtype = Character.OTHER_PUNCTUATION;
break;
case 93:
handler.rightSquareBracket(rootIndex);
previdx = rootIndex + 1;
prevtype = Character.OTHER_PUNCTUATION;
break;
case 125:
handler.rightCurlyBracket(rootIndex);
previdx = rootIndex + 1;
prevtype = Character.OTHER_PUNCTUATION;
break;
default:
handler.endPunctuation(rootIndex, cp);
updatePrev(cp);
}
handleCharacter(cp);
updatePrev(cp);
break;
case Character.OTHER_PUNCTUATION:
if (charCheck.isAllowedCharacter(cp, this)) {
Expand Down
73 changes: 73 additions & 0 deletions src/main/java/io/sf/carte/uparser/util/LegacyTokenHandler.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/*
Copyright (c) 2017-2023, Carlos Amengual.
Licensed under a BSD-style License. You can find the license here:
https://css4j.github.io/LICENSE.txt
*/

// SPDX-License-Identifier: BSD-3-Clause

package io.sf.carte.uparser.util;

import io.sf.carte.uparser.TokenHandler;
import io.sf.carte.uparser.TokenProducer;

/**
 * A {@link TokenHandler} that is backwards-compatible with
 * {@code TokenProducer} 1.x.
 * <p>
 * The six bracket events declared by {@link TokenHandler} receive default
 * implementations here that forward to the 1.x-style callbacks
 * {@link #openGroup(int, int)} and {@link #closeGroup(int, int)}, so an existing
 * 1.x handler only has to switch the interface that it implements.
 * </p>
 */
public interface LegacyTokenHandler extends TokenHandler {

	/**
	 * Called when one of these codepoints is found: (, [, {
	 *
	 * @param index
	 *            the index at which the codepoint was found.
	 * @param codePoint
	 *            the found codepoint.
	 */
	void openGroup(int index, int codePoint);

	/**
	 * Called when one of these codepoints is found: ), ], }
	 *
	 * @param index
	 *            the index at which the codepoint was found.
	 * @param codePoint
	 *            the found codepoint.
	 */
	void closeGroup(int index, int codePoint);

	/**
	 * Forwards the event to {@link #openGroup(int, int)}, passing
	 * {@link TokenProducer#CHAR_LEFT_PAREN} as the codepoint.
	 *
	 * @param index
	 *            the index at which the codepoint was found.
	 */
	@Override
	default void leftParenthesis(int index) {
		this.openGroup(index, TokenProducer.CHAR_LEFT_PAREN);
	}

	/**
	 * Forwards the event to {@link #openGroup(int, int)}, passing
	 * {@link TokenProducer#CHAR_LEFT_SQ_BRACKET} as the codepoint.
	 *
	 * @param index
	 *            the index at which the codepoint was found.
	 */
	@Override
	default void leftSquareBracket(int index) {
		this.openGroup(index, TokenProducer.CHAR_LEFT_SQ_BRACKET);
	}

	/**
	 * Forwards the event to {@link #openGroup(int, int)}, passing
	 * {@link TokenProducer#CHAR_LEFT_CURLY_BRACKET} as the codepoint.
	 *
	 * @param index
	 *            the index at which the codepoint was found.
	 */
	@Override
	default void leftCurlyBracket(int index) {
		this.openGroup(index, TokenProducer.CHAR_LEFT_CURLY_BRACKET);
	}

	/**
	 * Forwards the event to {@link #closeGroup(int, int)}, passing
	 * {@link TokenProducer#CHAR_RIGHT_PAREN} as the codepoint.
	 *
	 * @param index
	 *            the index at which the codepoint was found.
	 */
	@Override
	default void rightParenthesis(int index) {
		this.closeGroup(index, TokenProducer.CHAR_RIGHT_PAREN);
	}

	/**
	 * Forwards the event to {@link #closeGroup(int, int)}, passing
	 * {@link TokenProducer#CHAR_RIGHT_SQ_BRACKET} as the codepoint.
	 *
	 * @param index
	 *            the index at which the codepoint was found.
	 */
	@Override
	default void rightSquareBracket(int index) {
		this.closeGroup(index, TokenProducer.CHAR_RIGHT_SQ_BRACKET);
	}

	/**
	 * Forwards the event to {@link #closeGroup(int, int)}, passing
	 * {@link TokenProducer#CHAR_RIGHT_CURLY_BRACKET} as the codepoint.
	 *
	 * @param index
	 *            the index at which the codepoint was found.
	 */
	@Override
	default void rightCurlyBracket(int index) {
		this.closeGroup(index, TokenProducer.CHAR_RIGHT_CURLY_BRACKET);
	}

}
4 changes: 4 additions & 0 deletions src/main/java/io/sf/carte/uparser/util/package-info.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
/**
* Utility interfaces, including a {@code TokenHandler} that is
* backwards-compatible with the 1.x API.
*/
package io.sf.carte.uparser.util;
1 change: 1 addition & 0 deletions src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,5 @@
*/
module io.sf.carte.tokenproducer {
// Package that contains TokenProducer and the TokenHandler interface.
exports io.sf.carte.uparser;
// Utility interfaces such as LegacyTokenHandler.
exports io.sf.carte.uparser.util;
}
38 changes: 30 additions & 8 deletions src/test/java/io/sf/carte/uparser/TestTokenHandler.java
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,39 @@ public void separator(int index, int cp) {
}

@Override
public void openGroup(int index, int codepoint) {
char[] chars = Character.toChars(codepoint);
openbuffer.append(chars);
punctbuffer.append(chars);
public void leftParenthesis(int index) {
openbuffer.append('(');
punctbuffer.append('(');
}

@Override
public void closeGroup(int index, int codepoint) {
char[] chars = Character.toChars(codepoint);
closebuffer.append(chars);
punctbuffer.append(chars);
public void leftSquareBracket(int index) {
openbuffer.append('[');
punctbuffer.append('[');
}

/**
* Records a <code>{</code> in both {@code openbuffer} and {@code punctbuffer}.
*
* @param index
* the index at which the codepoint was found (not recorded).
*/
@Override
public void leftCurlyBracket(int index) {
openbuffer.append('{');
punctbuffer.append('{');
}

/**
* Records a {@code )} in both {@code closebuffer} and {@code punctbuffer}.
*
* @param index
* the index at which the codepoint was found (not recorded).
*/
@Override
public void rightParenthesis(int index) {
closebuffer.append(')');
punctbuffer.append(')');
}

/**
* Records a {@code ]} in both {@code closebuffer} and {@code punctbuffer}.
*
* @param index
* the index at which the codepoint was found (not recorded).
*/
@Override
public void rightSquareBracket(int index) {
closebuffer.append(']');
punctbuffer.append(']');
}

/**
* Records a <code>}</code> in both {@code closebuffer} and {@code punctbuffer}.
*
* @param index
* the index at which the codepoint was found (not recorded).
*/
@Override
public void rightCurlyBracket(int index) {
closebuffer.append('}');
punctbuffer.append('}');
}

@Override
Expand Down

0 comments on commit 6a60cb5

Please sign in to comment.