Skip to content

Commit

Permalink
Merge pull request #1886 from robstoll/bugfix/1884-regex-surrogate-co…
Browse files Browse the repository at this point in the history
…de-points

#1884 take surrogate code points into account in RegexSearcher
  • Loading branch information
robstoll authored Dec 27, 2024
2 parents 4296ef1 + 82b1e45 commit 6a108dd
Show file tree
Hide file tree
Showing 7 changed files with 50 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ class CharSequenceToContainRegexExpectationsSpec : Spek({
include(StringSpec)
include(RegexSpec)


describe("context 'aaaa'") {
it("search for 'aa' finds 3 hits since we want non-disjoint matches") {
expect("aaaa") toContain o exactly 3 regex "aa"
Expand All @@ -18,6 +17,15 @@ class CharSequenceToContainRegexExpectationsSpec : Spek({
expect("aaaa") toContain o exactly 4 regex "aa?"
}
}
// The flag glyph U+1F6A9 lies outside the Basic Multilingual Plane, so it occupies two chars
// (a surrogate pair); it is followed by the variation selector U+FE0F. One `g` is therefore 3 chars long,
// which is what makes it a useful subject for testing how the search advances after a match.
val g = "🚩\uFE0F"
describe("context '$g$g$g$g'") {
    it("search for '$g$g' finds 3 hits since we want non-disjoint matches") {
        // overlapping matches start at the 1st, 2nd and 3rd glyph
        expect("$g$g$g$g") toContain o exactly 3 regex "$g$g"
    }
    it("search for '$g($g)?' finds 4 hits since we want non-disjoint matches") {
        // the second glyph is optional, hence a match starts at every one of the 4 glyphs
        expect("$g$g$g$g") toContain o exactly 4 regex "$g($g)?"
    }
}
}) {
object StringSpec : ch.tutteli.atrium.specs.integration.CharSequenceToContainRegexExpectationsSpec(
getNameContainsRegex(),
Expand Down Expand Up @@ -60,7 +68,12 @@ class CharSequenceToContainRegexExpectationsSpec : Spek({
if (aX.isEmpty()) expect toContain o atLeast atLeast regex a
else expect toContain o atLeast atLeast the regexPatterns(a, *aX)

private fun toContainAtLeastRegex(expect: Expect<CharSequence>, atLeast: Int, a: String, aX: Array<out String>) =
/**
 * Expects that the subject contains at least [atLeast] matches for the pattern [a] and, if given,
 * for each of the additional patterns in [aX]. All patterns are passed on as [Regex] instances.
 */
private fun toContainAtLeastRegex(
    expect: Expect<CharSequence>,
    atLeast: Int,
    a: String,
    aX: Array<out String>
) = when {
    // a single pattern => no need to wrap it into a group of patterns
    aX.isEmpty() -> expect toContain o atLeast atLeast matchFor Regex(a)
    else -> {
        val otherRegexes = aX.map { it.toRegex() }.toTypedArray()
        expect toContain o atLeast atLeast matchFor all(Regex(a), *otherRegexes)
    }
}

Expand Down
4 changes: 4 additions & 0 deletions atrium-core/api/main/atrium-core.api
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,10 @@ public final class ch/tutteli/atrium/core/Some : ch/tutteli/atrium/core/Option {
public fun toString ()Ljava/lang/String;
}

public final class ch/tutteli/atrium/core/polyfills/CharExtensionsKt {
public static final fun isHighSurrogate (C)Z
}

public final class ch/tutteli/atrium/core/polyfills/FormatFloatingPointNumberKt {
public static final fun formatFloatingPointNumber (Ljava/lang/Number;)Ljava/lang/String;
}
Expand Down
4 changes: 4 additions & 0 deletions atrium-core/api/using-kotlin-1.9-or-newer/atrium-core.api
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,10 @@ public final class ch/tutteli/atrium/core/Some : ch/tutteli/atrium/core/Option {
public fun toString ()Ljava/lang/String;
}

public final class ch/tutteli/atrium/core/polyfills/CharExtensionsKt {
public static final fun isHighSurrogate (C)Z
}

public final class ch/tutteli/atrium/core/polyfills/FormatFloatingPointNumberKt {
public static final fun formatFloatingPointNumber (Ljava/lang/Number;)Ljava/lang/String;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package ch.tutteli.atrium.core.polyfills

/**
 * Indicates whether this char is a
 * [high-surrogate code unit](https://www.unicode.org/glossary/#high_surrogate_code_unit),
 * i.e. a UTF-16 code unit in the range `U+D800..U+DBFF` which forms the first half of a surrogate pair.
 *
 * @return `true` if this char is a high-surrogate code unit, `false` otherwise.
 *
 * @since 1.3.0
 */
expect fun Char.isHighSurrogate(): Boolean
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package ch.tutteli.atrium.core.polyfills

/**
 * First code unit (`U+D800`) of the high-surrogate range.
 *
 * Note that, despite its generic name, this constant delimits only the range of *high* surrogates.
 */
const val SURROGATE_RANGE_START = '\uD800'

/**
 * Last code unit (`U+DBFF`, inclusive) of the high-surrogate range.
 *
 * Note that, despite its generic name, this constant delimits only the range of *high* surrogates;
 * low surrogates (`U+DC00..U+DFFF`) are not covered.
 */
const val SURROGATE_RANGE_END_INCLUSIVE = '\uDBFF'

/**
 * Indicates whether this char is a high-surrogate code unit, i.e. lies within
 * [SURROGATE_RANGE_START]..[SURROGATE_RANGE_END_INCLUSIVE].
 *
 * @return `true` if this char is a high-surrogate code unit, `false` otherwise.
 */
actual fun Char.isHighSurrogate(): Boolean =
    this in SURROGATE_RANGE_START..SURROGATE_RANGE_END_INCLUSIVE

Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
package ch.tutteli.atrium.core.polyfills

/**
 * Indicates whether this char is a high-surrogate code unit, using the bounds defined by [Character].
 *
 * @return `true` if this char lies within
 *   [Character.MIN_HIGH_SURROGATE]..[Character.MAX_HIGH_SURROGATE], `false` otherwise.
 */
actual fun Char.isHighSurrogate(): Boolean =
    this in Character.MIN_HIGH_SURROGATE..Character.MAX_HIGH_SURROGATE

Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,14 @@ import ch.tutteli.atrium.logic.creating.charsequence.contains.searchbehaviours.N
class RegexSearcher : Searcher<NoOpSearchBehaviour, Regex> {
/**
 * Counts how often [searchFor] matches in [searchIn], where matches are allowed to overlap (non-disjoint).
 *
 * After each match the search resumes one code point after the start of that match. If the match starts
 * with a surrogate pair, both of its chars are skipped so that the search never resumes in the middle of
 * a code point.
 *
 * An empty match starting at the very end of the input is counted and terminates the search.
 *
 * @param searchIn the input which is searched.
 * @param searchFor the regular expression to look for.
 * @return the number of (possibly overlapping) matches.
 */
override fun search(searchIn: CharSequence, searchFor: Regex): Int {
    // work on a stable String snapshot; it is used for both the regex search and the indexed access
    val searchInString = searchIn.toString()
    var counter = 0
    var matchResult = searchFor.find(searchInString)
    while (matchResult != null) {
        ++counter
        val matchStart = matchResult.range.first
        // an empty match can start at the end of the input, there is no char to inspect and nothing left to search
        if (matchStart >= searchInString.length) break

        // skip two chars only for a complete surrogate pair; an unpaired high surrogate is a code point of its own
        val startsWithSurrogatePair = searchInString[matchStart].isHighSurrogate() &&
            matchStart + 1 < searchInString.length &&
            searchInString[matchStart + 1] in '\uDC00'..'\uDFFF'
        val nextStart = matchStart + if (startsWithSurrogatePair) 2 else 1

        // nextStart <= length holds at this point, hence find cannot throw an IndexOutOfBoundsException
        matchResult = searchFor.find(searchInString, nextStart)
    }
    return counter
}
Expand Down

0 comments on commit 6a108dd

Please sign in to comment.