diff --git a/README.md b/README.md index bca9b5c..e62f53e 100644 --- a/README.md +++ b/README.md @@ -1 +1 @@ -# korlibs-library-template \ No newline at end of file +# korlibs-string \ No newline at end of file diff --git a/korlibs-simple/.gitignore b/korlibs-simple/.gitignore deleted file mode 100644 index 796b96d..0000000 --- a/korlibs-simple/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/build diff --git a/korlibs-simple/src/korlibs/simple/Simple.kt b/korlibs-simple/src/korlibs/simple/Simple.kt deleted file mode 100644 index 8349a68..0000000 --- a/korlibs-simple/src/korlibs/simple/Simple.kt +++ /dev/null @@ -1,4 +0,0 @@ -package korlibs.simple - -class Simple { -} \ No newline at end of file diff --git a/korlibs-simple/module.yaml b/korlibs-string/module.yaml similarity index 78% rename from korlibs-simple/module.yaml rename to korlibs-string/module.yaml index 399d46a..b872aa2 100644 --- a/korlibs-simple/module.yaml +++ b/korlibs-string/module.yaml @@ -8,6 +8,5 @@ aliases: - jvmAndAndroid: [jvm, android] dependencies: - -test-dependencies: - + - org.jetbrains.kotlinx:atomicfu:0.24.0: exported + - com.soywiz:korlibs-platform:6.0.0: exported diff --git a/korlibs-string/src/korlibs/io/lang/Charset.kt b/korlibs-string/src/korlibs/io/lang/Charset.kt new file mode 100644 index 0000000..70d9b07 --- /dev/null +++ b/korlibs-string/src/korlibs/io/lang/Charset.kt @@ -0,0 +1,291 @@ +package korlibs.io.lang + +import korlibs.io.lang.internal.* +import korlibs.io.lang.internal.extract +import korlibs.io.lang.internal.insert +import korlibs.io.lang.internal.getS16 +import korlibs.io.lang.internal.set16 +import korlibs.memory.* +import kotlinx.atomicfu.locks.* +import kotlin.math.* + +internal expect val platformCharsetProvider: CharsetProvider + +private val CHARSET_PROVIDERS = arrayListOf() +private val CHARSET_PROVIDERS_LOCK = reentrantLock() + +fun interface CharsetProvider { + companion object : CharsetProvider { + override fun invoke(normalizedName: String, name: String): 
Charset? = platformCharsetProvider.invoke(normalizedName, name) + } + operator fun invoke(normalizedName: String, name: String): Charset? +} + +abstract class Charset(val name: String) { + // Just an estimation, might not be accurate, but hopefully will help setting StringBuilder and ByteArrayBuilder to a better initial capacity + open fun estimateNumberOfCharactersForBytes(nbytes: Int): Int = nbytes * 2 + open fun estimateNumberOfBytesForCharacters(nchars: Int): Int = nchars * 2 + + abstract fun encode(out: ByteArrayBuilder, src: CharSequence, start: Int = 0, end: Int = src.length) + + /** + * Decodes the [src] [ByteArray] [start]-[end] range using this [Charset] + * and writes the result into the [out] [StringBuilder]. + * + * Returns the number of consumed bytes ([end]-[start] if not under-flowing) and less if a character is not complete. + **/ + abstract fun decode(out: StringBuilder, src: ByteArray, start: Int = 0, end: Int = src.size): Int + + companion object { + inline fun registerProvider(provider: CharsetProvider, block: () -> T): T { + registerProvider(provider) + return try { + block() + } finally { + unregisterProvider(provider) + } + } + + fun registerProvider(provider: CharsetProvider) { + CHARSET_PROVIDERS_LOCK.withLock { + CHARSET_PROVIDERS.add(provider) + } + } + + fun unregisterProvider(provider: CharsetProvider) { + CHARSET_PROVIDERS_LOCK.withLock { + CHARSET_PROVIDERS.remove(provider) + } + } + + fun forName(name: String): Charset { + val normalizedName = name.uppercase().replace("_", "").replace("-", "") + when (normalizedName) { + "UTF8" -> return UTF8 + "UTF16", "UTF16LE" -> return UTF16_LE + "UTF16BE" -> return UTF16_BE + "ISO88591", "LATIN1" -> return ISO_8859_1 + } + CHARSET_PROVIDERS_LOCK.withLock { + for (provider in CHARSET_PROVIDERS) { + provider(normalizedName, name)?.let { return it } + } + } + platformCharsetProvider(normalizedName, name)?.let { return it } + throw IllegalArgumentException("Unknown charset '$name'") + } + + fun 
StringBuilder.appendCodePointV(codePoint: Int) { + if (codePoint in 0xD800..0xDFFF || codePoint > 0xFFFF) { + val U0 = codePoint - 0x10000 + val hs = U0.extract(10, 10) + val ls = U0.extract(0, 10) + append(((0b110110 shl 10) or (hs)).toChar()) + append(((0b110111 shl 10) or (ls)).toChar()) + } else { + append(codePoint.toChar()) + } + } + + inline fun decodeCodePoints(src: CharSequence, start: Int, end: Int, block: (codePoint: Int) -> Unit) { + var highSurrogate = 0 + loop@for (n in start until end) { + val char = src[n].toInt() + val codePoint = if (char in 0xD800..0xDFFF) { + when (char.extract(10, 6)) { + 0b110110 -> { + highSurrogate = char and 0x3FF + continue@loop + } + 0b110111 -> { + 0x10000 + ((highSurrogate shl 10) or (char and 0x3FF)) + } + else -> error("Unknown $char") + } + } else { + char + } + block(codePoint) + } + } + } +} + +open class UTC8CharsetBase(name: String) : Charset(name) { + override fun estimateNumberOfCharactersForBytes(nbytes: Int): Int = nbytes * 2 + override fun estimateNumberOfBytesForCharacters(nchars: Int): Int = nchars * 2 + + private fun createByte(codePoint: Int, shift: Int): Int = codePoint shr shift and 0x3F or 0x80 + + override fun encode(out: ByteArrayBuilder, src: CharSequence, start: Int, end: Int) { + decodeCodePoints(src, start, end) { codePoint -> + if (codePoint and 0x7F.inv() == 0) { // 1-byte sequence + out.append(codePoint.toByte()) + } else { + when { + codePoint and 0x7FF.inv() == 0 -> // 2-byte sequence + out.append((codePoint shr 6 and 0x1F or 0xC0).toByte()) + codePoint and 0xFFFF.inv() == 0 -> { // 3-byte sequence + out.append((codePoint shr 12 and 0x0F or 0xE0).toByte()) + out.append((createByte(codePoint, 6)).toByte()) + } + codePoint and -0x200000 == 0 -> { // 4-byte sequence + out.append((codePoint shr 18 and 0x07 or 0xF0).toByte()) + out.append((createByte(codePoint, 12)).toByte()) + out.append((createByte(codePoint, 6)).toByte()) + } + } + out.append((codePoint and 0x3F or 0x80).toByte()) + } + } + } 
+ + override fun decode(out: StringBuilder, src: ByteArray, start: Int, end: Int): Int { + if ((start < 0 || start > src.size) || (end < 0 || end > src.size)) error("Out of bounds") + var i = start + loop@while (i < end) { + val c = src[i].toInt() and 0xFF + + when (c shr 4) { + in 0b0000..0b0111 -> { + // 0xxxxxxx + out.appendCodePointV(c) + i += 1 + } + in 0b1100..0b1101 -> { + // 110x xxxx 10xx xxxx + if (i + 1 >= end) break@loop + out.appendCodePointV((c and 0x1F shl 6 or (src[i + 1].toInt() and 0x3F))) + i += 2 + } + 0b1110 -> { + // 1110 xxxx 10xx xxxx 10xx xxxx + if (i + 2 >= end) break@loop + out.appendCodePointV((c and 0x0F shl 12 or (src[i + 1].toInt() and 0x3F shl 6) or (src[i + 2].toInt() and 0x3F))) + i += 3 + } + 0b1111 -> { + // 1111 0xxx 10xx xxxx 10xx xxxx 10xx xxxx + if (i + 3 >= end) break@loop + out.appendCodePointV(0 + .insert(src[i + 0].toInt().extract(0, 3), 18, 3) + .insert(src[i + 1].toInt().extract(0, 6), 12, 6) + .insert(src[i + 2].toInt().extract(0, 6), 6, 6) + .insert(src[i + 3].toInt().extract(0, 6), 0, 6) + ) + i += 4 + + } + else -> { + out.append('\uFFFD') + i += 1 + //TODO("${c shr 4}") + } + } + } + return i - start + } +} + +abstract class BaseSingleByteCharset(name: String) : Charset(name) { + override fun estimateNumberOfCharactersForBytes(nbytes: Int): Int = nbytes + override fun estimateNumberOfBytesForCharacters(nchars: Int): Int = nchars +} + +open class SingleByteCharset(name: String, val conv: String) : BaseSingleByteCharset(name) { + private val maxCode = conv.maxOrNull()?.plus(1)?.code ?: 0 + val v = ByteArray(maxCode).also { it.fill('?'.code.toByte()); for (n in conv.indices) it[conv[n].code] = n.toByte() } + + override fun encode(out: ByteArrayBuilder, src: CharSequence, start: Int, end: Int) { + for (n in start until end) { + val c = src[n].code + out.append(v.getOrElse(c) { '?'.code.toByte() }) + } + } + + override fun decode(out: StringBuilder, src: ByteArray, start: Int, end: Int): Int { + for (n in start until 
end) { + out.append(conv[src[n].toInt() and 0xFF]) + } + return end - start + } +} + +object ISO_8859_1 : SingleByteCharset("ISO-8859-1", buildString { for (n in 0 until 256) append(n.toChar()) }) + +val UTF8: Charset = UTC8CharsetBase("UTF-8") + +class UTF16Charset(val le: Boolean) : Charset("UTF-16-" + (if (le) "LE" else "BE")) { + override fun estimateNumberOfCharactersForBytes(nbytes: Int): Int = nbytes * 2 + override fun estimateNumberOfBytesForCharacters(nchars: Int): Int = nchars * 2 + + override fun decode(out: StringBuilder, src: ByteArray, start: Int, end: Int): Int { + var consumed = 0 + for (n in start until end step 2) { + val char = src.getS16(n, le).toChar() + out.append(char) + consumed += 2 + } + return consumed + } + + override fun encode(out: ByteArrayBuilder, src: CharSequence, start: Int, end: Int) { + val temp = ByteArray(2) + for (n in start until end) { + temp.set16(0, src[n].code, le) + out.append(temp) + } + } +} + +object ASCII : SingleByteCharset("ASCII", CharArray(128) { it.toChar() }.concatToString() + "\u00c7\u00fc\u00e9\u00e2\u00e4\u00e0\u00e5\u00e7\u00ea\u00eb\u00e8\u00ef\u00ee\u00ec\u00c4\u00c5\u00c9\u00e6\u00c6\u00f4\u00f6\u00f2\u00fb\u00f9\u00ff\u00d6\u00dc\u00f8\u00a3\u00d8\u00d7\u0192\u00e1\u00ed\u00f3\u00fa\u00f1\u00d1\u00aa\u00ba\u00bf\u00ae\u00ac\u00bd\u00bc\u00a1\u00ab\u00bb\u2591\u2592\u2593\u2502\u2524\u00c1\u00c2\u00c0\u00a9\u2563\u2551\u2557\u255d\u00a2\u00a5\u2510\u2514\u2534\u252c\u251c\u2500\u253c\u00e3\u00c3\u255a\u2554\u2569\u2566\u2560\u2550\u256c\u00a4\u00f0\u00d0\u00ca\u00cb\u00c8\u0131\u00cd\u00ce\u00cf\u2518\u250c\u2588\u2584\u00a6\u00cc\u2580\u00d3\u00df\u00d4\u00d2\u00f5\u00d5\u00b5\u00fe\u00de\u00da\u00db\u00d9\u00fd\u00dd\u00af\u00b4\u00ad\u00b1\u2017\u00be\u00b6\u00a7\u00f7\u00b8\u00b0\u00a8\u00b7\u00b9\u00b3\u00b2\u25a0\u00a0") + +val LATIN1 = ISO_8859_1 +val UTF16_LE = UTF16Charset(le = true) +val UTF16_BE = UTF16Charset(le = false) + +object Charsets { + val UTF8 get() = korlibs.io.lang.UTF8 + val 
LATIN1 get() = korlibs.io.lang.LATIN1 + val UTF16_LE get() = korlibs.io.lang.UTF16_LE + val UTF16_BE get() = korlibs.io.lang.UTF16_BE +} + +// Kotlin-like variants +fun String.encodeToByteArray(charset: Charset, startIndex: Int = 0, endIndex: Int = this.length, throwOnInvalidSequence: Boolean = false): ByteArray = + toByteArray(charset, startIndex, endIndex) + +fun ByteArray.decodeToString(charset: Charset, startIndex: Int = 0, endIndex: Int = this.size, throwOnInvalidSequence: Boolean = false): String = + this.toString(charset, startIndex, endIndex) + +// Korlibs-like variants + +fun String.toByteArray(charset: Charset = UTF8, start: Int = 0, end: Int = this.length): ByteArray { + val out = ByteArrayBuilder(charset.estimateNumberOfBytesForCharacters(end - start)) + charset.encode(out, this, start, end) + return out.toByteArray() +} + +fun ByteArray.toString(charset: Charset, start: Int = 0, end: Int = this.size): String { + val out = StringBuilder(charset.estimateNumberOfCharactersForBytes(end - start)) + charset.decode(out, this, start, end) + return out.toString() +} + +fun ByteArray.readStringz(o: Int, size: Int, charset: Charset = UTF8): String { + var idx = o + val stop = min(this.size, o + size) + while (idx < stop) { + if (this[idx] == 0.toByte()) break + idx++ + } + return this.copyOfRange(o, idx).toString(charset) +} + +fun ByteArray.readStringz(o: Int, charset: Charset = UTF8): String { + return readStringz(o, size - o, charset) +} + +fun ByteArray.readString(o: Int, size: Int, charset: Charset = UTF8): String { + return this.copyOfRange(o, o + size).toString(charset) +} diff --git a/korlibs-string/src/korlibs/io/lang/StringCase.kt b/korlibs-string/src/korlibs/io/lang/StringCase.kt new file mode 100644 index 0000000..1d2213f --- /dev/null +++ b/korlibs-string/src/korlibs/io/lang/StringCase.kt @@ -0,0 +1,65 @@ +package korlibs.io.lang + +import korlibs.io.util.* +import kotlin.jvm.* +import kotlin.text.isDigit + +@Deprecated("Use StringCase instead") 
+typealias TextCase = StringCase + +val String.case: StringCase get() = StringCase.autodetect(this) + +@Deprecated("this.case", ReplaceWith("this.case")) +fun String.textCase(): StringCase = this.case +@Deprecated("this.case", ReplaceWith("this.case")) +fun String.stringCase(): StringCase = this.case + +@JvmInline +value class StringCase(val words: List) { + constructor(vararg words: String) : this(words.toList()) + + companion object { + //@Deprecated("", ReplaceWith("autodetect(str, lower)", "korlibs.io.lang.StringCase.Companion.autodetect")) + //operator fun invoke(str: String, lower: Boolean = true): StringCase = autodetect(str, lower) + + fun autodetect(str: String, lower: Boolean = true): StringCase { + val words = mutableListOf() + val buffer = StringBuilder() + fun flush() { + if (buffer.isEmpty()) return + words.add(buffer.toString()) + buffer.clear() + } + + val DIGIT = 0 + val UPPER = 1 + val LOWER = 2 + + var prevCase = LOWER + for (c in str) { + val currCase = when { + c.isDigit() -> DIGIT + c.isUpperCase() -> UPPER + else -> LOWER + } + if (currCase != LOWER && currCase != prevCase) flush() + when { + c == '_' || c == '-' || c == '.' 
|| c == ',' || c.isWhitespaceFast() -> flush() + else -> { + buffer.append(if (lower) c.lowercaseChar() else c) + prevCase = currCase + } + } + } + flush() + return StringCase(words) + } + } + + val spaceCase: String get() = words.joinToString(" ") { it.lowercase() } + val snakeCase: String get() = words.joinToString("_") { it.lowercase() } + val kebabCase: String get() = words.joinToString("-") { it.lowercase() } + val screamingSnakeCase: String get() = words.joinToString("_") { it.uppercase() } + val pascalCase: String get() = words.joinToString("") { it.lowercase().replaceFirstChar { it.uppercaseChar() } } + val camelCase: String get() = pascalCase.replaceFirstChar { it.lowercaseChar() } +} diff --git a/korlibs-string/src/korlibs/io/lang/StringExt.kt b/korlibs-string/src/korlibs/io/lang/StringExt.kt new file mode 100644 index 0000000..e5f91a7 --- /dev/null +++ b/korlibs-string/src/korlibs/io/lang/StringExt.kt @@ -0,0 +1,97 @@ +package korlibs.io.lang + +operator fun String.Companion.invoke(arrays: IntArray, offset: Int = 0, size: Int = arrays.size - offset): String { + val sb = StringBuilder(size) + for (n in offset until offset + size) { + sb.append(arrays[n].toChar()) // @TODO: May not work the same! 
In JS: String.fromCodePoint + } + return sb.toString() +} + +fun String.Companion.fromIntArray(arrays: IntArray, offset: Int = 0, size: Int = arrays.size - offset): String = String(arrays, offset, size) +fun String.Companion.fromCharArray(arrays: CharArray, offset: Int = 0, size: Int = arrays.size - offset): String = arrays.concatToString(offset, offset + size) + +//////////////////////////////////// +//////////////////////////////////// + +fun String.splitKeep(regex: Regex): List { + val str = this + val out = arrayListOf() + var lastPos = 0 + for (part in regex.findAll(this)) { + val prange = part.range + if (lastPos != prange.start) { + out += str.substring(lastPos, prange.start) + } + out += str.substring(prange) + lastPos = prange.endInclusive + 1 + } + if (lastPos != str.length) { + out += str.substring(lastPos) + } + return out +} + +private val replaceNonPrintableCharactersRegex by lazy { Regex("[^ -~]") } +fun String.replaceNonPrintableCharacters(replacement: String = "?"): String { + return this.replace(replaceNonPrintableCharactersRegex, replacement) +} + +fun String.indexOfOrNull(char: Char, startIndex: Int = 0): Int? = this.indexOf(char, startIndex).takeIf { it >= 0 } + +fun String.lastIndexOfOrNull(char: Char, startIndex: Int = lastIndex): Int? 
= + this.lastIndexOf(char, startIndex).takeIf { it >= 0 } + +fun String.splitInChunks(size: Int): List { + val out = arrayListOf() + var pos = 0 + while (pos < this.length) { + out += this.substring(pos, kotlin.math.min(this.length, pos + size)) + pos += size + } + return out +} + +fun String.substr(start: Int): String = this.substr(start, this.length) + +fun String.substr(start: Int, length: Int): String { + val low = (if (start >= 0) start else this.length + start).coerceIn(0, this.length) + val high = (if (length >= 0) low + length else this.length + length).coerceIn(0, this.length) + return if (high >= low) this.substring(low, high) else "" +} + +inline fun String.eachBuilder(transform: StringBuilder.(Char) -> Unit): String = buildString { + @Suppress("ReplaceManualRangeWithIndicesCalls") // Performance reasons? Check that plain for doesn't allocate + for (n in 0 until this@eachBuilder.length) transform(this, this@eachBuilder[n]) +} + +inline fun String.transform(transform: (Char) -> String): String = buildString { + @Suppress("ReplaceManualRangeWithIndicesCalls") // Performance reasons? 
Check that plain for doesn't allocate + for (n in 0 until this@transform.length) append(transform(this@transform[n])) +} + +fun String.parseInt(): Int = when { + this.startsWith("0x", ignoreCase = true) -> this.substring(2).toLong(16).toInt() + this.startsWith("0o", ignoreCase = true) -> this.substring(2).toLong(8).toInt() + this.startsWith("0b", ignoreCase = true) -> this.substring(2).toLong(2).toInt() + else -> this.toInt() +} + +fun String.toCharArray() = CharArray(length) { this@toCharArray[it] } + +fun String.withoutRange(range: IntRange): String = this.substr(0, range.first) + this.substr(range.last + 1) +fun String.withoutIndex(index: Int): String = this.substr(0, index) + this.substr(index + 1) +fun String.withInsertion(index: Int, insertedText: String): String { + val before = this.substr(0, index) + val after = this.substr(index, this.length) + return "$before$insertedText$after" +} + +fun String.Companion.substringEquals(a: String, aIndex: Int, b: String, bIndex: Int, count: Int): Boolean { + if (count == 0) return true + if (aIndex < 0 || bIndex < 0) return false + if (aIndex + count > a.length) return false + if (bIndex + count > b.length) return false + for (n in 0 until count) if (a[aIndex + n] != b[bIndex + n]) return false + return true +} diff --git a/korlibs-string/src/korlibs/io/lang/WString.kt b/korlibs-string/src/korlibs/io/lang/WString.kt new file mode 100644 index 0000000..d755152 --- /dev/null +++ b/korlibs-string/src/korlibs/io/lang/WString.kt @@ -0,0 +1,142 @@ +package korlibs.io.lang + +// @TODO: UTf-8 variant with seeking points? +// @TODO: trying to be more space efficient for long strings? +// @TODO: while having a decent performance + +/** + * UTF-32 String. Each element in the [codePoints] array represents a character. + * + * While on plain [String] it requires surrogate pairs to represent some characters. 
+ */ +//inline +class WString private constructor(private val codePoints: IntArray, private val string: String) { + val length get() = codePoints.size + + operator fun get(index: Int): WChar = WChar(codePoints[index]) + inline fun getOrElse(index: Int, defaultValue: (Int) -> WChar): WChar { + if (index < 0 || index >= length) return defaultValue(index) + return this[index] + } + fun codePointAt(index: Int) = this[index].codePoint + + fun substring(startIndex: Int): WString = WString(codePoints.copyOfRange(startIndex, codePoints.size), string.substring(startIndex)) + fun substring(startIndex: Int, endIndex: Int): WString = WString(codePoints.copyOfRange(startIndex, endIndex), string.substring(startIndex, endIndex)) + + fun toCodePointIntArray() = codePoints.copyOf() + + companion object { + private val EMPTY = WString(intArrayOf(), "") + + operator fun invoke(codePoints: IntArray) = fromCodePoints(codePoints) + operator fun invoke(string: String) = fromString(string) + + // Decode surrogate pairs + fun fromString(string: String): WString { + if (string == "") return EMPTY + val codePoints = IntArray(string.length) + val length = string.forEachCodePoint { index, codePoint, error -> codePoints[index] = codePoint } + return WString(codePoints.copyOf(length), string) + } + + fun fromCodePoints(codePoints: IntArray): WString { + val surrogateCount = codePoints.count { it >= 0x10000 } + val out = StringBuilder(codePoints.size + surrogateCount) + for (codePoint in codePoints) { + if (codePoint > 0xFFFF) { + val U1 = codePoint - 0x10000 + val W1 = 0xD800 or ((U1 ushr 10) and 0x3FF) + val W2 = 0xDC00 or ((U1 ushr 0) and 0x3FF) + out.append(W1.toChar()) + out.append(W2.toChar()) + } else { + out.append(codePoint.toChar()) + } + } + return WString(codePoints, out.toString()) + } + } + + private var cachedHashCodeValue = 0 + private var cachedHashCode = false + + override fun hashCode(): Int { + if (!cachedHashCode) { + cachedHashCode = true + cachedHashCodeValue = 
this.codePoints.contentHashCode() + } + return cachedHashCodeValue + } + override fun equals(other: Any?): Boolean = (other is WString) && this.codePoints.contentEquals(other.codePoints) //this.string == other.string + + // Encode surrogate pairs + override fun toString(): String = string +} + +inline fun WString.forEachCodePoint(block: (index: Int, codePoint: Int, error: Boolean) -> Unit): Int { + for (n in 0 until this.length) { + block(n, this[n].code, false) + } + return this.length +} + +inline fun String.forEachCodePoint(block: (index: Int, codePoint: Int, error: Boolean) -> Unit): Int { + val string = this + var m = 0 + var n = 0 + while (n < string.length) { + var value = string[n++].code + var error = false + // High surrogate + if ((value and 0xF800) == 0xD800 && n < string.length) { + val extra = string[n++].code + if ((extra and 0xFC00) != 0xDC00) { + n-- + error = true + } else { + val dataHigh = (value and 0x3FF) + val dataLow = (extra and 0x3FF) + + value = (dataLow or (dataHigh shl 10)) + 0x10000 + } + } + block(m++, value, error) + } + return m +} + +fun String.toWString() = WString(this) + +fun WString.substr(start: Int, length: Int = this.length): WString { + val low = (if (start >= 0) start else this.length + start).coerceIn(0, this.length) + val high = (if (length >= 0) low + length else this.length + length).coerceIn(0, this.length) + return if (high < low) WString("") else this.substring(low, high) +} + +inline class WChar(val codePoint: Int) { + val code: Int get() = codePoint + fun toChar(): Char = codePoint.toChar() + fun toInt(): Int = codePoint +} + +class WStringReader(val str: WString, var position: Int = 0) { + constructor(str: String, position: Int = 0) : this(str.toWString(), position) + val length: Int get() = str.length + val available: Int get() = str.length - position + val eof: Boolean get() = position >= str.length + val hasMore: Boolean get() = !eof + fun read(): WChar = str[position++] + fun peek(offset: Int = 0): WChar = 
str.getOrElse(this.position + offset) { WChar(0) } + fun skip(count: Int) { position += count } + fun substr(offset: Int, len: Int = str.length): WString = str.substr(this.position + offset, len) +} + +inline fun WStringReader?.keep(block: () -> T): T { + //return ::position.keep { block() } // @TODO: Is this optimized in Kotlin? + val old = this?.position ?: 0 + try { + return block() + } finally { + this?.position = old + } +} diff --git a/korlibs-string/src/korlibs/io/lang/internal/_Internal.kt b/korlibs-string/src/korlibs/io/lang/internal/_Internal.kt new file mode 100644 index 0000000..6553cba --- /dev/null +++ b/korlibs-string/src/korlibs/io/lang/internal/_Internal.kt @@ -0,0 +1,20 @@ +package korlibs.io.lang.internal + +@PublishedApi internal fun Int.signExtend(bits: Int): Int = (this shl (32 - bits)) shr (32 - bits) // Int.SIZE_BITS +@PublishedApi internal fun Int.extractByte(offset: Int): Byte = (this ushr offset).toByte() +@PublishedApi internal fun Int.extract(offset: Int, count: Int): Int = (this ushr offset) and count.mask() +@PublishedApi internal fun Int.mask(): Int = (1 shl this) - 1 +@PublishedApi internal fun Int.insert(value: Int, offset: Int, count: Int): Int { + val mask = count.mask() shl offset + val ovalue = (value shl offset) and mask + return (this and mask.inv()) or ovalue +} +@PublishedApi internal fun ByteArray.getS16(offset: Int, littleEndian: Boolean): Int = if (littleEndian) getS16LE(offset) else getS16BE(offset) +@PublishedApi internal fun ByteArray.getS16LE(offset: Int): Int = get16LE(offset).signExtend(16) +@PublishedApi internal inline fun ByteArray.get16LE(offset: Int): Int = (u8(offset + 0) shl 0) or (u8(offset + 1) shl 8) +@PublishedApi internal fun ByteArray.set16(offset: Int, value: Int, littleEndian: Boolean) { if (littleEndian) set16LE(offset, value) else set16BE(offset, value) } +@PublishedApi internal fun ByteArray.set16LE(offset: Int, value: Int) { this[offset + 0] = value.extractByte(0); this[offset + 1] = 
value.extractByte(8) } +@PublishedApi internal fun ByteArray.set16BE(offset: Int, value: Int) { this[offset + 1] = value.extractByte(0); this[offset + 0] = value.extractByte(8) } +@PublishedApi internal fun ByteArray.getS16BE(offset: Int): Int = get16BE(offset).signExtend(16) +@PublishedApi internal inline fun ByteArray.get16BE(offset: Int): Int = (u8(offset + 1) shl 0) or (u8(offset + 0) shl 8) +@PublishedApi internal fun ByteArray.u8(offset: Int): Int = this[offset].toInt() and 0xFF diff --git a/korlibs-string/src/korlibs/io/stream/CharReader.kt b/korlibs-string/src/korlibs/io/stream/CharReader.kt new file mode 100644 index 0000000..646ac97 --- /dev/null +++ b/korlibs-string/src/korlibs/io/stream/CharReader.kt @@ -0,0 +1,7 @@ +package korlibs.io.stream + +interface CharReader { + fun read(out: StringBuilder, count: Int): Int + fun clone(): CharReader +} +fun CharReader.read(count: Int): String = buildString(count) { read(this, count) } diff --git a/korlibs-string/src/korlibs/io/util/CharExt.kt b/korlibs-string/src/korlibs/io/util/CharExt.kt new file mode 100644 index 0000000..5c9e29b --- /dev/null +++ b/korlibs-string/src/korlibs/io/util/CharExt.kt @@ -0,0 +1,12 @@ +package korlibs.io.util + +fun Char.isWhitespaceFast(): Boolean = this == ' ' || this == '\t' || this == '\r' || this == '\n' +fun Char.isDigit(): Boolean = this in '0'..'9' +fun Char.isLetter(): Boolean = this in 'a'..'z' || this in 'A'..'Z' +fun Char.isLetterOrDigit(): Boolean = isLetter() || isDigit() +fun Char.isLetterOrUnderscore(): Boolean = this.isLetter() || this == '_' || this == '$' +fun Char.isLetterDigitOrUnderscore(): Boolean = this.isLetterOrDigit() || this == '_' || this == '$' +fun Char.isLetterOrDigitOrDollar(): Boolean = this.isLetterOrDigit() || this == '$' +val Char.isNumeric: Boolean get() = this.isDigit() || this == '.' 
|| this == 'e' || this == '-' +fun Char.isPrintable(): Boolean = this in '\u0020'..'\u007e' || this in '\u00a1'..'\u00ff' +val Char.isPossibleFloatChar get() = (this in '0'..'9') || (this == '+') || (this == '-') || (this == 'e') || (this == 'E') || (this == '.') diff --git a/korlibs-string/src/korlibs/io/util/StringEscape.kt b/korlibs-string/src/korlibs/io/util/StringEscape.kt new file mode 100644 index 0000000..9f49f76 --- /dev/null +++ b/korlibs-string/src/korlibs/io/util/StringEscape.kt @@ -0,0 +1,76 @@ +package korlibs.io.util + +private const val HEX_DIGITS_LOWER = "0123456789abcdef" +fun String.escape(unicode: Boolean): String { + val out = StringBuilder(this.length + 16) + for (c in this) { + when (c) { + '\\' -> out.append("\\\\") + '"' -> out.append("\\\"") + '\n' -> out.append("\\n") + '\r' -> out.append("\\r") + '\t' -> out.append("\\t") + else -> when { + !unicode && c in '\u0000'..'\u001f' -> { + out.append("\\x") + out.append(HEX_DIGITS_LOWER[(c.code ushr 4) and 0xF]) + out.append(HEX_DIGITS_LOWER[(c.code ushr 0) and 0xF]) + } + unicode && !c.isPrintable() -> { + out.append("\\u") + out.append(HEX_DIGITS_LOWER[(c.code ushr 12) and 0xF]) + out.append(HEX_DIGITS_LOWER[(c.code ushr 8) and 0xF]) + out.append(HEX_DIGITS_LOWER[(c.code ushr 4) and 0xF]) + out.append(HEX_DIGITS_LOWER[(c.code ushr 0) and 0xF]) + } + else -> out.append(c) + } + } + } + return out.toString() +} +fun String.escape(): String = escape(unicode = false) +fun String.escapeUnicode(): String = escape(unicode = true) +@Deprecated("", ReplaceWith("escapeUnicode()")) fun String.uescape(): String = escapeUnicode() + +fun String.unescape(): String { + val out = StringBuilder(this.length) + var n = 0 + while (n < this.length) { + val c = this[n++] + when (c) { + '\\' -> { + val c2 = this[n++] + when (c2) { + '\\' -> out.append('\\') + '"' -> out.append('\"') + 'n' -> out.append('\n') + 'r' -> out.append('\r') + 't' -> out.append('\t') + 'x', 'u' -> { + val N = if (c2 == 'u') 4 else 2 + val 
chars = this.substring(n, n + N) + n += N + out.append(chars.toInt(16).toChar()) + } + else -> { + out.append("\\$c2") + } + } + } + else -> out.append(c) + } + } + return out.toString() +} + +fun String?.quote(unicode: Boolean): String = if (this != null) "\"${this.escape(unicode)}\"" else "null" +fun String?.quote(): String = quote(unicode = false) +fun String?.quoteUnicode(): String = quote(unicode = true) +@Deprecated("", ReplaceWith("quoteUnicode()")) fun String?.uquote(): String = quoteUnicode() + +fun String.isQuoted(): Boolean = this.startsWith('"') && this.endsWith('"') +fun String.unquote(): String = if (isQuoted()) this.substring(1, this.length - 1).unescape() else this + +val String?.quoted: String get() = this.quote() +val String.unquoted: String get() = this.unquote() diff --git a/korlibs-string/src/korlibs/io/util/StringExt.kt b/korlibs-string/src/korlibs/io/util/StringExt.kt new file mode 100644 index 0000000..5368148 --- /dev/null +++ b/korlibs-string/src/korlibs/io/util/StringExt.kt @@ -0,0 +1,11 @@ +package korlibs.io.util + +fun String.substringAfterOrNull(delimiter: Char): String? = if (this.contains(delimiter)) this.substringAfter(delimiter) else null +fun String.substringBeforeOrNull(delimiter: Char): String? = if (this.contains(delimiter)) this.substringBefore(delimiter) else null +fun String.substringAfterLastOrNull(delimiter: Char): String? = if (this.contains(delimiter)) this.substringAfterLast(delimiter) else null +fun String.substringBeforeLastOrNull(delimiter: Char): String? = if (this.contains(delimiter)) this.substringBeforeLast(delimiter) else null + +fun String.substringAfterOrNull(delimiter: String): String? = if (this.contains(delimiter)) this.substringAfter(delimiter) else null +fun String.substringBeforeOrNull(delimiter: String): String? = if (this.contains(delimiter)) this.substringBefore(delimiter) else null +fun String.substringAfterLastOrNull(delimiter: String): String? 
= if (this.contains(delimiter)) this.substringAfterLast(delimiter) else null +fun String.substringBeforeLastOrNull(delimiter: String): String? = if (this.contains(delimiter)) this.substringBeforeLast(delimiter) else null diff --git a/korlibs-string/src/korlibs/util/SimpleIndenter.kt b/korlibs-string/src/korlibs/util/SimpleIndenter.kt new file mode 100644 index 0000000..94f7700 --- /dev/null +++ b/korlibs-string/src/korlibs/util/SimpleIndenter.kt @@ -0,0 +1,91 @@ +package korlibs.util + +import kotlinx.atomicfu.locks.* + +interface SimpleIndenter { + fun clear() + fun inline(s: String): SimpleIndenter + fun line(s: String): SimpleIndenter + @ExperimentalStdlibApi + fun indent() + @ExperimentalStdlibApi + fun unindent() + override fun toString(): String + + object INDENTS { + private val lock = SynchronizedObject() + private val lastIndent = StringBuilder(1024) + private val INDENTS = ArrayList(1024) + + operator fun get(index: Int): String { + if (INDENTS.size <= index) { + synchronized(lock) { + while (INDENTS.size <= index) { + INDENTS.add(lastIndent.toString()) + lastIndent.append('\t') + } + } + } + return INDENTS[index] + } + } + + companion object { + operator fun invoke(trailingLine: Boolean = false): SimpleIndenter = Impl(trailingLine) + + @OptIn(ExperimentalStdlibApi::class) + class Impl(val trailingLine: Boolean) : SimpleIndenter { + var indentation = 0 + val lines = arrayListOf() + var currentLine = StringBuilder() + + override fun clear() { + indentation = 0 + lines.clear() + currentLine.clear() + } + + fun flush() { + if (currentLine.isNotEmpty()) { + lines += INDENTS[indentation] + currentLine.toString() + currentLine.clear() + } + } + + override fun inline(s: String): SimpleIndenter = this.apply { + currentLine.append(s) + } + + override fun line(s: String): SimpleIndenter = this.apply { + currentLine.append(s) + flush() + } + + override fun indent() { + flush() + indentation++ + } + + override fun unindent() { + flush() + indentation-- + } + + 
override fun toString(): String {
+                flush()
+                val end = lines.joinToString("\n")
+                return if (trailingLine) "$end\n" else end
+            }
+        }
+    }
+}
+
+/**
+ * Runs [function] one indentation level deeper and restores the level afterwards,
+ * also when [function] throws.
+ */
+@OptIn(ExperimentalStdlibApi::class)
+inline fun <T> SimpleIndenter.indent(function: () -> T): T {
+    indent()
+    return try {
+        function()
+    } finally {
+        unindent()
+    }
+}
diff --git a/korlibs-string/src/korlibs/util/SimpleStrReader.ext.kt b/korlibs-string/src/korlibs/util/SimpleStrReader.ext.kt
new file mode 100644
index 0000000..d778c6b
--- /dev/null
+++ b/korlibs-string/src/korlibs/util/SimpleStrReader.ext.kt
@@ -0,0 +1,74 @@
+package korlibs.util
+
+import korlibs.io.util.*
+
+/** True once the reader has no more characters to deliver. */
+val SimpleStrReader.eof: Boolean get() = !hasMore
+
+/** Appends characters to [out] until [char] is found; [included] also consumes and appends that terminator. */
+fun SimpleStrReader.readUntilBuilder(char: Char, out: StringBuilder, included: Boolean = false): StringBuilder =
+    readUntilBuilder(included, out) { it == char }
+
+/** Appends characters to [out] for as long as [cond] holds; see the predicate-based `readUntilBuilder` for [included]. */
+inline fun SimpleStrReader.readWhileBuilder(included: Boolean = false, out: StringBuilder, cond: (Char) -> Boolean): StringBuilder =
+    readUntilBuilder(included, out) { !cond(it) }
+
+/**
+ * Appends characters to [out] until [cond] matches or the input ends.
+ *
+ * The matching character is left unread unless [included] is true, in which case it is
+ * consumed and appended as well. Returns [out].
+ */
+inline fun SimpleStrReader.readUntilBuilder(included: Boolean = false, out: StringBuilder, cond: (Char) -> Boolean): StringBuilder {
+    while (hasMore) {
+        val c = peekChar()
+        if (cond(c)) {
+            if (included) {
+                readChar()
+                out.append(c)
+            }
+            break
+        }
+        readChar()
+        out.append(c)
+    }
+    return out
+}
+
+/** Consumes characters while [cond] holds and returns this reader. */
+fun SimpleStrReader.skipWhile(cond: (Char) -> Boolean): SimpleStrReader {
+    while (hasMore) {
+        val c = peekChar()
+        if (!cond(c)) {
+            return this
+        }
+        readChar()
+    }
+    return this
+}
+
+/**
+ * Consumes one character and requires it to be [expected].
+ *
+ * @throws IllegalArgumentException when a different character was read.
+ */
+fun SimpleStrReader.skipExpect(expected: Char) {
+    val actual = this.readChar()
+    if (actual != expected) {
+        throw IllegalArgumentException("Expected '$expected' but found '$actual' at $pos")
+    }
+}
+
+/**
+ * Tells whether the next character is [char], consuming it when [consume] is true.
+ *
+ * Returns false at the end of input: the readers in this module report `'\u0000'` from
+ * `peekChar()` once exhausted, and that placeholder must not match a requested NUL.
+ */
+fun SimpleStrReader.tryExpect(char: Char, consume: Boolean = true): Boolean {
+    val isExpected = hasMore && peekChar() == char
+    if (consume && isExpected) readChar()
+    return isExpected
+}
+
+fun SimpleStrReader.read(count: Int): String {
+    val out =
StringBuilder(count)
+    for (n in 0 until count) out.append(readChar())
+    return out.toString()
+}
+
+/** Consumes every leading character for which `isWhitespaceFast()` is true and returns this reader. */
+fun SimpleStrReader.skipSpaces(): SimpleStrReader = this.skipWhile { it.isWhitespaceFast() }
+
+/**
+ * Appends to [out] the run of letters, digits, underscores, `-`, `~` and `:` found at the
+ * current position. Returns [out], or null when [out] is empty afterwards.
+ */
+fun SimpleStrReader.matchIdentifier(out: StringBuilder): StringBuilder? {
+    val collected = readWhileBuilder(out = out) { ch ->
+        ch.isLetterDigitOrUnderscore() || ch == '-' || ch == '~' || ch == ':'
+    }
+    return if (collected.isNotEmpty()) collected else null
+}
+
+/**
+ * When the next character is `'` or `"`, appends it and everything up to and including the
+ * next occurrence of the same quote character to [out] and returns [out]; otherwise returns
+ * null without consuming anything.
+ */
+fun SimpleStrReader.matchSingleOrDoubleQuoteString(out: StringBuilder): StringBuilder? {
+    val next = this.peekChar()
+    if (next != '\'' && next != '"') return null
+    val quoteType = this.readChar()
+    out.append(quoteType)
+    return this.readUntilBuilder(quoteType, out, included = true)
+}
diff --git a/korlibs-string/src/korlibs/util/SimpleStrReader.kt b/korlibs-string/src/korlibs/util/SimpleStrReader.kt
new file mode 100644
index 0000000..e2cc218
--- /dev/null
+++ b/korlibs-string/src/korlibs/util/SimpleStrReader.kt
@@ -0,0 +1,24 @@
+package korlibs.util
+
+/** Minimal forward-only character reader with one character of lookahead. */
+interface SimpleStrReader {
+    val pos: Int
+    val hasMore: Boolean
+    fun readChar(): Char
+    fun peekChar(): Char
+    // Default implementation: discard [count] characters one at a time.
+    fun skip(count: Int = 1): SimpleStrReader {
+        repeat(count) { readChar() }
+        return this
+    }
+    fun clone(): SimpleStrReader
+    fun toStringContext(): String = "SimpleStrReader(pos=$pos, peek='${peekChar()}')"
+
+    companion object {
+        // String-backed reader; reading past the end yields '\u0000'.
+        private class Impl(val str: String, override var pos: Int) : SimpleStrReader {
+            override val hasMore: Boolean get() = pos < str.length
+            override fun readChar(): Char {
+                val current = peekChar()
+                pos++
+                return current
+            }
+            override fun peekChar(): Char = str.getOrElse(pos) { '\u0000' }
+            override fun skip(count: Int): SimpleStrReader {
+                pos += count
+                return this
+            }
+            override fun clone(): SimpleStrReader = Impl(str, pos)
+            override fun toString(): String = "Impl(str='$str', pos=$pos)"
+            override fun toStringContext(): String = "SimpleStrReader(pos=$pos, around='${str.substring(pos.coerceAtMost(str.length), (pos + 10).coerceAtMost(str.length))}')"
+        }
+        operator fun invoke(str: String, pos: Int = 0):
SimpleStrReader = Impl(str, pos)
+    }
+}
diff --git a/korlibs-string/src/korlibs/util/StrReaderCharReader.kt b/korlibs-string/src/korlibs/util/StrReaderCharReader.kt
new file mode 100644
index 0000000..5971aa6
--- /dev/null
+++ b/korlibs-string/src/korlibs/util/StrReaderCharReader.kt
@@ -0,0 +1,39 @@
+package korlibs.util
+
+import korlibs.io.stream.*
+
+/** Exposes a [SimpleStrReader] through the [CharReader] interface. */
+class StrReaderCharReader(val reader: SimpleStrReader) : CharReader {
+    /** Appends up to [count] characters to [out] and returns how many were appended. */
+    override fun read(out: StringBuilder, count: Int): Int {
+        for (n in 0 until count) {
+            if (!reader.hasMore) return n
+            out.append(reader.readChar())
+        }
+        return count
+    }
+    override fun clone(): CharReader = StrReaderCharReader(reader.clone())
+}
+
+/**
+ * Exposes a [CharReader] through the [SimpleStrReader] interface, pulling characters
+ * into [buffer] in chunks and serving them from [bufferPos].
+ */
+class CharReaderStrReader(val reader: CharReader, val buffer: StringBuilder = StringBuilder(), var bufferPos: Int = 0) : SimpleStrReader {
+    /** Refills [buffer] once it is exhausted and returns the number of characters still available in it. */
+    private fun ensureBuffer(): Int {
+        if (bufferPos >= buffer.length) {
+            buffer.clear()
+            reader.read(buffer, 1024)
+            bufferPos = 0
+        }
+        return buffer.length - bufferPos
+    }
+
+    /** Number of characters consumed through [readChar] so far. */
+    override var pos: Int = 0
+    override val hasMore: Boolean get() = ensureBuffer() > 0
+
+    /** Consumes and returns the next character, or `'\u0000'` when the input is exhausted. */
+    override fun readChar(): Char {
+        if (ensureBuffer() <= 0) return '\u0000'
+        // pos used to stay at 0 forever; advance it together with the buffer cursor.
+        pos++
+        return buffer[bufferPos++]
+    }
+
+    override fun peekChar(): Char {
+        if (ensureBuffer() <= 0) return '\u0000'
+        return buffer[bufferPos]
+    }
+
+    // The copy keeps the consumed-character count so positions reported by the clone stay meaningful.
+    override fun clone(): SimpleStrReader =
+        CharReaderStrReader(reader.clone(), StringBuilder(buffer), bufferPos).also { it.pos = pos }
+}
diff --git a/korlibs-string/src/korlibs/util/Stringformat.kt b/korlibs-string/src/korlibs/util/Stringformat.kt
new file mode 100644
index 0000000..f744d22
--- /dev/null
+++ b/korlibs-string/src/korlibs/util/Stringformat.kt
@@ -0,0 +1,33 @@
+package korlibs.util
+
+private val formatRegex = Regex("%([-]?\\d+)?(\\w)")
+
+fun String.format(vararg params: Any): String {
+    var paramIndex = 0
+    return formatRegex.replace(this) { mr ->
+        val param = params[paramIndex++]
+        //println("param: $param")
+        val size = mr.groupValues[1]
+        val type = mr.groupValues[2]
+        val str = when (type) {
+            "d" -> (param as
Number).toLong().toString()
+            "X", "x" -> {
+                val res = when (param) {
+                    is Int -> param.toUInt().toString(16)
+                    else -> (param as Number).toLong().toULong().toString(16)
+                }
+                if (type == "X") res.uppercase() else res.lowercase()
+            }
+            else -> "$param"
+        }
+        // A width written with a leading zero pads with '0', any other width pads with spaces.
+        val padChar = if (size.startsWith('0')) '0' else ' '
+        val width = size.toIntOrNull()
+        when {
+            // No width in the specifier: emit the value as is.
+            width == null -> str
+            // "%-8s": the regex accepts a leading '-', which used to switch padding off entirely;
+            // treat it printf-style as left-justification instead.
+            width < 0 -> str.padEnd(-width, ' ')
+            else -> str.padStart(width, padChar)
+        }
+    }
+}
diff --git a/korlibs-string/src@apple/korlibs/io/lang/Charset.apple.kt b/korlibs-string/src@apple/korlibs/io/lang/Charset.apple.kt
new file mode 100644
index 0000000..207177b
--- /dev/null
+++ b/korlibs-string/src@apple/korlibs/io/lang/Charset.apple.kt
@@ -0,0 +1,4 @@
+package korlibs.io.lang
+
+// Any failure while constructing CharsetApple (e.g. an unsupported name) is reported as "no charset".
+internal actual val platformCharsetProvider: CharsetProvider =
+    CharsetProvider { _, name -> runCatching { CharsetApple(name) }.getOrNull() }
diff --git a/korlibs-string/src@apple/korlibs/io/lang/CharsetApple.kt b/korlibs-string/src@apple/korlibs/io/lang/CharsetApple.kt
new file mode 100644
index 0000000..9fa88ec
--- /dev/null
+++ b/korlibs-string/src@apple/korlibs/io/lang/CharsetApple.kt
@@ -0,0 +1,51 @@
+package korlibs.io.lang
+
+import korlibs.memory.ByteArrayBuilder
+import korlibs.platform.toByteArray
+import korlibs.platform.toNSData
+import korlibs.platform.useCFStringRef
+import kotlinx.cinterop.ExperimentalForeignApi
+import kotlinx.cinterop.UnsafeNumber
+import platform.CoreFoundation.CFStringConvertEncodingToNSStringEncoding
+import platform.CoreFoundation.CFStringConvertIANACharSetNameToEncoding
+import platform.CoreFoundation.kCFStringEncodingInvalidId
+import platform.Foundation.NSString
+import platform.Foundation.NSStringEncoding
+import platform.Foundation.create
+import platform.Foundation.dataUsingEncoding
+
+/** [Charset] backed by the Foundation string encoding that corresponds to an IANA charset name. */
+@OptIn(UnsafeNumber::class, ExperimentalForeignApi::class)
+class CharsetApple(name: String) : Charset(name) {
+    private val encoding: NSStringEncoding = getEncoding(name)
+
+
private fun getEncoding(name: String): NSStringEncoding {
+        // The "unknown name" sentinel is a CoreFoundation encoding value, so it has to be
+        // tested before the value is mapped to an NSStringEncoding.
+        val cfEncoding = name.uppercase().useCFStringRef { CFStringConvertIANACharSetNameToEncoding(it) }
+        if (cfEncoding == kCFStringEncodingInvalidId) {
+            throw IllegalArgumentException("Charset $name is not supported by apple.")
+        }
+        return CFStringConvertEncodingToNSStringEncoding(cfEncoding)
+    }
+
+
+    /**
+     * Decodes `src[start, end)` with this encoding and appends the text to [out].
+     *
+     * @return the number of bytes consumed, i.e. `end - start`; partial characters are not detected.
+     * @throws MalformedInputException when Foundation cannot build a string from the bytes.
+     */
+    override fun decode(out: StringBuilder, src: ByteArray, start: Int, end: Int): Int {
+        // An empty range can start anywhere, not just at offset 0.
+        if (start == end) return 0
+
+        val data = src.copyOfRange(start, end).toNSData()
+
+        val content = NSString.create(data, this.encoding) as? String
+            ?: throw MalformedInputException("Failed to convert Bytes to String using $name")
+
+        out.append(content)
+
+        // Report what was consumed from the requested range, not the size of the whole array.
+        return end - start
+    }
+
+    /**
+     * Encodes `src[start, end)` with this encoding and appends the bytes to [out].
+     *
+     * @throws MalformedInputException when Foundation cannot represent the text in this encoding.
+     */
+    override fun encode(out: ByteArrayBuilder, src: CharSequence, start: Int, end: Int) {
+        val content = src.substring(start, end) as? NSString ?: error("Failed to convert input to NSString.")
+        val data = content.dataUsingEncoding(this.encoding)?.toByteArray()
+            ?: throw MalformedInputException("Failed to convert String to Bytes using $name")
+
+        out.append(data)
+    }
+}
diff --git a/korlibs-string/src@js/korlibs/io/lang/Charset.js.kt b/korlibs-string/src@js/korlibs/io/lang/Charset.js.kt
new file mode 100644
index 0000000..dc70743
--- /dev/null
+++ b/korlibs-string/src@js/korlibs/io/lang/Charset.js.kt
@@ -0,0 +1,45 @@
+package korlibs.io.lang
+
+import korlibs.memory.ByteArrayBuilder
+import org.khronos.webgl.ArrayBufferView
+import org.khronos.webgl.Uint8Array
+
+external class TextDecoder(charset: String) {
+    val encoding: String
+    fun decode(data: ArrayBufferView): String
+}
+
+external class TextEncoder(charset: String) {
+    val encoding: String
+    fun encode(data: String): Uint8Array
+}
+
+internal actual val platformCharsetProvider: CharsetProvider = CharsetProvider { normalizedName, name ->
+    for (n in listOf(name, normalizedName)) {
+        try {
+            val te = TextEncoder(n)
+            val td = TextDecoder(n)
+            return@CharsetProvider JsCharset(te, td)
+        } catch (e: dynamic) {
+            continue
+        }
+    }
+    return@CharsetProvider null
+}
+
+/** [Charset] implemented on top of the JavaScript `TextEncoder` / `TextDecoder` pair. */
+class JsCharset(val textEncoder: TextEncoder, val textDecoder: TextDecoder) : Charset(textDecoder.encoding) {
+    /**
+     * Encodes `src[start, end)` and appends the bytes to [out].
+     *
+     * @throws UnsupportedOperationException when the encoder does not report the same
+     *   encoding as the decoder this charset is named after.
+     */
+    override fun encode(out: ByteArrayBuilder, src: CharSequence, start: Int, end: Int) {
+        if (textEncoder.encoding != textDecoder.encoding) throw UnsupportedOperationException("Unsupported encoding '${textDecoder.encoding}'")
+        // NOTE(review): the Uint8Array is reinterpreted as a ByteArray, not copied — confirm that
+        // ByteArrayBuilder.append copes with an unsigned typed array.
+        out.append(textEncoder.encode(src.substring(start, end)).unsafeCast<ByteArray>())
+    }
+
+    /** Decodes `src[start, end)`, appends the text to [out] and returns `end - start`. */
+    override fun decode(out: StringBuilder, src: ByteArray, start: Int, end: Int): Int {
+        out.append(textDecoder.decode(src.unsafeCast<Uint8Array>().subarray(start, end)))
+        // @TODO: This charset won't support partial characters.
+        return end - start
+    }
+
+    override fun equals(other: Any?): Boolean = other is JsCharset && this.name == other.name
+    override fun hashCode(): Int = name.hashCode()
+    override fun toString(): String = "JsCharset($name)"
+}
diff --git a/korlibs-string/src@jvmAndAndroid/korlibs/io/lang/Charset.jvmAndAndroid.kt b/korlibs-string/src@jvmAndAndroid/korlibs/io/lang/Charset.jvmAndAndroid.kt
new file mode 100644
index 0000000..98f8413
--- /dev/null
+++ b/korlibs-string/src@jvmAndAndroid/korlibs/io/lang/Charset.jvmAndAndroid.kt
@@ -0,0 +1,34 @@
+package korlibs.io.lang
+
+import korlibs.memory.ByteArrayBuilder
+import java.nio.ByteBuffer
+import java.nio.CharBuffer
+import java.nio.charset.*
+import java.nio.charset.Charset as JCharset
+
+internal actual val platformCharsetProvider: CharsetProvider = CharsetProvider { normalizedName, name ->
+    for (n in listOf(name, normalizedName)) {
+        // isSupported() throws for syntactically illegal names (for example an empty string);
+        // treat those as "not supported" so the lookup can fall through.
+        val supported = runCatching { JCharset.isSupported(n) }.getOrDefault(false)
+        if (supported) return@CharsetProvider JvmCharset(JCharset.forName(n))
+    }
+    return@CharsetProvider null
+}
+
+/** [Charset] that delegates to a `java.nio.charset.Charset`. */
+class JvmCharset(val charset: JCharset) : Charset(charset.name()) {
+    // Malformed input is skipped rather than reported.
+    // NOTE(review): both coder instances are shared by every call on this charset — confirm
+    // that callers never use one JvmCharset from several threads at once.
+    val decoder = charset.newDecoder().onMalformedInput(CodingErrorAction.IGNORE)
+    val encoder = charset.newEncoder().onMalformedInput(CodingErrorAction.IGNORE)
+
+    override fun encode(out:
ByteArrayBuilder, src: CharSequence, start: Int, end: Int) {
+        // Encode the requested character range, then copy exactly the produced bytes into out.
+        val encoded = encoder.encode(CharBuffer.wrap(src, start, end))
+        val bytes = ByteArray(encoded.remaining())
+        encoded.get(bytes)
+        out.append(bytes)
+    }
+
+    /** Decodes `src[start, end)`, appends the text to [out] and returns how far the input buffer advanced. */
+    override fun decode(out: StringBuilder, src: ByteArray, start: Int, end: Int): Int {
+        val input = ByteBuffer.wrap(src, start, end - start)
+        val decoded = decoder.decode(input)
+        out.append(decoded)
+        // The wrapped buffer starts at position `start`, so the difference is the consumed byte count.
+        return input.position() - start
+    }
+
+    override fun equals(other: Any?): Boolean = other is JvmCharset && this.charset == other.charset
+    override fun hashCode(): Int = charset.hashCode()
+    override fun toString(): String = "JvmCharset($name)"
+}
diff --git a/korlibs-string/src@linux/korlibs/io/lang/Charset.linux.kt b/korlibs-string/src@linux/korlibs/io/lang/Charset.linux.kt
new file mode 100644
index 0000000..dd1150b
--- /dev/null
+++ b/korlibs-string/src@linux/korlibs/io/lang/Charset.linux.kt
@@ -0,0 +1,3 @@
+package korlibs.io.lang
+
+// No platform charsets on this target: every lookup answers null.
+internal actual val platformCharsetProvider: CharsetProvider = CharsetProvider { _, _ -> null }
\ No newline at end of file
diff --git a/korlibs-string/src@mingw/korlibs/io/lang/Charset.mingw.kt b/korlibs-string/src@mingw/korlibs/io/lang/Charset.mingw.kt
new file mode 100644
index 0000000..4358eeb
--- /dev/null
+++ b/korlibs-string/src@mingw/korlibs/io/lang/Charset.mingw.kt
@@ -0,0 +1,3 @@
+package korlibs.io.lang
+
+// No platform charsets on this target: every lookup answers null.
+internal actual val platformCharsetProvider: CharsetProvider = CharsetProvider { _, _ -> null }
diff --git a/korlibs-string/src@wasm/korlibs/io/lang/Charset.wasm.kt b/korlibs-string/src@wasm/korlibs/io/lang/Charset.wasm.kt
new file mode 100644
index 0000000..231a748
--- /dev/null
+++ b/korlibs-string/src@wasm/korlibs/io/lang/Charset.wasm.kt
@@ -0,0 +1,92 @@
+package korlibs.io.lang
+
+import korlibs.memory.*
+import org.khronos.webgl.*
+
+external class TextDecoder(charset: String) : JsAny {
+    val encoding: String
+    fun decode(data: ArrayBufferView): String
+}
+
+external class TextEncoder(charset: String) : JsAny {
+    val encoding: String
+
fun encode(data: String): Uint8Array
+}
+
+
+internal actual val platformCharsetProvider: CharsetProvider = CharsetProvider { normalizedName, name ->
+    for (n in listOf(name, normalizedName)) {
+        try {
+            val te = wrapWasmJsExceptions { TextEncoder(n) }
+            val td = wrapWasmJsExceptions { TextDecoder(n) }
+            return@CharsetProvider JsCharset(te, td)
+        //} catch (e: dynamic) { // @TODO: Not working on WASM. Do we really have a Throwable from JS?
+        } catch (e: Throwable) {
+            continue
+        }
+    }
+    return@CharsetProvider null
+}
+
+/** [Charset] implemented on top of the JavaScript `TextEncoder` / `TextDecoder` pair. */
+class JsCharset(val textEncoder: TextEncoder, val textDecoder: TextDecoder) : Charset(textDecoder.encoding) {
+    /**
+     * Encodes `src[start, end)` and appends the bytes to [out].
+     *
+     * @throws UnsupportedOperationException when the encoder does not report the same
+     *   encoding as the decoder this charset is named after.
+     */
+    override fun encode(out: ByteArrayBuilder, src: CharSequence, start: Int, end: Int) {
+        if (textEncoder.encoding != textDecoder.encoding) throw UnsupportedOperationException("Unsupported encoding '${textDecoder.encoding}'")
+        out.append(textEncoder.encode(src.substring(start, end)).toByteArray())
+    }
+
+    /** Decodes `src[start, end)`, appends the text to [out] and returns `end - start`. */
+    override fun decode(out: StringBuilder, src: ByteArray, start: Int, end: Int): Int {
+        out.append(textDecoder.decode(src.toInt8Array().subarray(start, end)))
+        // @TODO: This charset won't support partial characters.
+        return end - start
+    }
+
+    override fun equals(other: Any?): Boolean = other is JsCharset && this.name == other.name
+    override fun hashCode(): Int = name.hashCode()
+    override fun toString(): String = "JsCharset($name)"
+}
+
+// Runs `block` on the JS side and hands back either its result or the thrown JS error.
+// NOTE(review): the `T : JsAny` bound is reconstructed from the TextEncoder/TextDecoder call
+// sites above — confirm against the intended signature.
+@JsFun("(block) => { try { return { result: block(), error: null }; } catch (e) { return { result: null, error: e }; } }")
+private external fun <T : JsAny> runCatchingJsExceptions(block: () -> T): JsResult<T>
+
+/** Runs [block] and rethrows a JavaScript error raised inside it as a Kotlin [Exception] carrying the same message. */
+private fun <T : JsAny> wrapWasmJsExceptions(block: () -> T): T {
+    val result = runCatchingJsExceptions { block() }
+    result.error?.let { throw Exception(it.message) }
+    return result.result!!
+}
+
+private external interface JsResult<T : JsAny> : JsAny {
+    val result: T?
+    val error: JsError?
+}
+
+@JsName("Error")
+private external class JsError : JsAny {
+    val message: String?
+}
+
+/** Copies the receiver element by element into a freshly allocated [Int8Array]. */
+private fun ByteArray.toInt8Array(): Int8Array {
+    //val tout = this.asDynamic()
+    //if (tout is Int8Array) {
+    //    return tout.unsafeCast()
+    //} else {
+    val out = Int8Array(this.size)
+    for (n in 0 until out.length) out[n] = this[n]
+    return out
+    //}
+}
+
+private fun ArrayBuffer.toByteArray(): ByteArray = Int8Array(this).toByteArray()
+
+/** Copies exactly the bytes this view covers into a new [ByteArray]. */
+private fun Uint8Array.toByteArray(): ByteArray {
+    // A typed array may be a window into a larger buffer; wrapping `buffer` alone would
+    // copy bytes outside of that window.
+    return Int8Array(this.buffer, this.byteOffset, this.length).toByteArray()
+}
+
+/** Copies the receiver element by element into a freshly allocated [ByteArray]. */
+private fun Int8Array.toByteArray(): ByteArray {
+    //val tout = this.asDynamic()
+    //if (tout is ByteArray) {
+    //    return tout.unsafeCast()
+    //} else {
+    val out = ByteArray(this.length)
+    for (n in out.indices) out[n] = this[n]
+    return out
+    //}
+}
+
diff --git a/korlibs-string/test/korlibs/io/lang/StringCaseTest.kt b/korlibs-string/test/korlibs/io/lang/StringCaseTest.kt
new file mode 100644
index 0000000..f39863e
--- /dev/null
+++ b/korlibs-string/test/korlibs/io/lang/StringCaseTest.kt
@@ -0,0 +1,26 @@
+package korlibs.io.lang
+
+import kotlin.test.*
+
+class StringCaseTest {
+    @Test
+    fun testToCase() {
+        val case = StringCase("hi", "there")
+        assertEquals("HiThere", case.pascalCase)
+        assertEquals("hiThere", case.camelCase)
+        assertEquals("hi there", case.spaceCase)
+        assertEquals("hi-there", case.kebabCase)
+        assertEquals("hi_there", case.snakeCase)
+        assertEquals("HI_THERE", case.screamingSnakeCase)
+    }
+
+    @Test
+    fun testDetect() {
+        assertEquals(listOf("hello", "world"), "hello world".case.words)
+        assertEquals(listOf("hello", "world"), "helloWorld".case.words)
+        assertEquals(listOf("hello", "world"), "HELLO_WORLD".case.words)
+        assertEquals(listOf("hello", "world"), "hello-world".case.words)
+        assertEquals(listOf("foo", "1337"), "foo1337".case.words)
+        assertEquals(listOf("1a", "2b", "test"), "1a2bTest".case.words)
+    }
+}
diff --git a/korlibs-string/test/korlibs/util/SimpleIndenterTest.kt b/korlibs-string/test/korlibs/util/SimpleIndenterTest.kt
new file mode 100644
index 0000000..86a46c5
--- /dev/null
+++ 
b/korlibs-string/test/korlibs/util/SimpleIndenterTest.kt
@@ -0,0 +1,25 @@
+package korlibs.util
+
+import kotlin.test.*
+
+class SimpleIndenterTest {
+    // The cache is queried out of order on purpose: a larger index first, then smaller ones.
+    @Test
+    fun testIndents() {
+        for (depth in listOf(10, 1, 0, 100)) {
+            assertEquals("\t".repeat(depth), SimpleIndenter.INDENTS[depth])
+        }
+    }
+
+    // inline() leaves the line open, line() closes it, and the nested block gains one tab.
+    @Test
+    fun testIndenter() {
+        val indenter = SimpleIndenter()
+        indenter.inline("hello").line(" world {")
+        indenter.indent { indenter.line("hi") }
+        indenter.line("}")
+        val expected = "hello world {\n\thi\n}"
+        assertEquals(expected, indenter.toString())
+    }
+}