diff --git a/Sources/DeclarativeTextKit/Buffer/Buffer.swift b/Sources/DeclarativeTextKit/Buffer/Buffer.swift index 549038a..103a70b 100644 --- a/Sources/DeclarativeTextKit/Buffer/Buffer.swift +++ b/Sources/DeclarativeTextKit/Buffer/Buffer.swift @@ -235,177 +235,122 @@ let wordBoundary: CharacterSet = .whitespacesAndNewlines .union(.symbols) .union(.illegalCharacters) // Not tested +extension CharacterSet { + @usableFromInline + static let nonWhitespaceOrNewlines: CharacterSet = .whitespacesAndNewlines.inverted +} + extension Buffer { @inlinable public func wordRange( for baseRange: Buffer.Range ) throws -> Buffer.Range { - guard self.contains(range: baseRange) else { throw BufferAccessFailure.outOfRange(requested: baseRange, available: self.range) } // This bridging overhead isn't ideal while we operate on `Swift.String` as the `Buffer.Content`. It makes NSRange-based string enumeration easier. As long as `wordRange(for:)` is used to apply commands on the user's behalf via DeclarativeTextKit, we should be okay in practice even for longer document. Repeated calls to this function, e.g. in loops, could be a disaster, though. See commit d434030e6d9366941c5cc3fa9c6de860afb74710 for an approach that uses two while loops instead. let nsContent = (self.content as NSString) - func isWordSeparator( - _ characterSequence: NSString, - wordBoundary: CharacterSet - ) -> Bool { - return characterSequence.rangeOfCharacter(from: wordBoundary) == NSRange(location: 0, length: characterSequence.length) + func expanding( + range searchRange: NSRange, + upToCharactersFrom characterSet: CharacterSet + ) -> Buffer.Range { + var expandedRange = searchRange + expandedRange = expanding(range: expandedRange, upToCharactersFrom: characterSet, direction: .upstream) + expandedRange = expanding(range: expandedRange, upToCharactersFrom: characterSet, direction: .downstream) + return expandedRange } - func matchedRange( - in searchRange: NSRange, - wordBoundary: CharacterSet - ) -> (start: Buffer.Location, end: Buffer.Location) { - var start = searchRange.location - nsContent.enumerateSubstrings( - in: Buffer.Range( - location: self.range.location, - // Account for start locations >0 (e.g. in ScopedBufferSlice) in length calculation - length: searchRange.location - self.range.location - ), - options: [.byComposedCharacterSequences, .reverse] - ) { characterSequence, characterSequenceRange, enclosingRange, stop in - guard let characterSequence = characterSequence as? NSString - else { assertionFailure(); return } - if isWordSeparator(characterSequence, wordBoundary: wordBoundary) { - stop.pointee = true - } else { - start = characterSequenceRange.location - } - } - - var end = searchRange.endLocation - nsContent.enumerateSubstrings( - in: Buffer.Range( - location: searchRange.endLocation, - // Account for start locations >0 (e.g. in ScopedBufferSlice) in length calculation - length: self.range.endLocation - searchRange.endLocation - ), - options: [.byComposedCharacterSequences] - ) { characterSequence, characterSequenceRange, enclosingRange, stop in - guard let characterSequence = characterSequence as? NSString - else { assertionFailure(); return } - if isWordSeparator(characterSequence, wordBoundary: wordBoundary) { - stop.pointee = true - } else { - end = characterSequenceRange.endLocation - } + func expanding( + range searchRange: NSRange, + upToCharactersFrom characterSet: CharacterSet, + direction: Direction + ) -> Buffer.Range { + switch direction { + case .upstream: + let matchedLocation = nsContent.locationUpToCharacter( + from: characterSet, + direction: .upstream, + in: self.range.prefix(upTo: searchRange) + ) + return Buffer.Range( + startLocation: matchedLocation ?? self.range.location, // If nothing was found, expand to start of the available range. + endLocation: searchRange.endLocation + ) + case .downstream: + let matchedLocation = nsContent.locationUpToCharacter( + from: characterSet, + direction: .downstream, + in: self.range.suffix(after: searchRange) + ) + return Buffer.Range( + startLocation: searchRange.location, + endLocation: matchedLocation ?? self.range.endLocation // If nothing was found, expand to end of the available range. + ) } - - return (start, end) } - func firstNonSkippable( - location: Buffer.Location, - wordBoundary: CharacterSet, - reverse: Bool - ) -> Buffer.Location? { - var options: NSString.EnumerationOptions = [.byComposedCharacterSequences] - let searchRange: Buffer.Range - if reverse { - options.insert(.reverse) - if location < self.range.location { - return nil // at beginning of buffer - } - searchRange = Buffer.Range( - location: self.range.location, - // TODO: In ScopedSliceBuffer, this may fail because `length: location` assumes `self.range` starts from 0. See range checks below. Haven't found a failing case for this yet, though. - length: location - ) - } else { - if location >= self.range.endLocation { - return nil // at end of buffer - } - searchRange = Buffer.Range( - location: location, - // TODO: In ScopedSliceBuffer, this may fail because `self.range.length - location` assumes `self.range` starts from the 0 location. See range checks below. Haven't found a failing case for this yet, though. - length: self.range.length - location + func trimmingWhitespace(range: Buffer.Range) -> Buffer.Range { + var result = range + + // Trim trailing whitespace first, favoring upstream selection affinity, e.g. if `baseRange` is all whitespace. + if let newEndLocation = nsContent.locationUpToCharacter( + from: .nonWhitespaceOrNewlines, + direction: .upstream, + in: result.expanded(to: self.range, direction: .upstream)) + { + result = Buffer.Range( + startLocation: result.location, + endLocation: max(newEndLocation, result.location) // If newEndLocation < location, the whole of searchRange is whitespace. ) } - var result: Buffer.Location? = nil - nsContent.enumerateSubstrings( - in: searchRange, - options: options - ) { characterSequence, characterSequenceRange, enclosingRange, stop in - guard let characterSequence = characterSequence as? NSString - else { assertionFailure(); return } - if isWordSeparator(characterSequence, wordBoundary: wordBoundary) { - // Skip whitespace - } else { - result = reverse - ? characterSequenceRange.endLocation // skip up to *after* the match coming from right - : characterSequenceRange.location - stop.pointee = true - } + // Trim leading whitespace + if let newStartLocation = nsContent.locationUpToCharacter( + from: .nonWhitespaceOrNewlines, + direction: .downstream, + in: result.expanded(to: self.range, direction: .downstream)) + { + result = Buffer.Range( + startLocation: min(newStartLocation, result.endLocation), // If newStartLocation > endLocation, the whole searchRange is whitespace. + endLocation: result.endLocation + ) } - return result - } - var searchRange = baseRange - - // Trim trailing whitespace first, favoring upstream selection affinity, e.g. if `baseRange` is all whitespace. - if searchRange.length > 0 { - let newEndLocation = firstNonSkippable( - location: searchRange.endLocation, - wordBoundary: .whitespacesAndNewlines, - reverse: true - ) ?? baseRange.endLocation - if newEndLocation < searchRange.location { - // Flipped locations indicate that the whole of searchRange is whitespace. - // TODO: Could also set searchRange.location to its (endLocation-1) when trimming the length for downstream affinity. - searchRange.length = 0 - } else { - searchRange.endLocation = newEndLocation - } - } - // Trim leading whitespace - if searchRange.length > 0 { - let newStartLocation = firstNonSkippable( - location: searchRange.location, - wordBoundary: .whitespacesAndNewlines, - reverse: false - ) ?? baseRange.location - if newStartLocation >= searchRange.endLocation { - // Flipped locations indicate that the whole of searchRange is whitespace. - // TODO: While symmetrical, this doesn't actually change the effect. - // searchRange.location = newStartLocation - // searchRange.length = 0 - } else { - searchRange.location = newStartLocation - searchRange.length -= (newStartLocation - baseRange.location) - } + return result } - var (start, end) = matchedRange(in: searchRange, wordBoundary: wordBoundary) + func nonWhitespaceLocation(closestTo location: Buffer.Location) -> Buffer.Location? { + let downstreamNonWhitespaceLocation = nsContent.locationUpToCharacter(from: .nonWhitespaceOrNewlines, direction: .downstream, in: self.range.suffix(after: location)) + let upstreamNonWhitespaceLocation = nsContent.locationUpToCharacter(from: .nonWhitespaceOrNewlines, direction: .upstream, in: self.range.prefix(upTo: location)) - // If the result is an empty range, characters adjacent to the location were all `wordBoundary` characters. Then we need to try again with relaxed conditions, skipping over whitespace first. Try forward search, then backward. - if start == end { - let downstreamNonWhitespaceLocation = firstNonSkippable(location: start, wordBoundary: .whitespacesAndNewlines, reverse: false) - let upstreamNonWhitespaceLocation = firstNonSkippable(location: start, wordBoundary: .whitespacesAndNewlines, reverse: true) - // Prioritize look-behind over look-ahead *only* of the point is left-adjacent to non-whitespace character and the look-ahead is further away. + // Prioritize look-behind over look-ahead iff the location is downstream of a non-whitespace character (non-whitespace to the left of it) and the look-ahead is further away. if let upstreamNonWhitespaceLocation, let downstreamNonWhitespaceLocation, - (upstreamNonWhitespaceLocation ..< start).count == 0, - (start ..< downstreamNonWhitespaceLocation).count > 0 { - (start, end) = matchedRange(in: .init(location: upstreamNonWhitespaceLocation, length: 0), wordBoundary: .whitespacesAndNewlines) - } else if let location = downstreamNonWhitespaceLocation ?? upstreamNonWhitespaceLocation { - (start, end) = matchedRange(in: .init(location: location, length: 0), wordBoundary: .whitespacesAndNewlines) + (upstreamNonWhitespaceLocation ..< location).count == 0, + downstreamNonWhitespaceLocation > location { + return upstreamNonWhitespaceLocation } + + return downstreamNonWhitespaceLocation ?? upstreamNonWhitespaceLocation } - let result = Buffer.Range( - location: start, - length: end - start + var resultRange = expanding( + range: trimmingWhitespace(range: baseRange), + upToCharactersFrom: wordBoundary ) + // If the result is an empty range, characters adjacent to the location were all `wordBoundary` characters. Then we need to try again with relaxed conditions, skipping over whitespace first. Try forward search, then backward. + if resultRange.length == 0, + let closestNonWhitespaceLocation = nonWhitespaceLocation(closestTo: resultRange.location) { + resultRange = expanding(range: .init(location: closestNonWhitespaceLocation, length: 0), upToCharactersFrom: .whitespacesAndNewlines) + } + // When the input range covered only whitespace and nothing was found, discard the resulting empty range in favor of the original. - if result.length == 0, result != baseRange { + if resultRange.length == 0, resultRange != baseRange { return baseRange } - return result + return resultRange } } diff --git a/Sources/DeclarativeTextKit/Buffer/NSMutableString+BufferCompatibility.swift b/Sources/DeclarativeTextKit/Buffer/NSMutableString+BufferCompatibility.swift index e716e87..e6408df 100644 --- a/Sources/DeclarativeTextKit/Buffer/NSMutableString+BufferCompatibility.swift +++ b/Sources/DeclarativeTextKit/Buffer/NSMutableString+BufferCompatibility.swift @@ -5,7 +5,7 @@ import Foundation extension NSMutableString { @usableFromInline func unsafeCharacter(at location: Buffer.Location) -> Buffer.Content { - return unsafeContent(in: self.rangeOfComposedCharacterSequences(for: .init(location: location, length: 1))) + return unsafeContent(in: self.rangeOfComposedCharacterSequence(at: location)) } @usableFromInline diff --git a/Sources/DeclarativeTextKit/Buffer/UTF16.swift b/Sources/DeclarativeTextKit/Buffer/UTF16.swift index cac232f..93866f3 100644 --- a/Sources/DeclarativeTextKit/Buffer/UTF16.swift +++ b/Sources/DeclarativeTextKit/Buffer/UTF16.swift @@ -16,6 +16,15 @@ extension UTF16Range { length = newValue - location } } + + @inlinable @inline(__always) + init( + startLocation: UTF16Offset, + endLocation: UTF16Offset + ) { + precondition(startLocation <= endLocation) + self.init(location: startLocation, length: endLocation - startLocation) + } } public func length(of string: NSString) -> UTF16Length { diff --git a/Sources/DeclarativeTextKit/NSRange+expandedToOther.swift b/Sources/DeclarativeTextKit/NSRange+expandedToOther.swift new file mode 100644 index 0000000..bd5360a --- /dev/null +++ b/Sources/DeclarativeTextKit/NSRange+expandedToOther.swift @@ -0,0 +1,28 @@ +// Copyright © 2024 Christian Tietze. All rights reserved. Distributed under the MIT License. + +import Foundation + +extension NSRange { + @inlinable + func expanded( + to other: NSRange, + direction: Direction + ) -> NSRange { + precondition(other.location <= self.location && other.endLocation >= self.endLocation, "Expansion requires other range to be larger") + + let startLocation = switch direction { + case .upstream: other.location + case .downstream: self.location + } + + let endLocation = switch direction { + case .upstream: self.endLocation + case .downstream: other.endLocation + } + + return NSRange( + startLocation: startLocation, + endLocation: endLocation + ) + } +} diff --git a/Sources/DeclarativeTextKit/NSRange+prefix.swift b/Sources/DeclarativeTextKit/NSRange+prefix.swift new file mode 100644 index 0000000..bbb0e03 --- /dev/null +++ b/Sources/DeclarativeTextKit/NSRange+prefix.swift @@ -0,0 +1,22 @@ +// Copyright © 2024 Christian Tietze. All rights reserved. Distributed under the MIT License. + +import Foundation + +extension NSRange { + /// - Returns: Subrange that ends before `other`. + @inlinable + func prefix(upTo other: NSRange) -> NSRange { + return prefix(upTo: other.location) + } + + /// - Returns: Subrange that ends before `location`. + @inlinable + func prefix(upTo location: Int) -> NSRange { + precondition(self.location <= location && self.endLocation >= location, "Prefix requires range to reach up to or encompass location") + + return NSRange( + startLocation: self.location, + endLocation: location + ) + } +} diff --git a/Sources/DeclarativeTextKit/NSRange+suffix.swift b/Sources/DeclarativeTextKit/NSRange+suffix.swift new file mode 100644 index 0000000..024b358 --- /dev/null +++ b/Sources/DeclarativeTextKit/NSRange+suffix.swift @@ -0,0 +1,22 @@ +// Copyright © 2024 Christian Tietze. All rights reserved. Distributed under the MIT License. + +import Foundation + +extension NSRange { + /// - Returns: Subrange that starts after `other`. + @inlinable + func suffix(after other: NSRange) -> NSRange { + return suffix(after: other.endLocation) + } + + /// - Returns: Subrange that starts after `location`. + @inlinable + func suffix(after location: Int) -> NSRange { + precondition(self.location <= location && self.endLocation >= location, "Suffix requires range to start right after or encompass location") + + return NSRange( + startLocation: location, + endLocation: self.endLocation + ) + } +} diff --git a/Sources/DeclarativeTextKit/NSString+locationUpToCharacter.swift b/Sources/DeclarativeTextKit/NSString+locationUpToCharacter.swift new file mode 100644 index 0000000..2bf87b7 --- /dev/null +++ b/Sources/DeclarativeTextKit/NSString+locationUpToCharacter.swift @@ -0,0 +1,63 @@ +// Copyright © 2024 Christian Tietze. All rights reserved. Distributed under the MIT License. + +import Foundation + +@usableFromInline +enum Direction { + /// Left-to-right or towards-the-end search in a string. + case downstream + /// Right-to-left or towards-the-beginning search in a string. + case upstream +} + +extension NSString { + /// Finds and returns the location adjacent to the first character from `characterSet` found in the substring `range` in `direction`. + /// + /// Use this to find an insertion point _next to_ a match to insert text into. + /// + /// Unlike `NSString.rangeOfCharacter(from:options:range:)`, handles Emoji as composed UTF-16 character sequences properly. + /// + /// ## Example of 'adjacency' + /// + /// To finding location _up to_ a character from `CharacterSet.punctuationCharacters` in this piece of text: + /// + /// "a (test) text" + /// + /// will be one of the following, with the location denoted by `ˇ`: + /// + /// "a ˇ(test) text" // Downstream / from left to right + /// "a (test)ˇ text" // Upstream / from right to left + @usableFromInline + func locationUpToCharacter( + from characterSet: CharacterSet, + direction: Direction, + in range: NSRange + ) -> Buffer.Location? { + var result: Buffer.Location? + + var options: NSString.EnumerationOptions = [.byComposedCharacterSequences] + if direction == .upstream { + options.insert(.reverse) + } + self.enumerateSubstrings(in: range, options: options) { characterSequence, characterSequenceRange, enclosingRange, stop in + guard let characterSequence = characterSequence as? NSString + else { assertionFailure(); return } + if characterSet.contains(characterSequence: characterSequence) { + result = switch direction { + case .upstream: characterSequenceRange.endLocation + case .downstream: characterSequenceRange.location + } + stop.pointee = true + } + } + + return result + } +} + +extension CharacterSet { + @inlinable + func contains(characterSequence: NSString) -> Bool { + return characterSequence.rangeOfCharacter(from: self) == NSRange(location: 0, length: characterSequence.length) + } +}