From 111051707458151273ccd7a3c85a5d737745d1d8 Mon Sep 17 00:00:00 2001
From: "Jurgen J. Vinju"
Date: Wed, 17 Jul 2024 10:52:59 +0200
Subject: [PATCH] rewrote squeeze in Rascal with reified classes

---
 src/org/rascalmpl/library/String.rsc | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/src/org/rascalmpl/library/String.rsc b/src/org/rascalmpl/library/String.rsc
index de466de527..58713a0a1c 100644
--- a/src/org/rascalmpl/library/String.rsc
+++ b/src/org/rascalmpl/library/String.rsc
@@ -20,6 +20,7 @@ module String

 extend Exception;
 import List;
+import ParseTree;

 @synopsis{All functions in this module that have a charset parameter use this as default.}
 private str DEFAULT_CHARSET = "UTF-8";
@@ -522,8 +523,29 @@ squeeze("hello", "el");
 ```
 }
 @javaClass{org.rascalmpl.library.Prelude}
+@deprecated{Use the other squeeze function that accepts Rascal character classes.}
 public java str squeeze(str src, str charSet);

+@synopsis{Squeeze repeated occurrences of characters.}
+@description{
+Every run of repeated occurrences in `src` of a character that is a member of `&CharClass` is squeezed into a single occurrence of that character.
+
+* `src` is any string
+* `&CharClass` is a character class type such as `[a-z]` (a type that is a subtype of the class of all characters `![]`)
+}
+@pitfalls{
+* `![]` excludes the `0` character, so we can never squeeze the unicode codepoint `0`. We _can_ squeeze the number `0` of course, using `#[0-9]` for example.
+}
+@examples{
+```rascal-shell
+import String;
+squeeze("hello", #[el]);
+```
+}
+public str squeeze(str src, type[&CharClass <: ![]] _) = visit(src) {
+    case /<c:.><c>+/ => c
+      when &CharClass _ := Tree::char(charAt(c, 0))
+};


 @synopsis{Split a string into a list of strings based on a literal separator.}