diff --git a/Cargo.lock b/Cargo.lock
index 2f4203f39..e03c50c85 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3252,6 +3252,7 @@ dependencies = [
  "paste",
  "rand",
  "rayon",
+ "regex",
  "rustls",
  "serde",
  "serde_yaml",
diff --git a/Cargo.toml b/Cargo.toml
index 37f287f0f..1f0267bd9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -47,6 +47,7 @@ webpki-roots = { version = "0.25.0", optional = true }
 viuer = { version = "0.7.1", optional = true }
 num_cpus = "1.16.0"
 rayon = "1.8.0"
+regex = "1.10.0"
 
 [features]
 audio = ["hodaun", "crossbeam-channel", "lockfree"]
diff --git a/src/primitive/defs.rs b/src/primitive/defs.rs
index 9db7cbb07..527996242 100644
--- a/src/primitive/defs.rs
+++ b/src/primitive/defs.rs
@@ -1478,6 +1478,11 @@ primitive!(
     /// [under][now] can be used to time a function.
     /// ex: ⍜now(5&sl1)
     (0, Now, Misc, "now"),
+    /// Match a regex pattern against a string
+    ///
+    /// Returns an array of boxed strings, with one string per match
+    /// ex: regex "([a-z]+)" "hello world"
+    (2, Regex, Misc, "regex"),
     /// The number of radians in a quarter circle
     ///
     /// Equivalent to `divide``2``pi` or `divide``4``tau`
diff --git a/src/primitive/mod.rs b/src/primitive/mod.rs
index dda169fb7..98aaabc3b 100644
--- a/src/primitive/mod.rs
+++ b/src/primitive/mod.rs
@@ -16,12 +16,15 @@ use std::{
     sync::{
         atomic::{self, AtomicUsize},
         OnceLock,
+        Arc,
     },
+    collections::HashMap,
 };
 
 use enum_iterator::{all, Sequence};
 use once_cell::sync::Lazy;
 use rand::prelude::*;
+use regex::Regex;
 
 use crate::{
     algorithm::{fork, loops},
@@ -36,6 +39,11 @@ use crate::{
     Uiua, UiuaError, UiuaResult,
 };
 
+thread_local! {
+    /// Per-thread cache of compiled regexes, keyed by their pattern source.
+    pub static REGEX_CACHE: RefCell<HashMap<String, Regex>> = RefCell::new(HashMap::new());
+}
+
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Sequence)]
 pub enum PrimClass {
     Stack,
@@ -573,6 +581,38 @@ impl Primitive {
             Primitive::InvTrace => trace(env, true)?,
             Primitive::Dump => dump(env)?,
             Primitive::Sys(io) => io.run(env)?,
+            Primitive::Regex => {
+                // The pattern is popped first; the text to search is popped second.
+                let pattern = env.pop(1)?.as_string(env, "Pattern must be a string")?;
+                let matching = env.pop(2)?.as_string(env, "Matching target must be a string")?;
+
+                // Compile each distinct pattern once per thread, then reuse the cached copy.
+                let compiled = REGEX_CACHE.with(|cache| -> Result<Regex, regex::Error> {
+                    let mut cache = cache.borrow_mut();
+                    if let Some(re) = cache.get(&pattern) {
+                        return Ok(re.clone());
+                    }
+                    let re = Regex::new(&pattern)?;
+                    cache.insert(pattern.clone(), re.clone());
+                    Ok(re)
+                });
+                let re = compiled.map_err(|_| env.error(format!("Invalid pattern: {}", pattern)))?;
+
+                // Box every whole match and join the boxes into a single value. Join
+                // failures are propagated as errors instead of panicking.
+                let mut matches: Option<Value> = None;
+                for m in re.find_iter(&matching) {
+                    let boxed: Value = Function::constant(m.as_str()).into();
+                    matches = Some(match matches {
+                        Some(joined) => Value::join(joined, boxed, env)?,
+                        None => boxed,
+                    });
+                }
+
+                // NOTE(review): the element type of the empty array was lost in this copy of
+                // the patch; `Arc<Function>` is assumed from the added `Arc` import. Confirm.
+                env.push(matches.unwrap_or_else(|| Array::<Arc<Function>>::default().into()));
+            }
         }
         Ok(())
     }
diff --git a/tests/units.ua b/tests/units.ua
index 5f5ea70ba..8944419ee 100644
--- a/tests/units.ua
+++ b/tests/units.ua
@@ -148,3 +148,9 @@ ParseOrZero ← ⍣parse⋅⋅0
 ⍤∶≅, 97 -@\0 @a
 ⍤∶≅, 27 -@\0 @\x1b
 ⍤∶≅, 4096 -@\0 @\u1000
+
+⍤. ↧⊙(≅ "hello" ⊔⊡0) ≅ "world" ⊔⊡1 . regex "([a-z]+)" "hello world"
+⍤. ↧⊙(≅ "hello" ⊔⊡0) ≅ "world" ⊔⊡1 . regex "([a-z]+)" "hello world"
+⍤. ≅ {} regex "([0-9]+)" "hello world"
+⍤. ⍣(regex "([a-z]" "hello world") (1;)
+