Skip to content

Commit

Permalink
feat: Allow supported boolean Unicode properties in Java
Browse files Browse the repository at this point in the history
  • Loading branch information
Aloso committed Nov 30, 2024
1 parent 7a4c0d8 commit 1ca33b2
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 4 deletions.
10 changes: 10 additions & 0 deletions pomsky-lib/src/exprs/char_class.rs
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,16 @@ fn named_class_to_regex_unicode(
_ => {}
}
set.add_prop(RegexProperty::Other(o).negative_item(negative));
} else if flavor == RF::Java {
if pomsky_syntax::props_supported_in_java().binary_search(&o.as_str()).is_ok() {
set.add_prop(RegexProperty::Other(o).negative_item(negative));
} else {
return Err(CompileErrorKind::Unsupported(
Feature::SpecificUnicodeProp,
flavor,
)
.at(span));
}
} else {
return Err(CompileErrorKind::Unsupported(Feature::UnicodeProp, flavor).at(span));
}
Expand Down
1 change: 0 additions & 1 deletion pomsky-lib/src/regex/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -469,7 +469,6 @@ impl RegexProperty {
},
RegexProperty::Other(o) => {
if flavor == RegexFlavor::Java {
// Currently disabled since only some boolean properties are supported in Java
buf.push_str("Is");
}
buf.push_str(o.as_str());
Expand Down
20 changes: 20 additions & 0 deletions pomsky-syntax/JavaSupportedProps.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
Alphabetic
Assigned
Control
Digit
Emoji
Emoji_Component
Emoji_Modifier
Emoji_Modifier_Base
Emoji_Presentation
Extended_Pictographic
Hex_Digit
Ideographic
Join_Control
Letter
Lowercase
Noncharacter_Code_Point
Punctuation
Titlecase
Uppercase
White_Space
11 changes: 10 additions & 1 deletion pomsky-syntax/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ fn main() {
println!("cargo:rerun-if-changed=PropertyValueAliases.txt");
println!("cargo:rerun-if-changed=SupportedBooleanProps.txt");
println!("cargo:rerun-if-changed=DotNetSupportedBlocks.txt");
println!("cargo:rerun-if-changed=JavaSupportedProps.txt");
generate_unicode_data();
}

Expand All @@ -19,6 +20,9 @@ fn generate_unicode_data() {
let [categories, scripts, blocks, bools] = parse_aliases(&aliases, &blocks);

let dotnet_blocks = std::fs::read_to_string("DotNetSupportedBlocks.txt").unwrap();
let java_props = std::fs::read_to_string("JavaSupportedProps.txt").unwrap();
assert!(dotnet_blocks.lines().is_sorted());
assert!(java_props.lines().is_sorted());

let out_dir = std::env::var("OUT_DIR").unwrap();
let path = std::path::Path::new(&out_dir).join("unicode_data.rs");
Expand Down Expand Up @@ -104,14 +108,19 @@ static PARSE_LUT: &[(&str, GroupName)] = &[
static DOTNET_SUPPORTED: &[&str] = &[
{dotnet_supported}];
static JAVA_SUPPORTED: &[&str] = &[
{java_supported}];
",
category_enum = generate_enum("Category", &categories, 0, 1),
script_enum = generate_enum("Script", &scripts, 1, 1),
block_enum = generate_enum("CodeBlock", &blocks, 1, 1),
other_enum = generate_enum("OtherProperties", &bools, 1, 1),
lut = lut.join(",\n "),
dotnet_supported =
dotnet_blocks.lines().map(|line| format!(" {line:?},\n")).collect::<String>()
dotnet_blocks.lines().map(|line| format!(" {line:?},\n")).collect::<String>(),
java_supported =
java_props.lines().map(|line| format!(" {line:?},\n")).collect::<String>(),
),
)
.unwrap();
Expand Down
2 changes: 1 addition & 1 deletion pomsky-syntax/src/exprs/char_class/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ mod ascii;
pub(crate) mod char_group;
pub(crate) mod unicode;

pub use unicode::{blocks_supported_in_dotnet, list_shorthands};
pub use unicode::{blocks_supported_in_dotnet, list_shorthands, props_supported_in_java};

/// A _character class_. Refer to the [module-level documentation](self) for
/// details.
Expand Down
4 changes: 4 additions & 0 deletions pomsky-syntax/src/exprs/char_class/unicode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ pub fn blocks_supported_in_dotnet() -> &'static [&'static str] {
DOTNET_SUPPORTED
}

/// Returns the list of Unicode property names that are accepted when
/// targeting the Java flavor.
///
/// The list is generated by the build script from `JavaSupportedProps.txt`.
/// The build script asserts that the lines of that file are sorted, so the
/// returned slice can be searched with `binary_search`.
pub fn props_supported_in_java() -> &'static [&'static str] {
JAVA_SUPPORTED
}

/// Returns the list of all accepted shorthands.
pub fn list_shorthands() -> impl Iterator<Item = (&'static str, GroupName)> {
PARSE_LUT.iter().copied()
Expand Down
2 changes: 1 addition & 1 deletion pomsky-syntax/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,4 @@ pub use util::find_suggestion;
use pretty_print::PrettyPrinter;

#[doc(hidden)]
pub use exprs::char_class::{blocks_supported_in_dotnet, list_shorthands};
pub use exprs::char_class::{blocks_supported_in_dotnet, list_shorthands, props_supported_in_java};

0 comments on commit 1ca33b2

Please sign in to comment.