From 7d5845423950ed045d1cfd5ef0676e7f0c9e766c Mon Sep 17 00:00:00 2001 From: Roberto Alsina Date: Tue, 6 Aug 2024 10:02:48 -0300 Subject: [PATCH 01/10] Add MULTILINE_ONLY flag for regex --- src/regex.cr | 5 +++++ src/regex/pcre2.cr | 1 + 2 files changed, 6 insertions(+) diff --git a/src/regex.cr b/src/regex.cr index 69dd500226a9..421ac6122d6a 100644 --- a/src/regex.cr +++ b/src/regex.cr @@ -240,11 +240,15 @@ class Regex # flag that activates both behaviours, so here we do the same by # mapping `MULTILINE` to `PCRE_MULTILINE | PCRE_DOTALL`. # The same applies for PCRE2 except that the native values are 0x200 and 0x400. + # + # For the behaviour of `PCRE_MULTILINE` use `MULTILINE_ONLY`. # Multiline matching. # # Equivalent to `MULTILINE | DOTALL` in PCRE and PCRE2. MULTILINE = 0x0000_0006 + # Equivalent to `MULTILINE`in PCRE and PCRE2. + MULTILINE_ONLY = 0x0000_0040 DOTALL = 0x0000_0002 @@ -600,6 +604,7 @@ class Regex # Regex.new("ab+c", :anchored).inspect # => Regex.new("ab+c", Regex::Options::ANCHORED) # ``` def inspect(io : IO) : Nil + p! options, ~CompileOptions[IGNORE_CASE, MULTILINE, EXTENDED] if (options & ~CompileOptions[IGNORE_CASE, MULTILINE, EXTENDED]).none? inspect_literal(io) else diff --git a/src/regex/pcre2.cr b/src/regex/pcre2.cr index da811225842f..4f955d77e94c 100644 --- a/src/regex/pcre2.cr +++ b/src/regex/pcre2.cr @@ -68,6 +68,7 @@ module Regex::PCRE2 flag |= case option when .ignore_case? then LibPCRE2::CASELESS when .multiline? then LibPCRE2::DOTALL | LibPCRE2::MULTILINE + when .multiline_only? then LibPCRE2::MULTILINE when .dotall? then LibPCRE2::DOTALL when .extended? then LibPCRE2::EXTENDED when .anchored? then LibPCRE2::ANCHORED From de7efd4f9d1038cbb08695b7a649511154295135 Mon Sep 17 00:00:00 2001 From: Roberto Alsina Date: Tue, 6 Aug 2024 12:52:18 -0300 Subject: [PATCH 02/10] Added spec for multiline_only --- spec/std/regex_spec.cr | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/spec/std/regex_spec.cr b/spec/std/regex_spec.cr index 13d301987c56..af03cb2c79b8 100644 --- a/spec/std/regex_spec.cr +++ b/spec/std/regex_spec.cr @@ -250,6 +250,13 @@ describe "Regex" do end end + describe "multiline_only" do + it "anchor" do + ((/^foo.*$/m).match("foo\nbar")).try(&.[](0)).should eq "foo\nbar" + ((Regex.new("^foo.*?", Regex::Options::MULTILINE_ONLY)).match("foo\nbar")).try(&.[](0)).should eq "foo" + end + end + describe "extended" do it "ignores white space" do /foo bar/.matches?("foobar").should be_false From 50ee32abff71230aaebc68df129fb833034c04cd Mon Sep 17 00:00:00 2001 From: Roberto Alsina Date: Tue, 6 Aug 2024 12:52:57 -0300 Subject: [PATCH 03/10] Added processing for PCRE --- src/regex/pcre.cr | 1 + 1 file changed, 1 insertion(+) diff --git a/src/regex/pcre.cr b/src/regex/pcre.cr index e6cf6eaca7b0..eb17a3f5fd80 100644 --- a/src/regex/pcre.cr +++ b/src/regex/pcre.cr @@ -37,6 +37,7 @@ module Regex::PCRE flag |= case option when .ignore_case? then LibPCRE::CASELESS when .multiline? then LibPCRE::DOTALL | LibPCRE::MULTILINE + when .multiline_only? then LibPCRE::MULTILINE when .dotall? then LibPCRE::DOTALL when .extended? then LibPCRE::EXTENDED when .anchored? then LibPCRE::ANCHORED From f0ecbb2cdc21548eff4a913b6c803077216da7cb Mon Sep 17 00:00:00 2001 From: Roberto Alsina Date: Tue, 6 Aug 2024 12:53:45 -0300 Subject: [PATCH 04/10] typo --- src/regex.cr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/regex.cr b/src/regex.cr index 421ac6122d6a..fcc19d5f4f45 100644 --- a/src/regex.cr +++ b/src/regex.cr @@ -247,7 +247,7 @@ class Regex # # Equivalent to `MULTILINE | DOTALL` in PCRE and PCRE2. MULTILINE = 0x0000_0006 - # Equivalent to `MULTILINE`in PCRE and PCRE2. + # Equivalent to `MULTILINE` in PCRE and PCRE2. MULTILINE_ONLY = 0x0000_0040 DOTALL = 0x0000_0002 From 11fa2dc371791a808b9184dd40af7e81184d224c Mon Sep 17 00:00:00 2001 From: Roberto Alsina Date: Tue, 6 Aug 2024 12:57:25 -0300 Subject: [PATCH 05/10] Remove debug info --- src/regex.cr | 1 - 1 file changed, 1 deletion(-) diff --git a/src/regex.cr b/src/regex.cr index fcc19d5f4f45..f53cfff98113 100644 --- a/src/regex.cr +++ b/src/regex.cr @@ -604,7 +604,6 @@ class Regex # Regex.new("ab+c", :anchored).inspect # => Regex.new("ab+c", Regex::Options::ANCHORED) # ``` def inspect(io : IO) : Nil - p! options, ~CompileOptions[IGNORE_CASE, MULTILINE, EXTENDED] if (options & ~CompileOptions[IGNORE_CASE, MULTILINE, EXTENDED]).none? inspect_literal(io) else From 6846a72e95936c64f7ba212448786c8d55d7aec0 Mon Sep 17 00:00:00 2001 From: Roberto Alsina Date: Tue, 6 Aug 2024 12:57:39 -0300 Subject: [PATCH 06/10] Remove special handling of MULTILINE --- src/regex/pcre.cr | 1 - src/regex/pcre2.cr | 1 - 2 files changed, 2 deletions(-) diff --git a/src/regex/pcre.cr b/src/regex/pcre.cr index eb17a3f5fd80..bf702d0705be 100644 --- a/src/regex/pcre.cr +++ b/src/regex/pcre.cr @@ -36,7 +36,6 @@ module Regex::PCRE if options.includes?(option) flag |= case option when .ignore_case? then LibPCRE::CASELESS - when .multiline? then LibPCRE::DOTALL | LibPCRE::MULTILINE when .multiline_only? then LibPCRE::MULTILINE when .dotall? then LibPCRE::DOTALL when .extended? then LibPCRE::EXTENDED diff --git a/src/regex/pcre2.cr b/src/regex/pcre2.cr index 4f955d77e94c..a0a5510d745c 100644 --- a/src/regex/pcre2.cr +++ b/src/regex/pcre2.cr @@ -67,7 +67,6 @@ module Regex::PCRE2 if options.includes?(option) flag |= case option when .ignore_case? then LibPCRE2::CASELESS - when .multiline? then LibPCRE2::DOTALL | LibPCRE2::MULTILINE when .multiline_only? then LibPCRE2::MULTILINE when .dotall? then LibPCRE2::DOTALL when .extended? then LibPCRE2::EXTENDED From aba27d5e13922c050356e9f4534ffe8a99335e8e Mon Sep 17 00:00:00 2001 From: Roberto Alsina Date: Tue, 6 Aug 2024 13:51:59 -0300 Subject: [PATCH 07/10] Update src/regex.cr Co-authored-by: Sijawusz Pur Rahnama --- src/regex.cr | 1 + 1 file changed, 1 insertion(+) diff --git a/src/regex.cr b/src/regex.cr index f53cfff98113..75ad63d9e7ad 100644 --- a/src/regex.cr +++ b/src/regex.cr @@ -247,6 +247,7 @@ class Regex # # Equivalent to `MULTILINE | DOTALL` in PCRE and PCRE2. MULTILINE = 0x0000_0006 + # Equivalent to `MULTILINE` in PCRE and PCRE2. MULTILINE_ONLY = 0x0000_0040 From 603d7df35335a62b370ebed86e8b69d51119c1c6 Mon Sep 17 00:00:00 2001 From: Roberto Alsina Date: Wed, 7 Aug 2024 21:04:05 -0300 Subject: [PATCH 08/10] Restore special handling for MULTILINE, it broke a test --- src/regex/pcre.cr | 1 + src/regex/pcre2.cr | 1 + 2 files changed, 2 insertions(+) diff --git a/src/regex/pcre.cr b/src/regex/pcre.cr index bf702d0705be..db6adf06d4aa 100644 --- a/src/regex/pcre.cr +++ b/src/regex/pcre.cr @@ -36,6 +36,7 @@ module Regex::PCRE if options.includes?(option) flag |= case option when .ignore_case? then LibPCRE::CASELESS + when .multiline? then LibPCRE::MULTILINE | LibPCRE::DOTALL when .multiline_only? then LibPCRE::MULTILINE when .dotall? then LibPCRE::DOTALL when .extended? then LibPCRE::EXTENDED diff --git a/src/regex/pcre2.cr b/src/regex/pcre2.cr index a0a5510d745c..855ed86eb1d1 100644 --- a/src/regex/pcre2.cr +++ b/src/regex/pcre2.cr @@ -67,6 +67,7 @@ module Regex::PCRE2 if options.includes?(option) flag |= case option when .ignore_case? then LibPCRE2::CASELESS + when .multiline? then LibPCRE2::MULTILINE | LibPCRE2::DOTALL when .multiline_only? then LibPCRE2::MULTILINE when .dotall? then LibPCRE2::DOTALL when .extended? then LibPCRE2::EXTENDED From a27a19ed144eded11c90878d2f71124f8e56b49d Mon Sep 17 00:00:00 2001 From: Roberto Alsina Date: Tue, 17 Sep 2024 17:23:39 -0300 Subject: [PATCH 09/10] Update src/regex.cr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Johannes Müller --- src/regex.cr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/regex.cr b/src/regex.cr index 75ad63d9e7ad..8b4c2fd5e128 100644 --- a/src/regex.cr +++ b/src/regex.cr @@ -249,7 +249,7 @@ class Regex MULTILINE = 0x0000_0006 # Equivalent to `MULTILINE` in PCRE and PCRE2. - MULTILINE_ONLY = 0x0000_0040 + MULTILINE_ONLY = 0x0000_0004 DOTALL = 0x0000_0002 From 0bdba96612ab3748cff15b5493caff92e2633efe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20M=C3=BCller?= Date: Wed, 18 Sep 2024 10:06:17 +0200 Subject: [PATCH 10/10] `crystal tool format` --- src/regex.cr | 2 +- src/regex/pcre.cr | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/regex.cr b/src/regex.cr index 8b4c2fd5e128..c71ac9cd673a 100644 --- a/src/regex.cr +++ b/src/regex.cr @@ -240,7 +240,7 @@ class Regex # flag that activates both behaviours, so here we do the same by # mapping `MULTILINE` to `PCRE_MULTILINE | PCRE_DOTALL`. # The same applies for PCRE2 except that the native values are 0x200 and 0x400. - # + # # For the behaviour of `PCRE_MULTILINE` use `MULTILINE_ONLY`. # Multiline matching. diff --git a/src/regex/pcre.cr b/src/regex/pcre.cr index 9160e1c687ea..19decbb66712 100644 --- a/src/regex/pcre.cr +++ b/src/regex/pcre.cr @@ -36,8 +36,8 @@ module Regex::PCRE if options.includes?(option) flag |= case option when .ignore_case? then LibPCRE::CASELESS - when .multiline? then LibPCRE::MULTILINE | LibPCRE::DOTALL - when .multiline_only? then LibPCRE::MULTILINE + when .multiline? then LibPCRE::MULTILINE | LibPCRE::DOTALL + when .multiline_only? then LibPCRE::MULTILINE when .dotall? then LibPCRE::DOTALL when .extended? then LibPCRE::EXTENDED when .anchored? then LibPCRE::ANCHORED