From bc8d3d0451d52e03d29eba14184374b2eaf30c34 Mon Sep 17 00:00:00 2001 From: Jayesh Bhoot Date: Wed, 17 Apr 2024 08:57:09 +0530 Subject: [PATCH 1/3] Add :has selector --- src/soup.ml | 6 ++++++ test/performance/performance.ml | 16 ++++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/soup.ml b/src/soup.ml index 2347c6c..a79fe12 100644 --- a/src/soup.ml +++ b/src/soup.ml @@ -485,6 +485,7 @@ struct | OnlyOfType | Empty | Content of string + | Has of simple_selector | Not of simple_selector and simple_selector = @@ -595,6 +596,8 @@ struct | OnlyOfType -> element_count_with_name (name node) node = 1 | Empty -> no_children node | Content s -> texts node |> String.concat "" |> has_substring s + | Has selector -> + let matching_nodes = filter (fun n -> matches_simple_selector n selector) (descendants node) in count matching_nodes > 0 | Not selector -> not (matches_simple_selector node selector) and matches_simple_selector node = function @@ -921,6 +924,9 @@ struct let s = parse_parenthesized_value parse_quoted_string stream in Content s | "empty" -> Empty + | "has" -> + let selector = parse_parenthesized_value parse_simple_selector stream in + Has selector | "not" -> let selector = parse_parenthesized_value parse_simple_selector stream in Not selector diff --git a/test/performance/performance.ml b/test/performance/performance.ml index a6b515e..b303db6 100644 --- a/test/performance/performance.ml +++ b/test/performance/performance.ml @@ -27,8 +27,20 @@ let () = let selector = "form[action*=search]" in assert (soup $ selector |> name = "form"); - measure 1000 "select" (fun () -> soup $ selector |> ignore); + measure 1000 ("select " ^ selector) (fun () -> soup $ selector |> ignore); + + let selector = ":has([id=mngb])" in + assert (soup $$ selector |> count = 2); + measure 1000 ("select_all " ^ selector) (fun () -> soup $$ selector |> count |> ignore); + + let selector = ":has([action*=search])" in + assert (soup $$ selector |> count = 3); + measure 1000 ("select_all " ^ selector) (fun () -> soup $$ selector |> count |> ignore); + + let selector = ":has([name=gbv])" in + assert (soup $$ selector |> count = 4); + measure 1000 ("select_all " ^ selector) (fun () -> soup $$ selector |> count |> ignore); let selector = "*" in assert (soup $$ selector |> count > 10); - measure 1000 "select_all" (fun () -> soup $$ selector |> count |> ignore) + measure 1000 ("select_all " ^ selector) (fun () -> soup $$ selector |> count |> ignore) From 6f8f0370c79f28d0eb4305bf5fc4ee27084937b4 Mon Sep 17 00:00:00 2001 From: Jayesh Bhoot Date: Thu, 25 Jul 2024 01:19:28 +0530 Subject: [PATCH 2/3] add tests for :has selector --- test/test.ml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/test.ml b/test/test.ml index 378b9a5..ac7e693 100644 --- a/test/test.ml +++ b/test/test.ml @@ -122,6 +122,8 @@ let suites = [ test "p:empty" 1; test "ul li:not(:nth-child(1))" 2; test ":not(ul) > li" 2; + test ":has([id=one])" 3; + test ":has(.odd)" 4; test ("html:root > body.lists[class~=lists] > ul > li#one:nth-child(1) " ^ "+ li#two") @@ -155,7 +157,9 @@ let suites = [ test "[id=\"\\\"dquotes\\\"\"]" 1; test "[id=\"simple'quote\"]" 1; test "[id='simple\\'quote']" 1; - test "[id='back\\slash']" 1); + test "[id='back\\slash']" 1; + test "[id=\"bracket]\"]" 1; + test ":has([id=\"bracket]\"])" 2); ("parse-fail-quoted" >:: fun _ -> let soup = page "quoted" |> parse in From 6f8f3ea9389a707668966e593d111424353ebab9 Mon Sep 17 00:00:00 2001 From: Anton Bachin Date: Thu, 25 Jul 2024 01:09:57 +0300 Subject: [PATCH 3/3] Tweaks --- src/soup.ml | 9 ++++++--- test/performance/performance.ml | 12 ++++++++---- test/test.ml | 3 +++ 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/soup.ml b/src/soup.ml index a79fe12..992edd1 100644 --- a/src/soup.ml +++ b/src/soup.ml @@ -596,8 +596,11 @@ struct | OnlyOfType -> element_count_with_name (name node) node = 1 | Empty -> no_children node | Content s -> texts node |> String.concat "" |> has_substring s - | Has selector -> - let matching_nodes = filter (fun n -> matches_simple_selector n selector) (descendants node) in count matching_nodes > 0 + | Has selector -> + descendants node + |> filter (fun descendant -> matches_simple_selector descendant selector) + |> count + |> fun count -> count > 0 | Not selector -> not (matches_simple_selector node selector) and matches_simple_selector node = function @@ -924,7 +927,7 @@ struct let s = parse_parenthesized_value parse_quoted_string stream in Content s | "empty" -> Empty - | "has" -> + | "has" -> let selector = parse_parenthesized_value parse_simple_selector stream in Has selector | "not" -> diff --git a/test/performance/performance.ml b/test/performance/performance.ml index b303db6..f290bca 100644 --- a/test/performance/performance.ml +++ b/test/performance/performance.ml @@ -31,16 +31,20 @@ let () = let selector = ":has([id=mngb])" in assert (soup $$ selector |> count = 2); - measure 1000 ("select_all " ^ selector) (fun () -> soup $$ selector |> count |> ignore); + measure 1000 ("select_all " ^ selector) + (fun () -> soup $$ selector |> count |> ignore); let selector = ":has([action*=search])" in assert (soup $$ selector |> count = 3); - measure 1000 ("select_all " ^ selector) (fun () -> soup $$ selector |> count |> ignore); + measure 1000 ("select_all " ^ selector) + (fun () -> soup $$ selector |> count |> ignore); let selector = ":has([name=gbv])" in assert (soup $$ selector |> count = 4); - measure 1000 ("select_all " ^ selector) (fun () -> soup $$ selector |> count |> ignore); + measure 1000 ("select_all " ^ selector) + (fun () -> soup $$ selector |> count |> ignore); let selector = "*" in assert (soup $$ selector |> count > 10); - measure 1000 ("select_all " ^ selector) (fun () -> soup $$ selector |> count |> ignore) + measure 1000 ("select_all " ^ selector) + (fun () -> soup $$ selector |> count |> ignore) diff --git a/test/test.ml b/test/test.ml index ac7e693..c3abcb3 100644 --- a/test/test.ml +++ b/test/test.ml @@ -123,6 +123,9 @@ let suites = [ test "ul li:not(:nth-child(1))" 2; test ":not(ul) > li" 2; test ":has([id=one])" 3; + test "ul:has([id=one])" 1; + test "ol:has([id=one])" 0; + test "li:has([id=one])" 0; test ":has(.odd)" 4; test ("html:root > body.lists[class~=lists] > ul > li#one:nth-child(1) " ^