Merge pull request #2196 from athensresearch/parser-lookbehind

Support Safari on Mac and iOS
athensresearch · May 27, 2022 · 5869f32 · 5869f32
2 parents d37352f + 65615f6
commit 5869f32
Show file tree

Hide file tree

Showing 12 changed files with 292 additions and 952 deletions.
diff --git a/resources/public/index.html b/resources/public/index.html
@@ -32,35 +32,20 @@
         localStorage.setItem("day8.re-frame-10x.show-panel","\"true\"")
         localStorage.setItem("day8.re-frame-10x.using-trace?","\"true\"")
       }
-      function isBrowserUnsupported() {
-        var isWebkit = userAgent.indexOf("applewebkit/") > -1;
-        var isChrome = userAgent.indexOf("chrome/") > -1;
-        // Chrome also has applewebkit in the useragent, but it is supported.
-        return isWebkit && !isChrome;
-      }
-      function showUnsupportedBrowserWarning() {
-          let warnDiv = window.document.createElement("div");
-          warnDiv.innerText = "Safari based browsers based are not supported";
-          window.document.body.appendChild(warnDiv);
-      }
     </script>
     <script>
-      if (isBrowserUnsupported()) {
-        showUnsupportedBrowserWarning();
+      var electron = isElectron()
+      var src = ""
+      if (electron) {
+         src = "js/compiled/renderer.js"
+         show10x() // 10x exists on DOM by default, but athens.style hides via CSS
       } else {
-        var electron = isElectron()
-        var src = ""
-        if (electron) {
-           src = "js/compiled/renderer.js"
-           show10x() // 10x exists on DOM by default, but athens.style hides via CSS
-        } else {
-           src = "js/compiled/app.js"
-        }
-
-        var script = document.createElement("script");
-        script.setAttribute("src", src);
-        document.getElementsByTagName("head")[0].appendChild(script);
+         src = "js/compiled/app.js"
       }
+
+      var script = document.createElement("script");
+      script.setAttribute("src", src);
+      document.getElementsByTagName("head")[0].appendChild(script);
     </script>
   </body>
 </html>
diff --git a/src/cljc/athens/parser/impl.cljc b/src/cljc/athens/parser/impl.cljc
@@ -74,86 +74,85 @@ inline = recur
 (* closing `x` has: *)
 (* - `(?<!\\s)`: it can't be preceded by a white space *)
 (* - `(?!\\w)`: it can't be followed by a word character, when it can don't include it *)
+(* regex lookbehinds `?<!` don't work at the start of a token so we're not using them *)
    
-code-span = <#'(?<!\\w)`'>
-            #'(?s)([^`]|(?<=\\s)`(?=\\s))+'
+code-span = <#'`'>
+            #'(?s)([^`]|\\B`(?=\\s))+'
             <#'`(?!\\w)'>
 
-strong-emphasis = <#'(?<!\\w)\\*\\*(?!\\s)'>
+strong-emphasis = <#'\\*\\*(?!\\s)'>
                   recur
-                  <#'(?<!\\s)\\*\\*(?!\\w)'>
+                  <#'\\*\\*(?!\\w)'>
 
-emphasis = <#'(?<!\\w)\\*(?!\\s)'>
+emphasis = <#'\\*(?!\\s)'>
            recur
-           <#'(?<!\\s)\\*(?!\\w)'>
+           <#'\\*(?!\\w)'>
 
-highlight = <#'(?<!\\w)\\^\\^(?!\\s)'>
+highlight = <#'\\^\\^(?!\\s)'>
             recur
-            <#'(?<!\\s)\\^\\^(?!\\w)'>
+            <#'\\^\\^(?!\\w)'>
 
-strikethrough = <#'(?<!\\w)~~(?!\\s)'>
+strikethrough = <#'~~(?!\\s)'>
                 recur
-                <#'(?<!\\s)~~(?!\\w)'>
+                <#'~~(?!\\w)'>
 
 link = md-link
 image = <'!'> md-link
 
-<md-link> = <#'(?<!\\w)\\[(?!\\s)'>
+<md-link> = <#'\\[(?!\\s)'>
             link-text
-            <#'(?<!\\s)\\]\\((?!\\s)'>
+            <#'\\]\\((?!\\s)'>
             link-target
             (<' '> link-title)?
-            <#'(?<!\\s)\\)(?!\\w)'>
+            <#'\\)(?!\\w)'>
 
 link-text = #'([^\\]]|\\\\\\])*?(?=\\]\\()'
 link-target = ( #'[^\\s\\(\\)]+' | '(' #'[^\\s\\)]*' ')' | '\\\\' ( '(' | ')' ) | #'\\s(?![\"\\'\\(])' )+
 link-title = <'\"'> #'[^\"]+' <'\"'>
            | <'\\''> #'[^\\']+' <'\\''>
            | <'('> #'[^\\)]+' <')'>
 
-autolink = <#'(?<!\\w)<(?!\\s)'>
+autolink = <#'<(?!\\s)'>
            #'[^>\\s]+'
-           <#'(?<!\\s)>(?!\\w)'>
+           <#'>(?!\\w)'>
 
 block-ref = title?
             <#'\\(\\((?!\\s)'>
             #'.+?(?=\\)\\))'
-            <#'(?<!\\s)\\)\\)'>
+            <#'\\)\\)'>
 
 page-link = title?
-            <#'(?<!\\w)\\[\\[(?!\\s)'>
+            <#'\\[\\[(?!\\s)'>
             (#'[^\\[\\]\\#\\n]+' | page-link | hashtag-naked | hashtag-braced)+
-            <#'(?<!\\s)\\]\\](?!\\w)'>
+            <#'\\]\\](?!\\w)'>
 
-hashtag-naked = <#'(?<!\\w)\\#(?!\\s)'>
+hashtag-naked = <#'\\#(?!\\s)'>
                 #'[^\\ \\+\\!\\@\\#\\$\\%\\^\\&\\*\\(\\)\\?\\\"\\;\\:\\]\\[]+(?!\\w)'
 
-hashtag-braced = <#'(?<!\\w)\\#\\[\\[(?!\\s)'>
+hashtag-braced = <#'\\#\\[\\[(?!\\s)'>
                  (#'[^\\[\\]\\#\\n]+' | page-link | hashtag-naked | hashtag-braced)+
-                 <#'(?<!\\s)\\]\\](?!\\w)'>
+                 <#'\\]\\](?!\\w)'>
 
-component = <#'(?<!\\w)\\{\\{(?!\\s)'>
+component = <#'\\{\\{(?!\\s)'>
             (page-link / block-ref / #'.+(?=\\}\\})')
-            <#'(?<!\\s)\\}\\}(?!\\w)'>
+            <#'\\}\\}(?!\\w)'>
 
-title = <#'(?<!\\w)\\[(?!\\s)'>
+title = <#'\\[(?!\\s)'>
         #'([^\\]]|\\\\\\])+(?=\\])'
-        <#'(?<!\\s)\\](?!\\s)'>
+        <#'\\](?!\\s)'>
 
-latex = <#'(?<!\\w)\\$\\$(?!\\s)'>
+latex = <#'\\$\\$(?!\\s)'>
         #'(?s).+?(?=\\$\\$)'
-        <#'(?<!\\s)\\$\\$(?!\\w)'>
+        <#'\\$\\$(?!\\w)'>
 
 (* characters with meaning (special chars) *)
 (* every delimiter used as inline span boundary has to be added below *)
 
 (* anything but special chars *)
-text-run = #'(?:[^\\*`\\^~\\[!<\\(\\#\\$\\{\\r\\n]|(?<=\\S)[`!\\#\\$\\{])+'
+text-run = #'(?:[^\\*`\\^~\\[!<\\(\\#\\$\\{\\r\\n]|\\b[`!\\#\\$\\{])+'
 
 (* any special char *)
-<special-char> = #'(?<!\\w)[\\*`^~\\[!<\\(\\#\\$\\{]'
-
-<backtick> = #'(?<!`)`(?!`)'
+<special-char> = #'[\\*`^~\\[!<\\(\\#\\$\\{]'
 
 newline = #'\\n'
 ")

diff --git a/src/cljc/athens/patterns.cljc b/src/cljc/athens/patterns.cljc
@@ -1,19 +1,6 @@
-(ns athens.patterns)
-
-
-(defn unlinked
-  "Exclude #title or [[title]].
-   JavaScript negative lookarounds https://javascript.info/regexp-lookahead-lookbehind
-   Lookarounds don't consume characters https://stackoverflow.com/questions/27179991/regex-matching-multiple-negative-lookahead "
-  [string]
-  (re-pattern (str "(?i)(?<!#)(?<!\\[\\[)" string "(?!\\]\\])")))
-
-
-;; Matches a date with an ordinal number (roam format), considering the correct ordinal
-;; suffix based on the ending number of the date
-;; Regular expression, with test cases can be found here https://regex101.com/r/vOzOl9/1
-;; Any update to this should be done after testing it using the previous regex101 link
-(def roam-date #"((?<=\s1\d)th|(?<=(\s|[023456789])\d)((?<=1)st|(?<=2)nd|(?<=3)rd|(?<=[4567890])th)),(?=\s\d{4})")
+(ns athens.patterns
+  (:require
+    [clojure.string :as string]))
 
 
 (defn date
@@ -26,7 +13,95 @@
   (re-find #"\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s\d{1,2}(?:st|nd|rd|th),\s\d{4}\b" str))
 
 
+(def ordinal->number ; day-of-month ordinal string ("1st" .. "31st") -> the same day as a plain digit string
+  {"1st"  "1"
+   "2nd"  "2"
+   "3rd"  "3"
+   "4th"  "4"
+   "5th"  "5"
+   "6th"  "6"
+   "7th"  "7"
+   "8th"  "8"
+   "9th"  "9"
+   "10th" "10"
+   "11th" "11"
+   "12th" "12"
+   "13th" "13"
+   "14th" "14"
+   "15th" "15"
+   "16th" "16"
+   "17th" "17"
+   "18th" "18"
+   "19th" "19"
+   "20th" "20"
+   "21st" "21"
+   "22nd" "22"
+   "23rd" "23"
+   "24th" "24"
+   "25th" "25"
+   "26th" "26"
+   "27th" "27"
+   "28th" "28"
+   "29th" "29"
+   "30th" "30"
+   "31st" "31"})
+
+
 (defn replace-roam-date
   [string]
-  (clojure.string/replace string athens.patterns/roam-date ","))
+  (string/replace string #"\d?\d(?:st|nd|rd|th)(?=,\s\d{4})" #(or (ordinal->number %) %))) ; lookahead: only ordinals followed by ", YYYY" (a date) are rewritten; unknown ordinals stay as-is
+
+
+;; https://stackoverflow.com/a/11672480 -- extended with . + ? { } | \ which are regex metacharacters as well
+(def regex-esc-char-map ; char -> that char prefixed with a backslash; consumed by clojure.string/escape
+  (let [esc-chars "()*&^%$#![].+?{}|\\"]
+    (zipmap esc-chars
+            (map #(str "\\" %) esc-chars))))
+
+
+;; TODO: consider https://clojuredocs.org/clojure.string/re-quote-replacement if this causes problems (NOTE(review): that fn quotes replacement strings, not patterns -- confirm it fits).
+(defn escape-str
+  "Take a string and escape all regex special characters in it" ; "all" here means the characters keyed in regex-esc-char-map
+  [str] ; NOTE: this parameter shadows clojure.core/str inside the body
+  (string/escape str regex-esc-char-map))
+
+
+(defn contains-unlinked?
+  "Returns true if string contains title unlinked (e.g. not as #title or [[title]]) anywhere, ignoring case."
+  [title string]
+  ;; This would be easier with a lookbehind: (re-pattern (str "(?i)(?<!#)(?<!\\[\\[)" title "(?!\\]\\])"))
+  ;; But Safari doesn't support lookbehinds, so we're using a more complex trick
+  ;; https://www.rexegg.com/regex-best-trick.html#pseudoregex.
+  ;; The regex to find unlinked foo bar would be #foo bar|\[\[foo bar\]\]|(foo bar)
+  ;; the general formula is NotThis|NotThat|GoAway|(WeWantThis)
+  ;; The way it works is that the bad cases fall outside the capture group, so the capture
+  ;; group will only contain the right thing.
+  ;; Check every match (re-seq, not re-find): the first may be linked while a later one is not, e.g. "#foo and foo".
+  (let [t (escape-str title)]
+    (->> string
+         (re-seq (re-pattern (str "(?i)" "#" t "|\\[\\[" t "\\]\\]|(" t ")")))
+         (some second)
+         boolean)))
+
+
+(defn re-case-insensitive
+  "Compiles query, passed through escape-str, into a case-insensitive (?i) pattern. Flag reference: https://clojuredocs.org/clojure.core/re-pattern"
+  [query]
+  (->> query escape-str (str "(?i)") re-pattern))
+
+
+(defn split-on
+  "Splits s at every occurrence of value. Returns a vector alternating the text between occurrences and value itself; an empty or nil value yields [s]."
+  [s value]
+  (loop [last-idx       0
+         word-start-idx (when (seq value) (string/index-of s value)) ; an empty value matches at every index and would never advance
+         ret            []]
+    (if word-start-idx
+      (let [word-end-idx' (+ word-start-idx (count value))]
+        (recur word-end-idx'
+               (string/index-of s value word-end-idx')
+               (-> ret
+                   (conj (subs s last-idx word-start-idx))
+                   (conj (subs s word-start-idx word-end-idx')))))
+      (conj ret (subs s last-idx)))))
 
diff --git a/src/cljs/athens/db.cljs b/src/cljs/athens/db.cljs
@@ -5,7 +5,6 @@
     [athens.common.sentry :refer-macros [defntrace]]
     [athens.electron.utils :as electron.utils]
     [athens.patterns :as patterns]
-    [athens.util :refer [escape-str]]
     [clojure.edn :as edn]
     [clojure.string :as string]
     [datascript.core :as d]
@@ -395,12 +394,6 @@
           (recur (get children (dec n))))))))
 
 
-(defntrace re-case-insensitive
-  "More options here https://clojuredocs.org/clojure.core/re-pattern"
-  [query]
-  (re-pattern (str "(?i)" (escape-str query))))
-
-
 (defntrace search-exact-node-title
   [query]
   (d/entity @dsdb [:node/title query]))
@@ -413,7 +406,7 @@
    (if (string/blank? query)
      (vector)
      (let [exact-match            (when exclude-exact-match? query)
-           case-insensitive-query (re-case-insensitive query)]
+           case-insensitive-query (patterns/re-case-insensitive query)]
        (sequence
          (comp
            (filter (every-pred
@@ -439,7 +432,7 @@
   ([query n]
    (if (string/blank? query)
      (vector)
-     (let [case-insensitive-query (re-case-insensitive query)]
+     (let [case-insensitive-query (patterns/re-case-insensitive query)]
        (->>
          (d/datoms @dsdb :aevt :block/string)
          (sequence
@@ -593,14 +586,14 @@
 ;; -- Linked & Unlinked References ----------
 
 (defntrace get-ref-ids
-  [pattern]
+  [unlinked-f] ; predicate called inside the query with each :block/string value
   (d/q '[:find [?e ...]
-         :in $ ?regex
+         :in $ ?unlinked-f
          :where
          [?e :block/string ?s]
-         [(re-find ?regex ?s)]]
+         [(?unlinked-f ?s)]]
        @dsdb
-       pattern))
+       unlinked-f))
 
 
 (defn merge-parents-and-block
@@ -625,15 +618,10 @@
             blocks))
 
 
-(defn get-data
-  [pattern]
-  (-> pattern get-ref-ids merge-parents-and-block group-by-parent seq))
-
-
 (defntrace get-unlinked-references
   "For node-page references UI."
   [title]
-  (-> title patterns/unlinked get-data))
+  (-> (partial patterns/contains-unlinked? title) get-ref-ids merge-parents-and-block group-by-parent seq))
 
 
 ;; -- save ------------------------------------------------------------

diff --git a/src/cljs/athens/util.cljs b/src/cljs/athens/util.cljs
@@ -179,21 +179,6 @@
   (.. event -target -value))
 
 
-;; -- Regex -----------------------------------------------------------
-
-;; https://stackoverflow.com/a/11672480
-(def regex-esc-char-map
-  (let [esc-chars "()*&^%$#![]"]
-    (zipmap esc-chars
-            (map #(str "\\" %) esc-chars))))
-
-
-(defn escape-str
-  "Take a string and escape all regex special characters in it"
-  [str]
-  (string/escape str regex-esc-char-map))
-
-
 ;; -- specter --------------------------------------------------------