Skip to content

Commit

Permalink
Merge pull request #2196 from athensresearch/parser-lookbehind
Browse files Browse the repository at this point in the history
Support Safari on Mac and iOS
  • Loading branch information
filipesilva authored May 27, 2022
2 parents d37352f + 65615f6 commit 5869f32
Show file tree
Hide file tree
Showing 12 changed files with 292 additions and 952 deletions.
35 changes: 10 additions & 25 deletions resources/public/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -32,35 +32,20 @@
localStorage.setItem("day8.re-frame-10x.show-panel","\"true\"")
localStorage.setItem("day8.re-frame-10x.using-trace?","\"true\"")
}
// Reports whether the current browser is one the app refuses to run in.
// Reads the `userAgent` variable from the surrounding script scope; the probes
// below are lower-case, so it presumably holds a lower-cased UA string (verify
// against where `userAgent` is assigned).
function isBrowserUnsupported() {
  // Chrome also has applewebkit in the useragent, but it is supported.
  var looksLikeChrome = userAgent.indexOf("chrome/") !== -1;
  var looksLikeWebkit = userAgent.indexOf("applewebkit/") !== -1;
  if (looksLikeChrome) {
    return false;
  }
  return looksLikeWebkit;
}
// Appends a plain-text notice to the end of <body> telling the user that
// their browser is not supported. Has no return value; its only effect is
// the added <div>.
function showUnsupportedBrowserWarning() {
  // `var` matches the declaration style used by the rest of this inline script.
  var warnDiv = window.document.createElement("div");
  // Wording fixed: the original read "Safari based browsers based are not supported".
  warnDiv.innerText = "Safari-based browsers are not supported";
  window.document.body.appendChild(warnDiv);
}
</script>
<script>
if (isBrowserUnsupported()) {
showUnsupportedBrowserWarning();
var electron = isElectron()
var src = ""
if (electron) {
src = "js/compiled/renderer.js"
show10x() // 10x exists on DOM by default, but athens.style hides via CSS
} else {
var electron = isElectron()
var src = ""
if (electron) {
src = "js/compiled/renderer.js"
show10x() // 10x exists on DOM by default, but athens.style hides via CSS
} else {
src = "js/compiled/app.js"
}

var script = document.createElement("script");
script.setAttribute("src", src);
document.getElementsByTagName("head")[0].appendChild(script);
src = "js/compiled/app.js"
}

var script = document.createElement("script");
script.setAttribute("src", src);
document.getElementsByTagName("head")[0].appendChild(script);
</script>
</body>
</html>
61 changes: 30 additions & 31 deletions src/cljc/athens/parser/impl.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -74,86 +74,85 @@ inline = recur
(* closing `x` has: *)
(* - `(?<!\\s)`: it can't be preceded by a white space *)
(* - `(?!\\w)`: it can't be followed by a word character, when it can don't include it *)
(* regex lookbehinds `?<!` don't work at the start of a token so we're not using them *)
code-span = <#'(?<!\\w)`'>
#'(?s)([^`]|(?<=\\s)`(?=\\s))+'
code-span = <#'`'>
#'(?s)([^`]|\\B`(?=\\s))+'
<#'`(?!\\w)'>
strong-emphasis = <#'(?<!\\w)\\*\\*(?!\\s)'>
strong-emphasis = <#'\\*\\*(?!\\s)'>
recur
<#'(?<!\\s)\\*\\*(?!\\w)'>
<#'\\*\\*(?!\\w)'>
emphasis = <#'(?<!\\w)\\*(?!\\s)'>
emphasis = <#'\\*(?!\\s)'>
recur
<#'(?<!\\s)\\*(?!\\w)'>
<#'\\*(?!\\w)'>
highlight = <#'(?<!\\w)\\^\\^(?!\\s)'>
highlight = <#'\\^\\^(?!\\s)'>
recur
<#'(?<!\\s)\\^\\^(?!\\w)'>
<#'\\^\\^(?!\\w)'>
strikethrough = <#'(?<!\\w)~~(?!\\s)'>
strikethrough = <#'~~(?!\\s)'>
recur
<#'(?<!\\s)~~(?!\\w)'>
<#'~~(?!\\w)'>
link = md-link
image = <'!'> md-link
<md-link> = <#'(?<!\\w)\\[(?!\\s)'>
<md-link> = <#'\\[(?!\\s)'>
link-text
<#'(?<!\\s)\\]\\((?!\\s)'>
<#'\\]\\((?!\\s)'>
link-target
(<' '> link-title)?
<#'(?<!\\s)\\)(?!\\w)'>
<#'\\)(?!\\w)'>
link-text = #'([^\\]]|\\\\\\])*?(?=\\]\\()'
link-target = ( #'[^\\s\\(\\)]+' | '(' #'[^\\s\\)]*' ')' | '\\\\' ( '(' | ')' ) | #'\\s(?![\"\\'\\(])' )+
link-title = <'\"'> #'[^\"]+' <'\"'>
| <'\\''> #'[^\\']+' <'\\''>
| <'('> #'[^\\)]+' <')'>
autolink = <#'(?<!\\w)<(?!\\s)'>
autolink = <#'<(?!\\s)'>
#'[^>\\s]+'
<#'(?<!\\s)>(?!\\w)'>
<#'>(?!\\w)'>
block-ref = title?
<#'\\(\\((?!\\s)'>
#'.+?(?=\\)\\))'
<#'(?<!\\s)\\)\\)'>
<#'\\)\\)'>
page-link = title?
<#'(?<!\\w)\\[\\[(?!\\s)'>
<#'\\[\\[(?!\\s)'>
(#'[^\\[\\]\\#\\n]+' | page-link | hashtag-naked | hashtag-braced)+
<#'(?<!\\s)\\]\\](?!\\w)'>
<#'\\]\\](?!\\w)'>
hashtag-naked = <#'(?<!\\w)\\#(?!\\s)'>
hashtag-naked = <#'\\#(?!\\s)'>
#'[^\\ \\+\\!\\@\\#\\$\\%\\^\\&\\*\\(\\)\\?\\\"\\;\\:\\]\\[]+(?!\\w)'
hashtag-braced = <#'(?<!\\w)\\#\\[\\[(?!\\s)'>
hashtag-braced = <#'\\#\\[\\[(?!\\s)'>
(#'[^\\[\\]\\#\\n]+' | page-link | hashtag-naked | hashtag-braced)+
<#'(?<!\\s)\\]\\](?!\\w)'>
<#'\\]\\](?!\\w)'>
component = <#'(?<!\\w)\\{\\{(?!\\s)'>
component = <#'\\{\\{(?!\\s)'>
(page-link / block-ref / #'.+(?=\\}\\})')
<#'(?<!\\s)\\}\\}(?!\\w)'>
<#'\\}\\}(?!\\w)'>
title = <#'(?<!\\w)\\[(?!\\s)'>
title = <#'\\[(?!\\s)'>
#'([^\\]]|\\\\\\])+(?=\\])'
<#'(?<!\\s)\\](?!\\s)'>
<#'\\](?!\\s)'>
latex = <#'(?<!\\w)\\$\\$(?!\\s)'>
latex = <#'\\$\\$(?!\\s)'>
#'(?s).+?(?=\\$\\$)'
<#'(?<!\\s)\\$\\$(?!\\w)'>
<#'\\$\\$(?!\\w)'>
(* characters with meaning (special chars) *)
(* every delimiter used as inline span boundary has to be added below *)
(* anything but special chars *)
text-run = #'(?:[^\\*`\\^~\\[!<\\(\\#\\$\\{\\r\\n]|(?<=\\S)[`!\\#\\$\\{])+'
text-run = #'(?:[^\\*`\\^~\\[!<\\(\\#\\$\\{\\r\\n]|\\b[`!\\#\\$\\{])+'
(* any special char *)
<special-char> = #'(?<!\\w)[\\*`^~\\[!<\\(\\#\\$\\{]'
<backtick> = #'(?<!`)`(?!`)'
<special-char> = #'[\\*`^~\\[!<\\(\\#\\$\\{]'
newline = #'\\n'
")
Expand Down
109 changes: 92 additions & 17 deletions src/cljc/athens/patterns.cljc
Original file line number Diff line number Diff line change
@@ -1,19 +1,6 @@
(ns athens.patterns)


(defn unlinked
"Builds a case-insensitive pattern that matches `string` only where it is not
written as #title or [[title]]: the match must not be preceded by `#` or `[[`
(negative lookbehinds) and must not be followed by `]]` (negative lookahead).
`string` is spliced into the pattern as-is, so any regex metacharacters it
contains are interpreted as regex syntax rather than matched literally.
JavaScript negative lookarounds https://javascript.info/regexp-lookahead-lookbehind
Lookarounds don't consume characters https://stackoverflow.com/questions/27179991/regex-matching-multiple-negative-lookahead"
[string]
(re-pattern (str "(?i)(?<!#)(?<!\\[\\[)" string "(?!\\]\\])")))


;; Matches a date with an ordinal number (roam format), considering the correct ordinal
;; suffix based on the ending number of the date
;; Regular expression, with test cases can be found here https://regex101.com/r/vOzOl9/1
;; Any update to this should be done after testing it using the previous regex101 link
;;
;; Only the ordinal suffix and the comma after it are consumed by a match; everything
;; around them is checked with lookarounds:
;; - `th` when preceded by whitespace, `1` and one more digit (10-19)
;; - otherwise, when preceded by (whitespace or a digit other than 1) and a digit:
;;   `st` after 1, `nd` after 2, `rd` after 3, `th` after 4-9 or 0
;; - the comma must be followed by whitespace and four digits
;; The pattern relies on lookbehind assertions (`(?<=...)`).
(def roam-date #"((?<=\s1\d)th|(?<=(\s|[023456789])\d)((?<=1)st|(?<=2)nd|(?<=3)rd|(?<=[4567890])th)),(?=\s\d{4})")
(ns athens.patterns
(:require
[clojure.string :as string]))


(defn date
Expand All @@ -26,7 +13,95 @@
(re-find #"\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s\d{1,2}(?:st|nd|rd|th),\s\d{4}\b" str))


(def ordinal->number
  "Lookup from a day-of-month ordinal (\"1st\" through \"31st\") to the same day
  written as a plain numeric string (\"1\" through \"31\")."
  (letfn [(suffix
            [day]
            (cond
              ;; 11, 12 and 13 take \"th\" despite ending in 1, 2 and 3.
              (<= 11 day 13)     "th"
              (= 1 (mod day 10)) "st"
              (= 2 (mod day 10)) "nd"
              (= 3 (mod day 10)) "rd"
              :else              "th"))]
    (into {}
          (map (fn [day]
                 [(str day (suffix day)) (str day)]))
          (range 1 32))))


(defn replace-roam-date
[string]
(clojure.string/replace string athens.patterns/roam-date ","))
(string/replace string #"\d?\d(?:st|nd|rd|th)" #(or (ordinal->number %) %)))


;; https://stackoverflow.com/a/11672480
;; Maps each character that must be neutralised to its backslash-escaped form.
;; Besides the original set `()*&^%$#![]`, this also covers `.`, `+`, `?`, `{`,
;; `}`, `|` and `\`, which were previously passed through and therefore compiled
;; as regex syntax (e.g. the `.` in "a.b" matched any character).
(def regex-esc-char-map
  (let [esc-chars "()*&^%$#![].+?{}|\\"]
    (zipmap esc-chars
            (map #(str "\\" %) esc-chars))))


;; TODO: consider https://clojuredocs.org/clojure.string/re-quote-replacement if this causes problems.
(defn escape-str
  "Take a string and escape all regex special characters in it.
  Returns a new string in which every character listed in
  `regex-esc-char-map` is prefixed with a backslash, so the result can be
  embedded in a pattern and matched literally."
  ;; The parameter is named `s` (not `str`) so it does not shadow clojure.core/str.
  [s]
  (string/escape s regex-esc-char-map))


(defn contains-unlinked?
  "Returns true if string contains title unlinked (e.g. not as #title or [[title]]).
  The comparison is case-insensitive and `title` is escaped with `escape-str`
  before being placed in the pattern. Every match in `string` is examined, so
  an unlinked occurrence is found even when a linked one comes first."
  [title string]
  ;; This would be easier with a lookbehind: (re-pattern (str "(?i)(?<!#)(?<!\\[\\[)" title "(?!\\]\\])"))
  ;; But Safari doesn't support lookbehinds, so we're using a more complex trick
  ;; https://www.rexegg.com/regex-best-trick.html#pseudoregex.
  ;; The regex to find unlinked foo bar would be #foo bar|\[\[foo bar\]\]|(foo bar)
  ;; the general formula is NotThis|NotThat|GoAway|(WeWantThis)
  ;; The way it works is that the bad cases fall outside the capture group, so the capture
  ;; group will only contain the right thing.
  ;; We need to look inside the capture groups with this method though.
  (let [t       (escape-str title)
        pattern (re-pattern (str "(?i)" "#" t "|\\[\\[" t "\\]\\]|(" t ")"))]
    ;; re-find would stop at the first match; if that one is a linked form its
    ;; capture group is empty and later unlinked occurrences would be missed.
    ;; re-seq walks all matches, and `some` stops at the first one whose capture
    ;; group (the second element of the match vector) is set.
    (->> (re-seq pattern string)
         (some second)
         boolean)))


(defn re-case-insensitive
  "Compiles `query` into a case-insensitive pattern: the query is first passed
  through `escape-str`, then prefixed with the `(?i)` flag.
  More options here https://clojuredocs.org/clojure.core/re-pattern"
  [query]
  (let [escaped (escape-str query)]
    (re-pattern (str "(?i)" escaped))))


(defn split-on
  "Splits string whenever value is encountered. Returns all substrings including value.
  The result is a vector alternating between the text before each occurrence and
  the occurrence itself, ending with the text after the last occurrence (which
  may be the empty string). When `value` is empty or never occurs, the result
  is a one-element vector holding `s`."
  [s value]
  (if (empty? value)
    ;; An empty value is found at every index without advancing the cursor,
    ;; which would loop forever; treat it as never encountered.
    [s]
    (loop [last-idx       0
           word-start-idx (string/index-of s value)
           ret            []]
      (if word-start-idx
        (let [word-end-idx' (+ word-start-idx (count value))]
          ;; Resume the search right after this occurrence.
          (recur word-end-idx'
                 (string/index-of s value word-end-idx')
                 (-> ret
                     (conj (subs s last-idx word-start-idx))
                     (conj (subs s word-start-idx word-end-idx')))))
        (conj ret (subs s last-idx))))))

26 changes: 7 additions & 19 deletions src/cljs/athens/db.cljs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
[athens.common.sentry :refer-macros [defntrace]]
[athens.electron.utils :as electron.utils]
[athens.patterns :as patterns]
[athens.util :refer [escape-str]]
[clojure.edn :as edn]
[clojure.string :as string]
[datascript.core :as d]
Expand Down Expand Up @@ -395,12 +394,6 @@
(recur (get children (dec n))))))))


(defntrace re-case-insensitive
  "Compiles `query` into a case-insensitive pattern: the query is first passed
  through `escape-str`, then prefixed with the `(?i)` flag.
  More options here https://clojuredocs.org/clojure.core/re-pattern"
  [query]
  (let [escaped (escape-str query)]
    (re-pattern (str "(?i)" escaped))))


(defntrace search-exact-node-title
  "Looks up the entity whose `:node/title` is exactly `query` in the current
  value of `dsdb`, using a `[:node/title query]` lookup ref."
  [query]
  (let [title-ref [:node/title query]]
    (d/entity @dsdb title-ref)))
Expand All @@ -413,7 +406,7 @@
(if (string/blank? query)
(vector)
(let [exact-match (when exclude-exact-match? query)
case-insensitive-query (re-case-insensitive query)]
case-insensitive-query (patterns/re-case-insensitive query)]
(sequence
(comp
(filter (every-pred
Expand All @@ -439,7 +432,7 @@
([query n]
(if (string/blank? query)
(vector)
(let [case-insensitive-query (re-case-insensitive query)]
(let [case-insensitive-query (patterns/re-case-insensitive query)]
(->>
(d/datoms @dsdb :aevt :block/string)
(sequence
Expand Down Expand Up @@ -593,14 +586,14 @@
;; -- Linked & Unlinked References ----------

(defntrace get-ref-ids
[pattern]
[unlinked-f]
(d/q '[:find [?e ...]
:in $ ?regex
:in $ ?unlinked-f
:where
[?e :block/string ?s]
[(re-find ?regex ?s)]]
[(?unlinked-f ?s)]]
@dsdb
pattern))
unlinked-f))


(defn merge-parents-and-block
Expand All @@ -625,15 +618,10 @@
blocks))


(defn get-data
  "Feeds `pattern` to `get-ref-ids`, passes the result through
  `merge-parents-and-block` and `group-by-parent`, and returns the grouped
  result as a seq (nil when it is empty)."
  [pattern]
  (let [ref-ids (get-ref-ids pattern)
        blocks  (merge-parents-and-block ref-ids)]
    (seq (group-by-parent blocks))))


(defntrace get-unlinked-references
"For node-page references UI."
[title]
(-> title patterns/unlinked get-data))
(-> (partial patterns/contains-unlinked? title) get-ref-ids merge-parents-and-block group-by-parent seq))


;; -- save ------------------------------------------------------------
Expand Down
15 changes: 0 additions & 15 deletions src/cljs/athens/util.cljs
Original file line number Diff line number Diff line change
Expand Up @@ -179,21 +179,6 @@
(.. event -target -value))


;; -- Regex -----------------------------------------------------------

;; https://stackoverflow.com/a/11672480
;; Maps each character that must be neutralised to its backslash-escaped form.
;; Besides the original set `()*&^%$#![]`, this also covers `.`, `+`, `?`, `{`,
;; `}`, `|` and `\`, which were previously passed through and therefore compiled
;; as regex syntax (e.g. the `.` in "a.b" matched any character).
(def regex-esc-char-map
  (let [esc-chars "()*&^%$#![].+?{}|\\"]
    (zipmap esc-chars
            (map #(str "\\" %) esc-chars))))


(defn escape-str
  "Take a string and escape all regex special characters in it.
  Returns a new string in which every character listed in
  `regex-esc-char-map` is prefixed with a backslash, so the result can be
  embedded in a pattern and matched literally."
  ;; The parameter is named `s` (not `str`) so it does not shadow cljs.core/str.
  [s]
  (string/escape s regex-esc-char-map))


;; -- specter --------------------------------------------------------


Expand Down
Loading

0 comments on commit 5869f32

Please sign in to comment.