From bc0f24a432e0b73c4a8ec9aee494f95c45dded60 Mon Sep 17 00:00:00 2001
From: Gregory Jefferis <jefferis@gmail.com>
Date: Sat, 24 Feb 2024 11:48:56 +0000
Subject: [PATCH 1/2] flyem_shorten_url: filenames, titles, auto-naming

---
 R/url-shortening.R       | 88 ++++++++++++++++++++++++++++------------
 man/flyem_shorten_url.Rd | 50 +++++++++++++++++++----
 2 files changed, 104 insertions(+), 34 deletions(-)

diff --git a/R/url-shortening.R b/R/url-shortening.R
index 2d5bacf..3eede11 100644
--- a/R/url-shortening.R
+++ b/R/url-shortening.R
@@ -1,50 +1,86 @@
-#' Shorten a Neuroglancer URL using the Janelia FlyEM link shortener
+#'Shorten a Neuroglancer URL using the Janelia FlyEM link shortener
 #'
-#' @param url One or more urls to shorten or expand
-#' @param filename An optional filename to use in the shortened URL (not yet
-#'   supported)
-#' @param return When expanding, whether to return a long URL, an R list or a
-#'   JSON text fragment.
-#' @param ... Additional arguments passed to \code{httr::POST}
+#'@details The default filename for these fragments consists of the date and
+#'  time to the nearest second. For this reason you will have trouble generating
+#'  many of these links in quick succession. To overcome this limitation, you
+#'  can specify your own filename. We also provide two convenience naming
+#'  methods:
 #'
-#' @return For \code{flyem_shorten_url} a character vector containing a short
-#'   URL. For \code{flyem_expand_url} see a character vector or list depending
-#'   on \code{return} argument. If the input \code{url} argument is named vector
-#'   of length>1, then the output will also be named.
-#' @export
-#' @details see
+#'  \itemize{
+#'
+#'    \item md5 An md5 hash of the URL+title e.g. \code{"9a35fc580f710f3a62b2809a10fe106d.json"}
+#'
+#'    \item ms timestamp to the nearest millisecond e.g. \code{"1708773000.001.json"}
+#'
+#'   }
+#'
+#'  Note that this is an open endpoint so there are two potential security
+#'  concerns. URLs named by date/time can potentially be guessed and inspected.
+#'  Known URLs can be overwritten to point to a new location. If these are
+#'  concerns then the MD5 hash format has some advantages.
+#'
+#'@param url One or more URLs to shorten or expand
+#'@param filename An optional filename to use in the shortened URL. You can also
+#'  provide a URL in which case the terminal filename will be extracted.
+#'@param title An optional title for the webpage
+#'@param method An optional scheme for automatic naming of shortened URLs. See
+#'  details.
+#'@param return When expanding, whether to return a long URL, an R list or a
+#'  JSON text fragment.
+#'@param ... Additional arguments passed to \code{httr::POST}
+#'
+#'@return For \code{flyem_shorten_url} a character vector containing a short
+#'  URL. For \code{flyem_expand_url} a character vector or list depending on the
+#'  \code{return} argument. If the input \code{url} argument is a named vector
+#'  of length>1, then the output will also be named.
+#'@export
+#'@details see
 #'   \href{https://flyem-cns.slack.com/archives/C01BZB05M8C/p1669646269799509}{FlyEM
 #'    CNS Slack} for more details.
 #'
 #' @examples
 #' \dontrun{
-#' su=flyem_shorten_url(manc_scene('group:10200'))
+#' # this reads the URL from the clipboard
+#' su=flyem_shorten_url(clipr::read_clip())
 #' lu=flyem_expand_url(su)
 #' fafbseg::ngl_decode_scene(lu)
 #' # these give you the same result
 #' browseURL(su)
 #' browseURL(lu)
+#'
+#' # Generate many unique short URLs based on an MD5 hash of the long URL
+#' sus=flyem_shorten_url("<Long URLs>", method='md5')
 #' }
-flyem_shorten_url <- function(url, filename=NULL, ...) {
+flyem_shorten_url <- function(url, filename=NA_character_, title=NA_character_,
+                              method=c("default", "md5", "ms"), ...) {
+  method=match.arg(method) # one of "default", "md5" or "ms"
   if(length(url)>1) {
     named=!is.null(names(url))
-    res <- if(is.null(filename))
-      pbapply::pbmapply(flyem_shorten_url, url=url, ..., USE.NAMES = named)
-    else
-      pbapply::pbmapply(flyem_shorten_url, url=url, filename=filename, ..., USE.NAMES = named)
+    res <- pbapply::pbmapply(flyem_shorten_url, url=url, # one call per URL
+                        filename=filename, title=title, ...,
+                        MoreArgs = list(method=method), USE.NAMES = named)
     return(res)
   }
-  # body=list(url, filename=filename)
-  # body=list(fafbseg::ngl_encode_url(url))
-  body <- if(is.null(filename)) url else {
-    stop("filename argument not yet supported!")
-    list(url, filename=filename)
+  if(isTRUE(is.na(title))) title=NULL # NA (the default) means no title; NULL is tolerated
+  if(method=='md5') {
+    md5=digest::digest(list(url, title), algo = 'md5') # name depends on URL + title
+    filename=paste0(md5, ".json")
+  } else if(method=='ms') {
+    ts=sprintf("%.3f", as.numeric(Sys.time())) # epoch seconds, always 3 decimals, "." mark
+    filename=paste0(ts, '.json')
+  } else {
+    # convert URL to filename as a convenience
+    if(isTRUE(grepl("^http(s){0,1}://", filename)))
+      filename=basename(filename)
+  }
+  if(isTRUE(is.na(filename))) filename=NULL # isTRUE() so a NULL filename is not an error
+  body <- if(is.null(filename) && is.null(title)) url else {
+    list(filename=filename, text=url, title=title)
   }
   us='https://shortng-bmcp5imp6q-uc.a.run.app/shortng'
-  res=httr::POST(url = us, body = body, encode = 'json', ...)
+  res=httr::POST(url = us, body = body, encode = 'multipart', ...)
   httr::stop_for_status(res)
   httr::content(res, as='text')
-
 }
 
 #' @export
diff --git a/man/flyem_shorten_url.Rd b/man/flyem_shorten_url.Rd
index e97bfa1..3df9ed7 100644
--- a/man/flyem_shorten_url.Rd
+++ b/man/flyem_shorten_url.Rd
@@ -5,15 +5,26 @@
 \alias{flyem_expand_url}
 \title{Shorten a Neuroglancer URL using the Janelia FlyEM link shortener}
 \usage{
-flyem_shorten_url(url, filename = NULL, ...)
+flyem_shorten_url(
+  url,
+  filename = NA_character_,
+  title = NA_character_,
+  method = c("default", "md5", "ms"),
+  ...
+)
 
 flyem_expand_url(url, return = c("url", "json", "parsed"), ...)
 }
 \arguments{
-\item{url}{One or more urls to shorten or expand}
+\item{url}{One or more URLs to shorten or expand}
 
-\item{filename}{An optional filename to use in the shortened URL (not yet
-supported)}
+\item{filename}{An optional filename to use in the shortened URL. You can also
+provide a URL in which case the terminal filename will be extracted.}
+
+\item{title}{An optional title for the webpage}
+
+\item{method}{An optional scheme for automatic naming of shortened URLs. See
+details.}
 
 \item{...}{Additional arguments passed to \code{httr::POST}}
 
@@ -22,25 +33,48 @@ JSON text fragment.}
 }
 \value{
 For \code{flyem_shorten_url} a character vector containing a short
-  URL. For \code{flyem_expand_url} see a character vector or list depending
-  on \code{return} argument. If the input \code{url} argument is named vector
-  of length>1, then the output will also be named.
+ URL. For \code{flyem_expand_url} a character vector or list depending on the
+ \code{return} argument. If the input \code{url} argument is a named vector
+ of length>1, then the output will also be named.
 }
 \description{
 Shorten a Neuroglancer URL using the Janelia FlyEM link shortener
 }
 \details{
+The default filename for these fragments consists of the date and
+ time to the nearest second. For this reason you will have trouble generating
+ many of these links in quick succession. To overcome this limitation, you
+ can specify your own filename. We also provide two convenience naming
+ methods:
+
+ \itemize{
+
+   \item md5 An md5 hash of the URL+title e.g. \code{"9a35fc580f710f3a62b2809a10fe106d.json"}
+
+   \item ms timestamp to the nearest millisecond e.g. \code{"1708773000.001.json"}
+
+  }
+
+ Note that this is an open endpoint so there are two potential security
+ concerns. URLs named by date/time can potentially be guessed and inspected.
+ Known URLs can be overwritten to point to a new location. If these are
+ concerns then the MD5 hash format has some advantages.
+
 see
   \href{https://flyem-cns.slack.com/archives/C01BZB05M8C/p1669646269799509}{FlyEM
    CNS Slack} for more details.
 }
 \examples{
 \dontrun{
-su=flyem_shorten_url(manc_scene('group:10200'))
+# this reads the URL from the clipboard
+su=flyem_shorten_url(clipr::read_clip())
 lu=flyem_expand_url(su)
 fafbseg::ngl_decode_scene(lu)
 # these give you the same result
 browseURL(su)
 browseURL(lu)
+
+# Generate many unique short URLs based on an MD5 hash of the long URL
+sus=flyem_shorten_url("<Long URLs>", method='md5')
 }
 }

From aeb0bc4eae9a6d0d241f051fb93bace2dadbad6b Mon Sep 17 00:00:00 2001
From: Gregory Jefferis <jefferis@gmail.com>
Date: Sat, 24 Feb 2024 11:55:05 +0000
Subject: [PATCH 2/2] add url tests

---
 tests/testthat/test-url-shortening.R | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 tests/testthat/test-url-shortening.R

diff --git a/tests/testthat/test-url-shortening.R b/tests/testthat/test-url-shortening.R
new file mode 100644
index 0000000..a893e09
--- /dev/null
+++ b/tests/testthat/test-url-shortening.R
@@ -0,0 +1,13 @@
+test_that("url shortening works", { # round trip: expand a known short URL, then re-shorten it
+  skip_if_offline() # flyem_shorten_url POSTs to the live shortener endpoint
+
+  expect_is(lu <- malevnc::flyem_expand_url('https://neuroglancer-demo.appspot.com/#!gs://flyem-user-links/short/2023-12-15.090703.json'), 'character')
+
+  expect_equal(su <- flyem_shorten_url(c(lu,lu), # same long URL twice ...
+                                       title=c("test1", "test2"), # ... but different titles
+                                       method = 'md5'), # md5 names hash URL + title, so they differ
+               c("https://neuroglancer-demo.appspot.com/#!gs://flyem-user-links/short/59a3c8a14ac42f1561713d5e3c609381.json",
+                 "https://neuroglancer-demo.appspot.com/#!gs://flyem-user-links/short/8e0d1284e3af583db48d92dc02280eea.json"
+               )
+  )
+})