From bc0f24a432e0b73c4a8ec9aee494f95c45dded60 Mon Sep 17 00:00:00 2001 From: Gregory Jefferis <jefferis@gmail.com> Date: Sat, 24 Feb 2024 11:48:56 +0000 Subject: [PATCH 1/2] flyem_shorten_url: filenames, titles, auto-naming --- R/url-shortening.R | 88 ++++++++++++++++++++++++++++------------ man/flyem_shorten_url.Rd | 50 +++++++++++++++++++---- 2 files changed, 104 insertions(+), 34 deletions(-) diff --git a/R/url-shortening.R b/R/url-shortening.R index 2d5bacf..3eede11 100644 --- a/R/url-shortening.R +++ b/R/url-shortening.R @@ -1,50 +1,86 @@ -#' Shorten a Neuroglancer URL using the Janelia FlyEM link shortener +#'Shorten a Neuroglancer URL using the Janelia FlyEM link shortener #' -#' @param url One or more urls to shorten or expand -#' @param filename An optional filename to use in the shortened URL (not yet -#' supported) -#' @param return When expanding, whether to return a long URL, an R list or a -#' JSON text fragment. -#' @param ... Additional arguments passed to \code{httr::POST} +#'@details The default filename for these fragments consists of the date and +#' time to the nearest second. For this reason you will have trouble generating +#' many of these links in quick succession. To overcome this limitation, you +#' can specify your own filename. We also provide two convenience naming +#' methods: #' -#' @return For \code{flyem_shorten_url} a character vector containing a short -#' URL. For \code{flyem_expand_url} see a character vector or list depending -#' on \code{return} argument. If the input \code{url} argument is named vector -#' of length>1, then the output will also be named. -#' @export -#' @details see +#' \itemize{ +#' +#' \item md5 An md5 hash of the URL+title e.g. \code{"9a35fc580f710f3a62b2809a10fe106d.json"} +#' +#' \item ms timestamp to the nearest millisecond e.g. \code{"1708773000.001.json"} +#' +#' } +#' +#' Note that this is an open endpoint so there are two potential security +#' concerns.
URLs named by date/time can potentially be guessed and inspected. +#' Known URLs can be overwritten to point to a new location. If these are +#' concerns then the MD5 hash format has some advantages. +#' +#'@param url One or more URLs to shorten or expand +#'@param filename An optional filename to use in the shortened URL. You can also +#' provide a URL in which case the terminal filename will be extracted. +#'@param title An optional title for the webpage +#'@param method An optional scheme for automatic naming of shortened URLs. See +#' details. +#'@param return When expanding, whether to return a long URL, an R list or a +#' JSON text fragment. +#'@param ... Additional arguments passed to \code{httr::POST} +#' +#'@return For \code{flyem_shorten_url} a character vector containing a short +#' URL. For \code{flyem_expand_url} a character vector or list depending on the +#' \code{return} argument. If the input \code{url} argument is a named vector +#' of length>1, then the output will also be named. +#'@export +#'@details see #' \href{https://flyem-cns.slack.com/archives/C01BZB05M8C/p1669646269799509}{FlyEM #' CNS Slack} for more details. #' #' @examples #' \dontrun{ -#' su=flyem_shorten_url(manc_scene('group:10200')) +#' # this reads the URL from the clipboard +#' su=flyem_shorten_url(clipr::read_clip()) #' lu=flyem_expand_url(su) #' fafbseg::ngl_decode_scene(lu) #' # these give you the same result #' browseURL(su) #' browseURL(lu) +#' +#' # Generate many unique short URLs based on an MD5 hash of the long URL +#' sus=flyem_shorten_url("<Long URLs>", method='md5') #' } -flyem_shorten_url <- function(url, filename=NULL, ...) { +flyem_shorten_url <- function(url, filename=NA_character_, title=NA_character_, + method=c("default", "md5", "ms"), ...) 
{ + method=match.arg(method) if(length(url)>1) { named=!is.null(names(url)) - res <- if(is.null(filename)) - pbapply::pbmapply(flyem_shorten_url, url=url, ..., USE.NAMES = named) - else - pbapply::pbmapply(flyem_shorten_url, url=url, filename=filename, ..., USE.NAMES = named) + res <- pbapply::pbmapply(flyem_shorten_url, url=url, + filename=filename, title=title, ..., + MoreArgs = list(method=method), USE.NAMES = named) return(res) } - # body=list(url, filename=filename) - # body=list(fafbseg::ngl_encode_url(url)) - body <- if(is.null(filename)) url else { - stop("filename argument not yet supported!") - list(url, filename=filename) + if(is.na(title)) title=NULL + if(method=='md5') { + md5=digest::digest(list(url, title), algo = 'md5') + filename=paste0(md5, ".json") + } else if(method=='ms') { + ts=format(round(as.numeric(Sys.time()), digits = 3), digits=15, scientific = F) + filename=paste0(ts, '.json') + } else { + # convert URL to filename as a convenience + if(isTRUE(grepl("^http(s){0,1}://", filename))) + filename=basename(filename) + } + if(is.na(filename)) filename=NULL + body <- if(is.null(filename) && is.null(title)) url else { + list(filename=filename, text=url, title=title) } us='https://shortng-bmcp5imp6q-uc.a.run.app/shortng' - res=httr::POST(url = us, body = body, encode = 'json', ...) + res=httr::POST(url = us, body = body, encode = 'multipart', ...) httr::stop_for_status(res) httr::content(res, as='text') - } #' @export diff --git a/man/flyem_shorten_url.Rd b/man/flyem_shorten_url.Rd index e97bfa1..3df9ed7 100644 --- a/man/flyem_shorten_url.Rd +++ b/man/flyem_shorten_url.Rd @@ -5,15 +5,26 @@ \alias{flyem_expand_url} \title{Shorten a Neuroglancer URL using the Janelia FlyEM link shortener} \usage{ -flyem_shorten_url(url, filename = NULL, ...) +flyem_shorten_url( + url, + filename = NA_character_, + title = NA_character_, + method = c("default", "md5", "ms"), + ... +) flyem_expand_url(url, return = c("url", "json", "parsed"), ...) 
} \arguments{ -\item{url}{One or more urls to shorten or expand} +\item{url}{One or more URLs to shorten or expand} -\item{filename}{An optional filename to use in the shortened URL (not yet -supported)} +\item{filename}{An optional filename to use in the shortened URL. You can also +provide a URL in which case the terminal filename will be extracted.} + +\item{title}{An optional title for the webpage} + +\item{method}{An optional scheme for automatic naming of shortened URLs. See +details.} \item{...}{Additional arguments passed to \code{httr::POST}} @@ -22,25 +33,48 @@ JSON text fragment.} } \value{ For \code{flyem_shorten_url} a character vector containing a short - URL. For \code{flyem_expand_url} see a character vector or list depending - on \code{return} argument. If the input \code{url} argument is named vector - of length>1, then the output will also be named. + URL. For \code{flyem_expand_url} a character vector or list depending on the + \code{return} argument. If the input \code{url} argument is a named vector + of length>1, then the output will also be named. } \description{ Shorten a Neuroglancer URL using the Janelia FlyEM link shortener } \details{ +The default filename for these fragments consists of the date and + time to the nearest second. For this reason you will have trouble generating + many of these links in quick succession. To overcome this limitation, you + can specify your own filename. We also provide two convenience naming + methods: + + \itemize{ + + \item md5 An md5 hash of the URL+title e.g. \code{"9a35fc580f710f3a62b2809a10fe106d.json"} + + \item ms timestamp to the nearest millisecond e.g. \code{"1708773000.001.json"} + + } + + Note that this is an open endpoint so there are two potential security + concerns. URLs named by date/time can potentially be guessed and inspected. + Known URLs can be overwritten to point to a new location. If these are + concerns then the MD5 hash format has some advantages.
+ see \href{https://flyem-cns.slack.com/archives/C01BZB05M8C/p1669646269799509}{FlyEM CNS Slack} for more details. } \examples{ \dontrun{ -su=flyem_shorten_url(manc_scene('group:10200')) +# this reads the URL from the clipboard +su=flyem_shorten_url(clipr::read_clip()) lu=flyem_expand_url(su) fafbseg::ngl_decode_scene(lu) # these give you the same result browseURL(su) browseURL(lu) + +# Generate many unique short URLs based on an MD5 hash of the long URL +sus=flyem_shorten_url("<Long URLs>", method='md5') } } From aeb0bc4eae9a6d0d241f051fb93bace2dadbad6b Mon Sep 17 00:00:00 2001 From: Gregory Jefferis <jefferis@gmail.com> Date: Sat, 24 Feb 2024 11:55:05 +0000 Subject: [PATCH 2/2] add url tests --- tests/testthat/test-url-shortening.R | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 tests/testthat/test-url-shortening.R diff --git a/tests/testthat/test-url-shortening.R b/tests/testthat/test-url-shortening.R new file mode 100644 index 0000000..a893e09 --- /dev/null +++ b/tests/testthat/test-url-shortening.R @@ -0,0 +1,13 @@ +test_that("url shortening works", { + skip_if_offline() + + expect_is(lu <- malevnc::flyem_expand_url('https://neuroglancer-demo.appspot.com/#!gs://flyem-user-links/short/2023-12-15.090703.json'), 'character') + + expect_equal(su <- flyem_shorten_url(c(lu,lu), + title=c("test1", "test2"), + method = 'md5'), + c("https://neuroglancer-demo.appspot.com/#!gs://flyem-user-links/short/59a3c8a14ac42f1561713d5e3c609381.json", + "https://neuroglancer-demo.appspot.com/#!gs://flyem-user-links/short/8e0d1284e3af583db48d92dc02280eea.json" + ) + ) +})