From bf0136f6a974348605c8abd68cb42051dc6d93ac Mon Sep 17 00:00:00 2001
From: ncondits3 <ncondits3@gatech.edu>
Date: Sun, 24 Dec 2023 07:43:10 -0500
Subject: [PATCH] First take at point-wise mutual info

---
 NAMESPACE              | 36 ++++++++++---------
 R/Distributions.R      | 78 +++++++++++++++++++++++++++++++++---------
 man/cleave.Rd          |  6 +---
 man/distributions.Rd   | 22 ++++++------
 man/entropy.Rd         |  7 ++--
 man/groupHumdrum.Rd    |  4 +--
 man/pullHumdrum.Rd     |  2 +-
 man/rend.Rd            |  2 +-
 man/selectedFields.Rd  |  2 +-
 man/subset.humdrumR.Rd |  8 +----
 man/withinHumdrum.Rd   |  8 ++---
 11 files changed, 103 insertions(+), 72 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 3817bdde..d683ad41 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -23,6 +23,9 @@ S3method(chord,default)
 S3method(chord,humdrumR)
 S3method(context,default)
 S3method(context,humdrumR)
+S3method(count,default)
+S3method(count,humdrumR)
+S3method(count,table)
 S3method(deg,default)
 S3method(deg,humdrumR)
 S3method(degree,default)
@@ -51,6 +54,8 @@ S3method(entropy,probability)
 S3method(expandPaths,humdrumR)
 S3method(figuredBass,default)
 S3method(figuredBass,humdrumR)
+S3method(filter,distribution)
+S3method(filter,humdrumR)
 S3method(format,struct)
 S3method(format,token)
 S3method(freq,default)
@@ -60,8 +65,10 @@ S3method(fromgrid,integer)
 S3method(fromgrid,logical)
 S3method(fromgrid,matrix)
 S3method(fromgrid,numeric)
+S3method(ggplot,humdrumR)
 S3method(grid,default)
 S3method(grid,humdrumR)
+S3method(group_by,humdrumR)
 S3method(harm,default)
 S3method(harm,humdrumR)
 S3method(helmholtz,default)
@@ -89,6 +96,7 @@ S3method(kern,humdrumR)
 S3method(lag,data.frame)
 S3method(lag,default)
 S3method(lag,matrix)
+S3method(like,data.frame)
 S3method(lilypond,default)
 S3method(lilypond,humdrumR)
 S3method(measure,"NULL")
@@ -106,6 +114,7 @@ S3method(mint,default)
 S3method(mint,humdrumR)
 S3method(ms,default)
 S3method(ms,humdrumR)
+S3method(mutate,humdrumR)
 S3method(mutualInfo,default)
 S3method(mutualInfo,probability)
 S3method(names,humdrumR)
@@ -120,9 +129,12 @@ S3method(pdist,count)
 S3method(pdist,data.frame)
 S3method(pdist,default)
 S3method(pdist,humdrumR)
+S3method(pdist,probability)
 S3method(pdist,table)
 S3method(pitch,default)
 S3method(pitch,humdrumR)
+S3method(pivot_longer,humdrumR)
+S3method(pivot_wider,humdrumR)
 S3method(print,Excluder)
 S3method(print,distribution)
 S3method(print,fraction)
@@ -133,6 +145,7 @@ S3method(print,humSpines)
 S3method(print,humdrumDispatch)
 S3method(print,humdrumR)
 S3method(print,humdrumRmethod)
+S3method(pull,humdrumR)
 S3method(quarters,default)
 S3method(quarters,humdrumR)
 S3method(rbind,humdrumR.table)
@@ -142,6 +155,7 @@ S3method(recip,default)
 S3method(recip,humdrumR)
 S3method(reference,character)
 S3method(reference,humdrumR)
+S3method(reframe,humdrumR)
 S3method(rep,token)
 S3method(rhythmInterval,"NULL")
 S3method(rhythmInterval,character)
@@ -152,8 +166,10 @@ S3method(rhythmInterval,numeric)
 S3method(rhythmInterval,token)
 S3method(roman,default)
 S3method(roman,humdrumR)
+S3method(scale_type,token)
 S3method(seconds,default)
 S3method(seconds,humdrumR)
+S3method(select,humdrumR)
 S3method(semits,default)
 S3method(semits,humdrumR)
 S3method(sigma,default)
@@ -165,6 +181,7 @@ S3method(solfg,humdrumR)
 S3method(subset,humdrumR)
 S3method(sumTies,default)
 S3method(sumTies,humdrumR)
+S3method(summarise,humdrumR)
 S3method(summary,humdrumR)
 S3method(tactus,"NULL")
 S3method(tactus,character)
@@ -203,6 +220,7 @@ S3method(transpose,factor)
 S3method(transpose,numeric)
 S3method(transpose,token)
 S3method(transpose,tonalInterval)
+S3method(ungroup,humdrumR)
 S3method(unique,struct)
 S3method(unique,token)
 S3method(with,humdrumR)
@@ -308,9 +326,6 @@ export(combineFields)
 export(complement)
 export(context)
 export(count)
-export(count.default)
-export(count.humdrumR)
-export(count.table)
 export(d1)
 export(d10)
 export(d11)
@@ -359,8 +374,6 @@ export(expandPaths)
 export(fields)
 export(figuredBass)
 export(filter)
-export(filter.distribution)
-export(filter.humdrumR)
 export(findHumdrum)
 export(fraction)
 export(freq)
@@ -369,10 +382,8 @@ export(gamut)
 export(getHumtab)
 export(getRE)
 export(getREexclusive)
-export(ggplot.humdrumR)
 export(grid)
 export(group_by)
-export(group_by.humdrumR)
 export(groups)
 export(harm)
 export(helmholtz)
@@ -409,7 +420,7 @@ export(key)
 export(knownInterpretations)
 export(lag)
 export(lead)
-export(likely)
+export(like)
 export(lilypond)
 export(m10)
 export(m13)
@@ -435,7 +446,6 @@ export(mint)
 export(ms)
 export(ms2bpm)
 export(mutate)
-export(mutate.humdrumR)
 export(mutualInfo)
 export(namesSubcorpora)
 export(nbeats)
@@ -455,11 +465,8 @@ export(order.tonalInterval)
 export(pc)
 export(pdist)
 export(pitch)
-export(pivot_longer.humdrumR)
-export(pivot_wider.humdrumR)
 export(printSilbeFormat)
 export(pull)
-export(pull.humdrumR)
 export(pull_data.frame)
 export(pull_data.table)
 export(pull_tibble)
@@ -471,7 +478,6 @@ export(recip)
 export(recordDuration)
 export(reference)
 export(reframe)
-export(reframe.humdrumR)
 export(removeEmptyFiles)
 export(removeEmptyPaths)
 export(removeEmptyPieces)
@@ -484,14 +490,12 @@ export(rhythmInterval)
 export(roman)
 export(romanKey)
 export(rootPosition)
-export(scale_type.token)
 export(scale_x_token)
 export(sec2bpm)
 export(seconds)
 export(sections)
 export(segments)
 export(select)
-export(select.humdrumR)
 export(selectedFields)
 export(semits)
 export(sigma)
@@ -505,7 +509,6 @@ export(step)
 export(subpos)
 export(sumTies)
 export(summarise)
-export(summarise.humdrumR)
 export(summarize)
 export(syncopation)
 export(table)
@@ -529,7 +532,6 @@ export(uncontext)
 export(unfilter)
 export(unfoldStops)
 export(ungroup)
-export(ungroup.humdrumR)
 export(unison)
 export(validateHumdrum)
 export(varnames)
diff --git a/R/Distributions.R b/R/Distributions.R
index 96b8d4a9..68611c1c 100644
--- a/R/Distributions.R
+++ b/R/Distributions.R
@@ -290,6 +290,7 @@ setMethod('[', c('distribution', 'ANY', 'missing'),
           function(x, i, drop = FALSE) {
 
             df <- as.data.frame(x)[i , ,  drop = FALSE]
+            
             if (drop) df else distribution(df, x)
           })
 # 
@@ -322,9 +323,11 @@ setMethod('[', c('distribution', 'ANY', 'ANY'),
           })
 
 setMethod('[', c('distribution', 'matrix'),
-          function(x, i, j, cartesian = FALSE, drop = FALSE) {
+          function(x, i, j, cartesian = TRUE, drop = FALSE) {
+            
+            i <- as.data.frame(i)
+            names(i) <- NULL
             
-            i <- as.list(i)
             do.call('[[', c(list(x, cartesian = cartesian, drop = drop), i))
             
           })
@@ -342,6 +345,11 @@ setMethod('[[', 'distribution',
             missing <- sapply(args, rlang::is_missing)
             args[!missing] <- lapply(args[!missing], rlang::eval_tidy)
             
+            if (missing[2] && length(args) == 2L) {
+              args <- args[1L]
+              missing <- missing[1L]
+            }
+            
         
             levels <- getLevels(x)
             if (length(args) > length(levels)) .stop("This distribution only has {num2print(length(levels))} dimensions to index.",
@@ -801,7 +809,7 @@ pdist.count <-  function(x, ..., condition = NULL, na.rm = FALSE, sort = FALSE,
   
   dist <- distribution(x, 'p', N = n)
   
-  if (!is.null(condition)) dist <- conditionalize(dist, condition = condition)
+  if (!is.null(condition)) dist <- conditional(dist, condition = condition)
   
   if (sort) dist <- sort(dist, decreasing = sort > 0L)
   
@@ -815,7 +823,7 @@ pdist.probability <-  function(x, ..., condition = NULL, na.rm = FALSE, sort = F
   exprs <- rlang::enexprs(...)
   if (length(exprs)) condition <- pexprs(exprs, colnames(x), condition)$Condition %||% condition
   
-  if (!is.null(condition)) conditionalize(x, condition) else x
+  if (!is.null(condition)) conditional(x, condition) else x
   
 }
 
@@ -924,7 +932,7 @@ pdist.table <- function(x, ..., condition = NULL, na.rm = FALSE, sort = FALSE, b
 
 
 
-conditionalize <- function(pdist, condition) {
+conditional <- function(pdist, condition) {
   varnames <- varnames(pdist)
   if (any(!condition %in% varnames)) .stop("We can only calculate a conditional probability across an existing dimension/factor.",
                                            "The <conditions|condition> {harvard(setdiff(varnames, condition), 'and')} are not dimensions of the given",
@@ -1009,14 +1017,49 @@ pexprs <- function(exprs, colnames, condition) {
 
 ## Likelihoods ----
 
-###likelihood() ----
+### like() ----
 
 #' @export
-setGeneric('likely', function(x, log, ...) standardGeneric('likely'))
+like <- function(..., distribution) UseMethod('like')  # S3 generic: UseMethod() dispatches on the class of the first argument supplied through `...`
+
+#' @export
+ic <- function(..., distribution, base = 2) -log(like(..., distribution = distribution), base = base)  # information content: negative log (in base `base`) of whatever like() returns
+
+#' @export
+like.default <- function(..., distribution) {
+  like.data.frame(data.frame(...), distribution = distribution)  # bundle the arguments in `...` into a data.frame and defer to the data.frame method
+}
+
+#' @export
+like.data.frame <- function(df, ..., distribution) {
+  # Look up the probability (`$p`) of each row of `df` in `distribution`; if none is supplied, one is built from `df` (and `...`) with pdist().
+  if (missing(distribution)) distribution <- do.call('pdist', list(df, ...))
+  
+  colnames <- colnames(df)
+  varnames <- varnames(distribution)
+  
+  if (!setequal(colnames, varnames)) .stop("To calculate likelihoods, the expected distribution must have the same variables as the observed variables.")
+  # setequal() ignores order, but matrix indexing of a distribution is positional (names are dropped), so put the columns in the distribution's variable order.
+  distribution[as.matrix(df)[ , varnames, drop = FALSE], , drop = TRUE]$p
+  
+}
+
 
-#' @rdname entropy
 #' @export
-setGeneric('ic', function(x, ...) standardGeneric('ic'))
+pMI <- function(..., distribution, base = 2) {
+  # Point-wise mutual information per observation: information content under independence minus information content under the observed distribution.
+  observed <- data.frame(...)
+  if (missing(distribution)) distribution <- do.call('pdist', observed)
+  
+  # Independence model: product over variables of `distribution[ , j]` (presumably each variable's marginal -- TODO confirm).
+  perVariable <- lapply(varnames(distribution), function(j) distribution[ , j])
+  independent <- Reduce('*', perVariable)
+  
+  ic_joint <- ic(observed, distribution = distribution, base = base)
+  ic_indep <- ic(observed, distribution = independent, base = base)
+  ic_indep - ic_joint
+}
+
 
 ## Distributional ----
 
@@ -1071,7 +1114,7 @@ H <- entropy
 #' @rdname entropy
 #' @export
 entropy.probability <-  function(q, p, condition = NULL, base = 2) {
-            if (!is.null(condition)) q <- conditionalize(q, condition)
+            if (!is.null(condition)) q <- conditional(q, condition)
   
             if (missing(p) || !inherits(p, 'probability')) {
               expected <- unconditional(q)$p
@@ -1173,18 +1216,19 @@ mutualInfo <- function(..., base = 2) {
 #' @export
 mutualInfo.probability <-  function(x, base = 2) {
   varnames <- varnames(x)
-  if (length(varnames) != 2L) .stop("Can't calculate mutual information of a single variable.")
+  if (length(varnames) < 2L) .stop("Can't calculate mutual information of a single variable.")
   
   x <- unconditional(x)
   
   observed <- setNames(x$p, do.call('paste', c(getLevels(x), list(sep = '.'))))
   
-  expected <- (x[ , 1] * x[ , 2])
-  expected <- setNames(expected$p, do.call('paste', c(getLevels(expected), list(sep = '.'))))
+  independent <- Reduce('*', lapply(varnames, \(j) x[ , j]))
+  # (this was previously hard-coded for exactly two variables: x[ , 1] * x[ , 2])
+  independent <- setNames(independent$p, do.call('paste', c(getLevels(independent), list(sep = '.'))))
   
-  expected <- expected[names(observed)]
+  independent <- independent[names(observed)]
   
-  ratio <- observed / expected
+  ratio <- observed / independent
   logratio <- ifelse(ratio == 0 | ratio == Inf, 0, log(ratio, base = base))
   
   equation <- Pequation(x, 'I', ';')
@@ -1207,9 +1251,9 @@ mutualInfo.default <- function(..., base = 2) {
 
 
 
-###################################################
+##################################################-
 # table() extensions for humdrumR ---- ###########
-##################################################
+##################################################-
 
 
 
diff --git a/man/cleave.Rd b/man/cleave.Rd
index eca8ecb2..ff68891e 100644
--- a/man/cleave.Rd
+++ b/man/cleave.Rd
@@ -10,11 +10,7 @@
 \usage{
 cleave(humdrumR, ..., field = selectedFields(humdrumR)[1], newFields = NULL)
 
-pivot_wider.humdrumR(
-  data,
-  names_from = "Spine",
-  values_from = selectedFields(data)[1]
-)
+\method{pivot_wider}{humdrumR}(data, names_from = "Spine", values_from = selectedFields(data)[1])
 
 cleaveSpines(humdrumR, field = selectedFields(humdrumR)[1])
 
diff --git a/man/distributions.Rd b/man/distributions.Rd
index 1d10c995..795026af 100644
--- a/man/distributions.Rd
+++ b/man/distributions.Rd
@@ -9,6 +9,7 @@
 \alias{count.humdrumR}
 \alias{count.table}
 \alias{pdist.count}
+\alias{pdist.probability}
 \alias{pdist.default}
 \alias{pdist.data.frame}
 \alias{pdist.humdrumR}
@@ -29,26 +30,23 @@
 
 \S4method{sort}{distribution}(x, decreasing = TRUE)
 
-count.default(
-  ...,
-  sort = FALSE,
-  na.rm = FALSE,
-  .drop = FALSE,
-  binArgs = list()
-)
+\method{count}{default}(..., sort = FALSE, na.rm = FALSE, .drop = FALSE, binArgs = list())
+
+\method{count}{humdrumR}(x, ..., sort = FALSE, na.rm = FALSE, .drop = FALSE, binArgs = list())
 
-count.humdrumR(
+\method{count}{table}(..., sort = FALSE, na.rm = FALSE, .drop = FALSE)
+
+\method{pdist}{count}(
   x,
   ...,
-  sort = FALSE,
+  condition = NULL,
   na.rm = FALSE,
+  sort = FALSE,
   .drop = FALSE,
   binArgs = list()
 )
 
-count.table(..., sort = FALSE, na.rm = FALSE, .drop = FALSE)
-
-\method{pdist}{count}(
+\method{pdist}{probability}(
   x,
   ...,
   condition = NULL,
diff --git a/man/entropy.Rd b/man/entropy.Rd
index 2913e693..a60d1585 100644
--- a/man/entropy.Rd
+++ b/man/entropy.Rd
@@ -1,7 +1,6 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/Distributions.R
-\name{ic}
-\alias{ic}
+\name{entropy}
 \alias{entropy}
 \alias{H}
 \alias{entropy.probability}
@@ -13,13 +12,11 @@
 \alias{mutualInfo.default}
 \title{Calculate Entropy or Information Content of variables}
 \usage{
-ic(x, ...)
-
 entropy(..., base = 2)
 
 H(..., base = 2)
 
-\method{entropy}{probability}(q, p, base = 2)
+\method{entropy}{probability}(q, p, condition = NULL, base = 2)
 
 \method{entropy}{numeric}(x, base = 2, na.rm = TRUE)
 
diff --git a/man/groupHumdrum.Rd b/man/groupHumdrum.Rd
index b4d84106..4c4dde1c 100644
--- a/man/groupHumdrum.Rd
+++ b/man/groupHumdrum.Rd
@@ -9,9 +9,9 @@
 \alias{groups}
 \title{Divide humdrumR data into groups}
 \usage{
-group_by.humdrumR(.data, ..., .add = TRUE)
+\method{group_by}{humdrumR}(.data, ..., .add = TRUE)
 
-ungroup.humdrumR(x, ...)
+\method{ungroup}{humdrumR}(x, ...)
 
 groups(humdrumR, dataTypes = "D")
 }
diff --git a/man/pullHumdrum.Rd b/man/pullHumdrum.Rd
index 1e7dffd2..b0f67738 100644
--- a/man/pullHumdrum.Rd
+++ b/man/pullHumdrum.Rd
@@ -16,7 +16,7 @@ pull_data.frame(humdrumR, ..., dataTypes = "D", null = "charNA2dot")
 
 pull_tibble(humdrumR, ..., dataTypes = "D", null = "charNA2dot")
 
-pull.humdrumR(.data, var, dataTypes = "D", null = "asis")
+\method{pull}{humdrumR}(.data, var, dataTypes = "D", null = "asis")
 
 \S4method{$}{humdrumR}(x, name)
 }
diff --git a/man/rend.Rd b/man/rend.Rd
index 8c3c5a3f..5e0bfb9b 100644
--- a/man/rend.Rd
+++ b/man/rend.Rd
@@ -7,7 +7,7 @@
 \usage{
 rend(humdrumR, ..., fieldName = NULL, removeRended = TRUE, rendEmpty = FALSE)
 
-pivot_longer.humdrumR(data, cols, ...)
+\method{pivot_longer}{humdrumR}(data, cols, ...)
 }
 \arguments{
 \item{humdrumR}{\emph{\strong{HumdrumR data.}}
diff --git a/man/selectedFields.Rd b/man/selectedFields.Rd
index 7a4be857..8cc4088d 100644
--- a/man/selectedFields.Rd
+++ b/man/selectedFields.Rd
@@ -8,7 +8,7 @@
 \usage{
 selectedFields(humdrumR)
 
-select.humdrumR(.data, ..., fieldTypes = "any")
+\method{select}{humdrumR}(.data, ..., fieldTypes = "any")
 }
 \arguments{
 \item{humdrumR, .data}{\emph{\strong{HumdrumR data.}}
diff --git a/man/subset.humdrumR.Rd b/man/subset.humdrumR.Rd
index 9756620f..1e67b2dd 100644
--- a/man/subset.humdrumR.Rd
+++ b/man/subset.humdrumR.Rd
@@ -18,13 +18,7 @@
 \usage{
 \method{subset}{humdrumR}(x, ..., dataTypes = "D", .by = NULL, removeEmptyPieces = TRUE)
 
-filter.humdrumR(
-  .data,
-  ...,
-  dataTypes = "D",
-  .by = NULL,
-  removeEmptyPieces = TRUE
-)
+\method{filter}{humdrumR}(.data, ..., dataTypes = "D", .by = NULL, removeEmptyPieces = TRUE)
 
 removeEmptyFiles(x)
 
diff --git a/man/withinHumdrum.Rd b/man/withinHumdrum.Rd
index b6ea6069..6c3186f1 100644
--- a/man/withinHumdrum.Rd
+++ b/man/withinHumdrum.Rd
@@ -35,7 +35,7 @@
   variables = list()
 )
 
-mutate.humdrumR(
+\method{mutate}{humdrumR}(
   .data,
   ...,
   dataTypes = "D",
@@ -45,7 +45,7 @@ mutate.humdrumR(
   .by = NULL
 )
 
-summarise.humdrumR(
+\method{summarise}{humdrumR}(
   .data,
   ...,
   dataTypes = "D",
@@ -54,7 +54,7 @@ summarise.humdrumR(
   .by = NULL
 )
 
-reframe.humdrumR(
+\method{reframe}{humdrumR}(
   .data,
   ...,
   dataTypes = "D",
@@ -64,7 +64,7 @@ reframe.humdrumR(
   .by = NULL
 )
 
-ggplot.humdrumR(data = NULL, mapping = aes(), ..., dataTypes = "D")
+\method{ggplot}{humdrumR}(data = NULL, mapping = aes(), ..., dataTypes = "D")
 }
 \arguments{
 \item{data}{\emph{\strong{HumdrumR data.}}