From bf0136f6a974348605c8abd68cb42051dc6d93ac Mon Sep 17 00:00:00 2001 From: ncondits3 Date: Sun, 24 Dec 2023 07:43:10 -0500 Subject: [PATCH] First take at point-wise mutual info --- NAMESPACE | 36 ++++++++++--------- R/Distributions.R | 78 +++++++++++++++++++++++++++++++++--------- man/cleave.Rd | 6 +--- man/distributions.Rd | 22 ++++++------ man/entropy.Rd | 7 ++-- man/groupHumdrum.Rd | 4 +-- man/pullHumdrum.Rd | 2 +- man/rend.Rd | 2 +- man/selectedFields.Rd | 2 +- man/subset.humdrumR.Rd | 8 +---- man/withinHumdrum.Rd | 8 ++--- 11 files changed, 103 insertions(+), 72 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 3817bdde..d683ad41 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,6 +23,9 @@ S3method(chord,default) S3method(chord,humdrumR) S3method(context,default) S3method(context,humdrumR) +S3method(count,default) +S3method(count,humdrumR) +S3method(count,table) S3method(deg,default) S3method(deg,humdrumR) S3method(degree,default) @@ -51,6 +54,8 @@ S3method(entropy,probability) S3method(expandPaths,humdrumR) S3method(figuredBass,default) S3method(figuredBass,humdrumR) +S3method(filter,distribution) +S3method(filter,humdrumR) S3method(format,struct) S3method(format,token) S3method(freq,default) @@ -60,8 +65,10 @@ S3method(fromgrid,integer) S3method(fromgrid,logical) S3method(fromgrid,matrix) S3method(fromgrid,numeric) +S3method(ggplot,humdrumR) S3method(grid,default) S3method(grid,humdrumR) +S3method(group_by,humdrumR) S3method(harm,default) S3method(harm,humdrumR) S3method(helmholtz,default) @@ -89,6 +96,7 @@ S3method(kern,humdrumR) S3method(lag,data.frame) S3method(lag,default) S3method(lag,matrix) +S3method(like,data.frame) S3method(lilypond,default) S3method(lilypond,humdrumR) S3method(measure,"NULL") @@ -106,6 +114,7 @@ S3method(mint,default) S3method(mint,humdrumR) S3method(ms,default) S3method(ms,humdrumR) +S3method(mutate,humdrumR) S3method(mutualInfo,default) S3method(mutualInfo,probability) S3method(names,humdrumR) @@ -120,9 +129,12 @@ S3method(pdist,count) S3method(pdist,data.frame) S3method(pdist,default) S3method(pdist,humdrumR) +S3method(pdist,probability) S3method(pdist,table) S3method(pitch,default) S3method(pitch,humdrumR) +S3method(pivot_longer,humdrumR) +S3method(pivot_wider,humdrumR) S3method(print,Excluder) S3method(print,distribution) S3method(print,fraction) @@ -133,6 +145,7 @@ S3method(print,humSpines) S3method(print,humdrumDispatch) S3method(print,humdrumR) S3method(print,humdrumRmethod) +S3method(pull,humdrumR) S3method(quarters,default) S3method(quarters,humdrumR) S3method(rbind,humdrumR.table) @@ -142,6 +155,7 @@ S3method(recip,default) S3method(recip,humdrumR) S3method(reference,character) S3method(reference,humdrumR) +S3method(reframe,humdrumR) S3method(rep,token) S3method(rhythmInterval,"NULL") S3method(rhythmInterval,character) @@ -152,8 +166,10 @@ S3method(rhythmInterval,numeric) S3method(rhythmInterval,token) S3method(roman,default) S3method(roman,humdrumR) +S3method(scale_type,token) S3method(seconds,default) S3method(seconds,humdrumR) +S3method(select,humdrumR) S3method(semits,default) S3method(semits,humdrumR) S3method(sigma,default) @@ -165,6 +181,7 @@ S3method(solfg,humdrumR) S3method(subset,humdrumR) S3method(sumTies,default) S3method(sumTies,humdrumR) +S3method(summarise,humdrumR) S3method(summary,humdrumR) S3method(tactus,"NULL") S3method(tactus,character) @@ -203,6 +220,7 @@ S3method(transpose,factor) S3method(transpose,numeric) S3method(transpose,token) S3method(transpose,tonalInterval) +S3method(ungroup,humdrumR) S3method(unique,struct) S3method(unique,token) S3method(with,humdrumR) @@ -308,9 +326,6 @@ export(combineFields) export(complement) export(context) export(count) -export(count.default) -export(count.humdrumR) -export(count.table) export(d1) export(d10) export(d11) @@ -359,8 +374,6 @@ export(expandPaths) export(fields) export(figuredBass) export(filter) -export(filter.distribution) -export(filter.humdrumR) export(findHumdrum) export(fraction) export(freq) @@ -369,10 +382,8 @@ export(gamut) export(getHumtab) export(getRE) export(getREexclusive) -export(ggplot.humdrumR) export(grid) export(group_by) -export(group_by.humdrumR) export(groups) export(harm) export(helmholtz) @@ -409,7 +420,7 @@ export(key) export(knownInterpretations) export(lag) export(lead) -export(likely) +export(like) export(lilypond) export(m10) export(m13) @@ -435,7 +446,6 @@ export(mint) export(ms) export(ms2bpm) export(mutate) -export(mutate.humdrumR) export(mutualInfo) export(namesSubcorpora) export(nbeats) @@ -455,11 +465,8 @@ export(order.tonalInterval) export(pc) export(pdist) export(pitch) -export(pivot_longer.humdrumR) -export(pivot_wider.humdrumR) export(printSilbeFormat) export(pull) -export(pull.humdrumR) export(pull_data.frame) export(pull_data.table) export(pull_tibble) @@ -471,7 +478,6 @@ export(recip) export(recordDuration) export(reference) export(reframe) -export(reframe.humdrumR) export(removeEmptyFiles) export(removeEmptyPaths) export(removeEmptyPieces) @@ -484,14 +490,12 @@ export(rhythmInterval) export(roman) export(romanKey) export(rootPosition) -export(scale_type.token) export(scale_x_token) export(sec2bpm) export(seconds) export(sections) export(segments) export(select) -export(select.humdrumR) export(selectedFields) export(semits) export(sigma) @@ -505,7 +509,6 @@ export(step) export(subpos) export(sumTies) export(summarise) -export(summarise.humdrumR) export(summarize) export(syncopation) export(table) @@ -529,7 +532,6 @@ export(uncontext) export(unfilter) export(unfoldStops) export(ungroup) -export(ungroup.humdrumR) export(unison) export(validateHumdrum) export(varnames) diff --git a/R/Distributions.R b/R/Distributions.R index 96b8d4a9..68611c1c 100644 --- a/R/Distributions.R +++ b/R/Distributions.R @@ -290,6 +290,7 @@ setMethod('[', c('distribution', 'ANY', 'missing'), function(x, i, drop = FALSE) { df <- as.data.frame(x)[i , , drop = FALSE] + if (drop) df else distribution(df, x) }) # @@ -322,9 +323,11 @@ setMethod('[', c('distribution', 'ANY', 'ANY'), }) setMethod('[', c('distribution', 'matrix'), - function(x, i, j, cartesian = FALSE, drop = FALSE) { + function(x, i, j, cartesian = TRUE, drop = FALSE) { + + i <- as.data.frame(i) + names(i) <- NULL - i <- as.list(i) do.call('[[', c(list(x, cartesian = cartesian, drop = drop), i)) }) @@ -342,6 +345,11 @@ setMethod('[[', 'distribution', missing <- sapply(args, rlang::is_missing) args[!missing] <- lapply(args[!missing], rlang::eval_tidy) + if (missing[2] && length(args) == 2L) { + args <- args[1L] + missing <- missing[1L] + } + levels <- getLevels(x) if (length(args) > length(levels)) .stop("This distribution only has {num2print(length(levels))} dimensions to index.", @@ -801,7 +809,7 @@ pdist.count <- function(x, ..., condition = NULL, na.rm = FALSE, sort = FALSE, dist <- distribution(x, 'p', N = n) - if (!is.null(condition)) dist <- conditionalize(dist, condition = condition) + if (!is.null(condition)) dist <- conditional(dist, condition = condition) if (sort) dist <- sort(dist, decreasing = sort > 0L) @@ -815,7 +823,7 @@ pdist.probability <- function(x, ..., condition = NULL, na.rm = FALSE, sort = F exprs <- rlang::enexprs(...) if (length(exprs)) condition <- pexprs(exprs, colnames(x), condition)$Condition %||% condition - if (!is.null(condition)) conditionalize(x, condition) else x + if (!is.null(condition)) conditional(x, condition) else x } @@ -924,7 +932,7 @@ pdist.table <- function(x, ..., condition = NULL, na.rm = FALSE, sort = FALSE, b -conditionalize <- function(pdist, condition) { +conditional <- function(pdist, condition) { varnames <- varnames(pdist) if (any(!condition %in% varnames)) .stop("We can only calculate a conditional probability across an existing dimension/factor.", "The {harvard(setdiff(varnames, condition), 'and')} are not dimensions of the given", @@ -1009,14 +1017,49 @@ pexprs <- function(exprs, colnames, condition) { ## Likelihoods ---- -###likelihood() ---- +### like() ---- #' @export -setGeneric('likely', function(x, log, ...) standardGeneric('likely')) +like <- function(..., distribution) UseMethod('like') + +#' @export +ic <- function(..., distribution, base = 2) -log(like(..., distribution = distribution), base = base) + +#' @export +like.default <- function(..., distribution) { + like.data.frame(data.frame(...), distribution = distribution) +} + +#' @export +like.data.frame <- function(df, ..., distribution) { + + if (missing(distribution)) distribution <- do.call('pdist', list(df, ...)) + + colnames <- colnames(df) + varnames <- varnames(distribution) + + if (!setequal(colnames, varnames)) .stop("To calculate likelihoods, the expected distribution must have the same variables as the observed variables.") + + distribution[as.matrix(df), , drop = TRUE]$p + +} + -#' @rdname entropy #' @export -setGeneric('ic', function(x, ...) standardGeneric('ic')) +pMI <- function(..., distribution, base = 2) { + df <- data.frame(...) + + if (missing(distribution)) distribution <- do.call('pdist', df) + + independent <- Reduce('*', lapply(varnames(distribution), \(j) distribution[ , j])) + + ic_observed <- ic(df, distribution = distribution, base = base) + ic_independent <- ic(df, distribution = independent, base = base) + + ic_independent - ic_observed + +} + ## Distributional ---- @@ -1071,7 +1114,7 @@ H <- entropy #' @rdname entropy #' @export entropy.probability <- function(q, p, condition = NULL, base = 2) { - if (!is.null(condition)) q <- conditionalize(q, condition) + if (!is.null(condition)) q <- conditional(q, condition) if (missing(p) || !inherits(p, 'probability')) { expected <- unconditional(q)$p @@ -1173,18 +1216,19 @@ mutualInfo <- function(..., base = 2) { #' @export mutualInfo.probability <- function(x, base = 2) { varnames <- varnames(x) - if (length(varnames) != 2L) .stop("Can't calculate mutual information of a single variable.") + if (length(varnames) < 2L) .stop("Can't calculate mutual information of a single variable.") x <- unconditional(x) observed <- setNames(x$p, do.call('paste', c(getLevels(x), list(sep = '.')))) - expected <- (x[ , 1] * x[ , 2]) - expected <- setNames(expected$p, do.call('paste', c(getLevels(expected), list(sep = '.')))) + independent <- Reduce('*', lapply(varnames, \(j) x[ , j])) + # expected <- (x[ , 1] * x[ , 2]) + independent <- setNames(independent$p, do.call('paste', c(getLevels(independent), list(sep = '.')))) - expected <- expected[names(observed)] + independent <- independent[names(observed)] - ratio <- observed / expected + ratio <- observed / independent logratio <- ifelse(ratio == 0 | ratio == Inf, 0, log(ratio, base = base)) equation <- Pequation(x, 'I', ';') @@ -1207,9 +1251,9 @@ mutualInfo.default <- function(..., base = 2) { -################################################### +##################################################- # table() extensions for humdrumR ---- ########### -################################################## +##################################################- diff --git a/man/cleave.Rd b/man/cleave.Rd index eca8ecb2..ff68891e 100644 --- a/man/cleave.Rd +++ b/man/cleave.Rd @@ -10,11 +10,7 @@ \usage{ cleave(humdrumR, ..., field = selectedFields(humdrumR)[1], newFields = NULL) -pivot_wider.humdrumR( - data, - names_from = "Spine", - values_from = selectedFields(data)[1] -) +\method{pivot_wider}{humdrumR}(data, names_from = "Spine", values_from = selectedFields(data)[1]) cleaveSpines(humdrumR, field = selectedFields(humdrumR)[1]) diff --git a/man/distributions.Rd b/man/distributions.Rd index 1d10c995..795026af 100644 --- a/man/distributions.Rd +++ b/man/distributions.Rd @@ -9,6 +9,7 @@ \alias{count.humdrumR} \alias{count.table} \alias{pdist.count} +\alias{pdist.probability} \alias{pdist.default} \alias{pdist.data.frame} \alias{pdist.humdrumR} @@ -29,26 +30,23 @@ \S4method{sort}{distribution}(x, decreasing = TRUE) -count.default( - ..., - sort = FALSE, - na.rm = FALSE, - .drop = FALSE, - binArgs = list() -) +\method{count}{default}(..., sort = FALSE, na.rm = FALSE, .drop = FALSE, binArgs = list()) + +\method{count}{humdrumR}(x, ..., sort = FALSE, na.rm = FALSE, .drop = FALSE, binArgs = list()) -count.humdrumR( +\method{count}{table}(..., sort = FALSE, na.rm = FALSE, .drop = FALSE) + +\method{pdist}{count}( x, ..., - sort = FALSE, + condition = NULL, na.rm = FALSE, + sort = FALSE, .drop = FALSE, binArgs = list() ) -count.table(..., sort = FALSE, na.rm = FALSE, .drop = FALSE) - -\method{pdist}{count}( +\method{pdist}{probability}( x, ..., condition = NULL, diff --git a/man/entropy.Rd b/man/entropy.Rd index 2913e693..a60d1585 100644 --- a/man/entropy.Rd +++ b/man/entropy.Rd @@ -1,7 +1,6 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/Distributions.R -\name{ic} -\alias{ic} +\name{entropy} \alias{entropy} \alias{H} \alias{entropy.probability} @@ -13,13 +12,11 @@ \alias{mutualInfo.default} \title{Calculate Entropy or Information Content of variables} \usage{ -ic(x, ...) - entropy(..., base = 2) H(..., base = 2) -\method{entropy}{probability}(q, p, base = 2) +\method{entropy}{probability}(q, p, condition = NULL, base = 2) \method{entropy}{numeric}(x, base = 2, na.rm = TRUE) diff --git a/man/groupHumdrum.Rd b/man/groupHumdrum.Rd index b4d84106..4c4dde1c 100644 --- a/man/groupHumdrum.Rd +++ b/man/groupHumdrum.Rd @@ -9,9 +9,9 @@ \alias{groups} \title{Divide humdrumR data into groups} \usage{ -group_by.humdrumR(.data, ..., .add = TRUE) +\method{group_by}{humdrumR}(.data, ..., .add = TRUE) -ungroup.humdrumR(x, ...) +\method{ungroup}{humdrumR}(x, ...) groups(humdrumR, dataTypes = "D") } diff --git a/man/pullHumdrum.Rd b/man/pullHumdrum.Rd index 1e7dffd2..b0f67738 100644 --- a/man/pullHumdrum.Rd +++ b/man/pullHumdrum.Rd @@ -16,7 +16,7 @@ pull_data.frame(humdrumR, ..., dataTypes = "D", null = "charNA2dot") pull_tibble(humdrumR, ..., dataTypes = "D", null = "charNA2dot") -pull.humdrumR(.data, var, dataTypes = "D", null = "asis") +\method{pull}{humdrumR}(.data, var, dataTypes = "D", null = "asis") \S4method{$}{humdrumR}(x, name) } diff --git a/man/rend.Rd b/man/rend.Rd index 8c3c5a3f..5e0bfb9b 100644 --- a/man/rend.Rd +++ b/man/rend.Rd @@ -7,7 +7,7 @@ \usage{ rend(humdrumR, ..., fieldName = NULL, removeRended = TRUE, rendEmpty = FALSE) -pivot_longer.humdrumR(data, cols, ...) +\method{pivot_longer}{humdrumR}(data, cols, ...) } \arguments{ \item{humdrumR}{\emph{\strong{HumdrumR data.}} diff --git a/man/selectedFields.Rd b/man/selectedFields.Rd index 7a4be857..8cc4088d 100644 --- a/man/selectedFields.Rd +++ b/man/selectedFields.Rd @@ -8,7 +8,7 @@ \usage{ selectedFields(humdrumR) -select.humdrumR(.data, ..., fieldTypes = "any") +\method{select}{humdrumR}(.data, ..., fieldTypes = "any") } \arguments{ \item{humdrumR, .data}{\emph{\strong{HumdrumR data.}} diff --git a/man/subset.humdrumR.Rd b/man/subset.humdrumR.Rd index 9756620f..1e67b2dd 100644 --- a/man/subset.humdrumR.Rd +++ b/man/subset.humdrumR.Rd @@ -18,13 +18,7 @@ \usage{ \method{subset}{humdrumR}(x, ..., dataTypes = "D", .by = NULL, removeEmptyPieces = TRUE) -filter.humdrumR( - .data, - ..., - dataTypes = "D", - .by = NULL, - removeEmptyPieces = TRUE -) +\method{filter}{humdrumR}(.data, ..., dataTypes = "D", .by = NULL, removeEmptyPieces = TRUE) removeEmptyFiles(x) diff --git a/man/withinHumdrum.Rd b/man/withinHumdrum.Rd index b6ea6069..6c3186f1 100644 --- a/man/withinHumdrum.Rd +++ b/man/withinHumdrum.Rd @@ -35,7 +35,7 @@ variables = list() ) -mutate.humdrumR( +\method{mutate}{humdrumR}( .data, ..., dataTypes = "D", @@ -45,7 +45,7 @@ mutate.humdrumR( .by = NULL ) -summarise.humdrumR( +\method{summarise}{humdrumR}( .data, ..., dataTypes = "D", @@ -54,7 +54,7 @@ summarise.humdrumR( .by = NULL ) -reframe.humdrumR( +\method{reframe}{humdrumR}( .data, ..., dataTypes = "D", @@ -64,7 +64,7 @@ reframe.humdrumR( .by = NULL ) -ggplot.humdrumR(data = NULL, mapping = aes(), ..., dataTypes = "D") +\method{ggplot}{humdrumR}(data = NULL, mapping = aes(), ..., dataTypes = "D") } \arguments{ \item{data}{\emph{\strong{HumdrumR data.}}