From a5c238b1bebdf95e1befdc92e9864e5cc101535a Mon Sep 17 00:00:00 2001 From: Vincent Arel-Bundock Date: Sat, 19 Oct 2024 23:03:19 -0400 Subject: [PATCH] docs --- altdoc/quarto_website.yml | 2 +- docs/CITATION.html | 6 +- docs/LICENSE.html | 4 +- docs/NEWS.html | 4 +- docs/index.html | 4 +- docs/man/comparisons.html | 4 +- docs/man/datagrid.html | 4 +- docs/man/hypotheses.html | 4 +- docs/man/inferences.html | 4 +- docs/man/plot_comparisons.html | 4 +- docs/man/plot_predictions.html | 4 +- docs/man/plot_slopes.html | 4 +- docs/man/posterior_draws.html | 4 +- docs/man/predictions.html | 4 +- docs/man/print.marginaleffects.html | 4 +- docs/man/slopes.html | 4 +- docs/quarto_website.yml | 9 +- docs/search.json | 177 +++++++++++++++------------- 18 files changed, 125 insertions(+), 125 deletions(-) diff --git a/altdoc/quarto_website.yml b/altdoc/quarto_website.yml index 37ea33e35..514d06f86 100644 --- a/altdoc/quarto_website.yml +++ b/altdoc/quarto_website.yml @@ -35,7 +35,7 @@ website: collapse-level: 1 contents: - text: Home - file: index.md + href: https://marginaleffects.com - section: Functions contents: - text: "`predictions`" diff --git a/docs/CITATION.html b/docs/CITATION.html index 6cc75d23c..007987df9 100644 --- a/docs/CITATION.html +++ b/docs/CITATION.html @@ -144,7 +144,7 @@ - + @@ -253,8 +253,8 @@ diff --git a/docs/LICENSE.html b/docs/LICENSE.html index 77ba8b87b..f4ec1274f 100644 --- a/docs/LICENSE.html +++ b/docs/LICENSE.html @@ -253,8 +253,8 @@ diff --git a/docs/NEWS.html b/docs/NEWS.html index 48c7b3028..2258a9b94 100644 --- a/docs/NEWS.html +++ b/docs/NEWS.html @@ -253,8 +253,8 @@ diff --git a/docs/index.html b/docs/index.html index b450cc148..e7654134d 100644 --- a/docs/index.html +++ b/docs/index.html @@ -287,8 +287,8 @@ diff --git a/docs/man/comparisons.html b/docs/man/comparisons.html index 1768e22e1..cef73eb24 100644 --- a/docs/man/comparisons.html +++ b/docs/man/comparisons.html @@ -295,8 +295,8 @@ diff --git a/docs/man/datagrid.html b/docs/man/datagrid.html index 78aa63470..c4b3da47f 100644 --- a/docs/man/datagrid.html +++ b/docs/man/datagrid.html @@ -270,8 +270,8 @@ diff --git a/docs/man/hypotheses.html b/docs/man/hypotheses.html index 88f2e3d04..e3beea3b0 100644 --- a/docs/man/hypotheses.html +++ b/docs/man/hypotheses.html @@ -295,8 +295,8 @@ diff --git a/docs/man/inferences.html b/docs/man/inferences.html index b48982ac9..f00ec4703 100644 --- a/docs/man/inferences.html +++ b/docs/man/inferences.html @@ -270,8 +270,8 @@ diff --git a/docs/man/plot_comparisons.html b/docs/man/plot_comparisons.html index b8c12a689..2eeb77563 100644 --- a/docs/man/plot_comparisons.html +++ b/docs/man/plot_comparisons.html @@ -270,8 +270,8 @@ diff --git a/docs/man/plot_predictions.html b/docs/man/plot_predictions.html index 3c549486f..09450e0f6 100644 --- a/docs/man/plot_predictions.html +++ b/docs/man/plot_predictions.html @@ -270,8 +270,8 @@ diff --git a/docs/man/plot_slopes.html b/docs/man/plot_slopes.html index 7f0903815..83be90f4d 100644 --- a/docs/man/plot_slopes.html +++ b/docs/man/plot_slopes.html @@ -270,8 +270,8 @@ diff --git a/docs/man/posterior_draws.html b/docs/man/posterior_draws.html index 6831ffc9e..18f6acf9f 100644 --- a/docs/man/posterior_draws.html +++ b/docs/man/posterior_draws.html @@ -253,8 +253,8 @@ diff --git a/docs/man/predictions.html b/docs/man/predictions.html index f26457556..1f831bbe8 100644 --- a/docs/man/predictions.html +++ b/docs/man/predictions.html @@ -295,8 +295,8 @@ diff --git a/docs/man/print.marginaleffects.html b/docs/man/print.marginaleffects.html 
index dec66ffb6..ce98df697 100644 --- a/docs/man/print.marginaleffects.html +++ b/docs/man/print.marginaleffects.html @@ -270,8 +270,8 @@ diff --git a/docs/man/slopes.html b/docs/man/slopes.html index 7a64cb0d5..c3e925dea 100644 --- a/docs/man/slopes.html +++ b/docs/man/slopes.html @@ -295,8 +295,8 @@ diff --git a/docs/quarto_website.yml b/docs/quarto_website.yml index 0b39d7ce2..37ea33e35 100644 --- a/docs/quarto_website.yml +++ b/docs/quarto_website.yml @@ -62,16 +62,9 @@ website: file: man/print.marginaleffects.qmd - text: News file: $ALTDOC_NEWS - - text: Changelog - file: $ALTDOC_CHANGELOG - text: License file: $ALTDOC_LICENSE - - text: Licence - file: $ALTDOC_LICENCE - - text: Code of conduct - file: $ALTDOC_CODE_OF_CONDUCT - - text: Citation - file: $ALTDOC_CITATION + - vignettes/citation.qmd format: html: diff --git a/docs/search.json b/docs/search.json index d3fa3caa0..a9790e9ab 100644 --- a/docs/search.json +++ b/docs/search.json @@ -4,11 +4,7 @@ "href": "CITATION.html", "title": "Citation", "section": "", - "text": "Citation\nTo cite marginaleffects in publications, please use:\n\nArel-Bundock V, Greifer N, Heiss A (Forthcoming). “How to Interpret Statistical Models Using in and .” Journal of Statistical Software. https://marginaleffects.com.", - "crumbs": [ - "Model to Meaning", - "Citation" - ] + "text": "Citation\nTo cite marginaleffects in publications, please use:\n\nArel-Bundock V, Greifer N, Heiss A (Forthcoming). “How to Interpret Statistical Models Using in and .” Journal of Statistical Software. https://marginaleffects.com." }, { "objectID": "NEWS.html", @@ -407,123 +403,134 @@ ] }, { - "objectID": "man/hypotheses.html", - "href": "man/hypotheses.html", + "objectID": "vignettes/citation.html", + "href": "vignettes/citation.html", + "title": "Citation and License", + "section": "", + "text": "Citation and License\nTo cite marginaleffects in publications, please use:\n\nArel-Bundock V, Greifer N, Heiss A (Forthcoming). “How to Interpret Statistical Models Using marginaleffects in R and Python.” Journal of Statistical Software.\n\n@Article{,\n title = {How to Interpret Statistical Models Using {marginaleffects} in {R} and {Python}},\n author = {Vincent Arel-Bundock and Noah Greifer and Andrew Heiss},\n year = {Forthcoming},\n journal = {Journal of Statistical Software},\n}\nThe marginaleffects software code is licenced under the GPLv3\nThe contents of this website and the Model to Meaning book are copyrighted and may not be reproduced without permission.\n© 2024 Vincent Arel-Bundock. All Rights Reserved.", + "crumbs": [ + "Model to Meaning", + "Citation and License" + ] + }, + { + "objectID": "man/predictions.html", + "href": "man/predictions.html", "title": "", "section": "", - "text": "Uncertainty estimates are calculated as first-order approximate standard errors for linear or non-linear functions of a vector of random variables with known or estimated covariance matrix. 
In that sense, hypotheses emulates the behavior of the excellent and well-established car::deltaMethod and car::linearHypothesis functions, but it supports more models; requires fewer dependencies; expands the range of tests to equivalence and superiority/inferiority; and offers convenience features like robust standard errors.\nTo learn more, read the hypothesis tests vignette, visit the package website, or scroll down this page for a full list of vignettes:\n\n\nhttps://marginaleffects.com/vignettes/hypothesis.html\n\n\nhttps://marginaleffects.com/\n\n\nWarning #1: Tests are conducted directly on the scale defined by the type argument. For some models, it can make sense to conduct hypothesis or equivalence tests on the “link” scale instead of the “response” scale, which is often the default.\nWarning #2: For hypothesis tests on objects produced by the marginaleffects package, it is safer to use the hypothesis argument of the original function. Using hypotheses() may not work in certain environments, in lists, or when working programmatically with *apply style functions.\nWarning #3: The tests assume that the hypothesis expression is (approximately) normally distributed, which for non-linear functions of the parameters may not be realistic. More reliable confidence intervals can be obtained using the inferences() function with method = “boot”.\n\nhypotheses(\n model,\n hypothesis = NULL,\n vcov = NULL,\n conf_level = 0.95,\n df = NULL,\n equivalence = NULL,\n joint = FALSE,\n joint_test = \"f\",\n numderiv = \"fdforward\",\n ...\n)\n\n\n\n\n\nmodel\n\n\nModel object or object generated by the comparisons(), slopes(), or predictions() functions.\n\n\n\n\nhypothesis\n\n\nspecify a hypothesis test or custom contrast using a numeric value, vector, or matrix; a string equation; a string; a formula, or a function.\n\n\nNumeric:\n\n\nSingle value: the null hypothesis used in the computation of Z and p (before applying transform).\n\n\nVector: Weights to compute a linear combination of (custom contrast between) estimates. Length equal to the number of rows generated by the same function call, but without the hypothesis argument.\n\n\nMatrix: Each column is a vector of weights, as described above, used to compute a distinct linear combination of (contrast between) estimates. The column names of the matrix are used as labels in the output.\n\n\n\n\nString equation to specify linear or non-linear hypothesis tests. If the term column uniquely identifies rows, terms can be used in the formula. Otherwise, use b1, b2, etc. to identify the position of each parameter. The b* wildcard can be used to test hypotheses on all estimates. If a named vector is used, the names are used as labels in the output. Examples:\n\n\nhp = drat\n\n\nhp + drat = 12\n\n\nb1 + b2 + b3 = 0\n\n\nb* / b1 = 1\n\n\n\n\nString:\n\n\n\"pairwise\": pairwise differences between estimates in each row.\n\n\n\"reference\": differences between the estimates in each row and the estimate in the first row.\n\n\n\"sequential\": difference between an estimate and the estimate in the next row.\n\n\n\"meandev\": difference between an estimate and the mean of all estimates.\n\n\n\"meanotherdev\": difference between an estimate and the mean of all other estimates, excluding the current one.\n\n\n\"revpairwise\", \"revreference\", \"revsequential\": inverse of the corresponding hypotheses, as described above.\n\n\n\n\nFormula:\n\n\ncomparison ~ pairs | group\n\n\nLeft-hand side determines the type of comparison to conduct: difference or ratio.
If the left-hand side is empty, difference is chosen.\n\n\nRight-hand side determines the pairs of estimates to compare: reference, sequential, or meandev\n\n\nOptional: Users can supply grouping variables after a vertical bar to conduct comparisons within subsets.\n\n\nExamples:\n\n\n~ reference\n\n\nratio ~ pairwise\n\n\ndifference ~ pairwise | groupid\n\n\n\n\n\n\nFunction:\n\n\nAccepts an argument x: object produced by a marginaleffects function or a data frame with column rowid and estimate\n\n\nReturns a data frame with columns term and estimate (mandatory) and rowid (optional).\n\n\nThe function can also accept optional input arguments: newdata, by, draws.\n\n\nThis function approach will not work for Bayesian models or with bootstrapping. In those cases, it is easy to use posterior_draws() to extract and manipulate the draws directly.\n\n\n\n\nSee the Examples section below and the vignette: https://marginaleffects.com/vignettes/hypothesis.html\n\n\n\n\n\n\nvcov\n\n\nType of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:\n\n\nFALSE: Do not compute standard errors. This can speed up computation considerably.\n\n\nTRUE: Unit-level standard errors using the default vcov(model) variance-covariance matrix.\n\n\nString which indicates the kind of uncertainty estimates to return.\n\n\nHeteroskedasticity-consistent: “HC”, “HC0”, “HC1”, “HC2”, “HC3”, “HC4”, “HC4m”, “HC5”. See ?sandwich::vcovHC\n\n\nHeteroskedasticity and autocorrelation consistent: “HAC”\n\n\nMixed-Models degrees of freedom: \"satterthwaite\", \"kenward-roger\"\n\n\nOther: “NeweyWest”, “KernHAC”, “OPG”. See the sandwich package documentation.\n\n\n\n\nOne-sided formula which indicates the name of cluster variables (e.g., ~unit_id). This formula is passed to the cluster argument of the sandwich::vcovCL function.\n\n\nSquare covariance matrix\n\n\nFunction which returns a covariance matrix (e.g., stats::vcov(model))\n\n\n\n\n\n\nconf_level\n\n\nnumeric value between 0 and 1. Confidence level to use to build a confidence interval.\n\n\n\n\ndf\n\n\nDegrees of freedom used to compute p values and confidence intervals. A single numeric value between 1 and Inf. When using joint_test=“f”, the df argument should be a numeric vector of length 2.\n\n\n\n\nequivalence\n\n\nNumeric vector of length 2: bounds used for the two-one-sided test (TOST) of equivalence, and for the non-inferiority and non-superiority tests. See Details section below.\n\n\n\n\njoint\n\n\nJoint test of statistical significance. The null hypothesis value can be set using the hypothesis argument.\n\n\nFALSE: Hypotheses are not tested jointly.\n\n\nTRUE: All parameters are tested jointly.\n\n\nString: A regular expression to match parameters to be tested jointly. grep(joint, perl = TRUE)\n\n\nCharacter vector of parameter names to be tested. Characters refer to the names of the vector returned by coef(object).\n\n\nInteger vector of indices. Which parameter positions to test jointly.\n\n\n\n\n\n\njoint_test\n\n\nA character string specifying the type of test, either \"f\" or \"chisq\".
The null hypothesis is set by the hypothesis argument, with default null equal to 0 for all parameters.\n\n\n\n\nnumderiv\n\n\nstring or list of strings indicating the method to use for the numeric differentiation used to compute delta method standard errors.\n\n\n\"fdforward\": finite difference method with forward differences (default)\n\n\n\"fdcenter\": finite difference method with central differences\n\n\n\"richardson\": Richardson extrapolation method\n\n\nExtra arguments can be specified by passing a list to the numderiv argument, with the name of the method first and named arguments following, ex: numderiv=list(“fdcenter”, eps = 1e-5). When an unknown argument is used, marginaleffects prints the list of valid arguments for each method.\n\n\n\n\n\n\n…\n\n\nAdditional arguments are passed to the predict() method supplied by the modeling package. These arguments are particularly useful for mixed-effects or bayesian models (see the online vignettes on the marginaleffects website). Available arguments can vary from model to model, depending on the range of supported arguments by each modeling package. See the \"Model-Specific Arguments\" section of the ?slopes documentation for a non-exhaustive list of available arguments.\n\n\n\nThe test statistic for the joint Wald test is calculated as (R * theta_hat - r)’ * inv(R * V_hat * R’) * (R * theta_hat - r) / Q, where theta_hat is the vector of estimated parameters, V_hat is the estimated covariance matrix, R is a Q x P matrix for testing Q hypotheses on P parameters, r is a Q x 1 vector for the null hypothesis, and Q is the number of rows in R. If the test is a Chi-squared test, the test statistic is not normalized.\nThe p-value is then calculated based on either the F-distribution (for F-test) or the Chi-squared distribution (for Chi-squared test). For the F-test, the degrees of freedom are Q and (n - P), where n is the sample size and P is the number of parameters. For the Chi-squared test, the degrees of freedom are Q.\n\n\\(\\theta\\) is an estimate, \\(\\sigma_\\theta\\) its estimated standard error, and \\([a, b]\\) are the bounds of the interval supplied to the equivalence argument.\nNon-inferiority:\n\n\n\\(H_0\\): \\(\\theta \\leq a\\)\n\n\n\\(H_1\\): \\(\\theta > a\\)\n\n\n\\(t=(\\theta - a)/\\sigma_\\theta\\)\n\n\np: Upper-tail probability\n\n\nNon-superiority:\n\n\n\\(H_0\\): \\(\\theta \\geq b\\)\n\n\n\\(H_1\\): \\(\\theta < b\\)\n\n\n\\(t=(\\theta - b)/\\sigma_\\theta\\)\n\n\np: Lower-tail probability\n\n\nEquivalence: Two One-Sided Tests (TOST)\n\n\np: Maximum of the non-inferiority and non-superiority p values.\n\n\nThanks to Russell V. Lenth for the excellent emmeans package and documentation which inspired this feature.\n\n\nlibrary(\"marginaleffects\")\n\nlibrary(marginaleffects)\nmod <- lm(mpg ~ hp + wt + factor(cyl), data = mtcars)\n\nhypotheses(mod)\n\n\n Term Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n (Intercept) 35.8460 2.041 17.56 <0.001 227.0 31.8457 39.846319\n hp -0.0231 0.012 -1.93 0.0531 4.2 -0.0465 0.000306\n wt -3.1814 0.720 -4.42 <0.001 16.6 -4.5918 -1.771012\n factor(cyl)6 -3.3590 1.402 -2.40 0.0166 5.9 -6.1062 -0.611803\n factor(cyl)8 -3.1859 2.170 -1.47 0.1422 2.8 -7.4399 1.068169\n\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# Test of equality between coefficients\nhypotheses(mod, hypothesis = \"hp = wt\")\n\n\n Estimate Std.
Error z Pr(>|z|) S 2.5 % 97.5 %\n 3.16 0.72 4.39 <0.001 16.4 1.75 4.57\n\nTerm: hp = wt\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# Non-linear function\nhypotheses(mod, hypothesis = \"exp(hp + wt) = 0.1\")\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n -0.0594 0.0292 -2.04 0.0418 4.6 -0.117 -0.0022\n\nTerm: exp(hp + wt) = 0.1\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# Robust standard errors\nhypotheses(mod, hypothesis = \"hp = wt\", vcov = \"HC3\")\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n 3.16 0.805 3.92 <0.001 13.5 1.58 4.74\n\nTerm: hp = wt\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# b1, b2, ... shortcuts can be used to identify the position of the\n# parameters of interest in the output of\nhypotheses(mod, hypothesis = \"b2 = b3\")\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n 3.16 0.72 4.39 <0.001 16.4 1.75 4.57\n\nTerm: b2 = b3\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# wildcard\nhypotheses(mod, hypothesis = \"b* / b2 = 1\")\n\n\n Term Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n b1 / b2 = 1 -1551 764.0 -2.03 0.0423 4.6 -3048.9 -54\n b2 / b2 = 1 0 NA NA NA NA NA NA\n b3 / b2 = 1 137 78.1 1.75 0.0804 3.6 -16.6 290\n b4 / b2 = 1 144 111.0 1.30 0.1938 2.4 -73.3 362\n b5 / b2 = 1 137 151.9 0.90 0.3679 1.4 -161.0 435\n\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# term names with special characters have to be enclosed in backticks\nhypotheses(mod, hypothesis = \"`factor(cyl)6` = `factor(cyl)8`\")\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n -0.173 1.65 -0.105 0.917 0.1 -3.41 3.07\n\nTerm: `factor(cyl)6` = `factor(cyl)8`\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\nmod2 <- lm(mpg ~ hp * drat, data = mtcars)\nhypotheses(mod2, hypothesis = \"`hp:drat` = drat\")\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n -6.08 2.89 -2.1 0.0357 4.8 -11.8 -0.405\n\nTerm: `hp:drat` = drat\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# predictions(), comparisons(), and slopes()\nmod <- glm(am ~ hp + mpg, data = mtcars, family = binomial)\ncmp <- comparisons(mod, newdata = \"mean\")\nhypotheses(cmp, hypothesis = \"b1 = b2\")\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n -0.28 0.104 -2.7 0.00684 7.2 -0.483 -0.0771\n\nTerm: b1=b2\nType: response \nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\nmfx <- slopes(mod, newdata = \"mean\")\nhypotheses(cmp, hypothesis = \"b2 = 0.2\")\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n 0.0938 0.109 0.857 0.391 1.4 -0.121 0.308\n\nTerm: b2=0.2\nType: response \nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\npre <- predictions(mod, newdata = datagrid(hp = 110, mpg = c(30, 35)))\nhypotheses(pre, hypothesis = \"b1 = b2\")\n\n\n Estimate Std. 
Error z Pr(>|z|) S 2.5 % 97.5 %\n -3.57e-05 0.000172 -0.207 0.836 0.3 -0.000373 0.000302\n\nTerm: b1=b2\nType: response \nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# The `hypothesis` argument can be used to compute standard errors for fitted values\nmod <- glm(am ~ hp + mpg, data = mtcars, family = binomial)\n\nf <- function(x) predict(x, type = \"link\", newdata = mtcars)\np <- hypotheses(mod, hypothesis = f)\nhead(p)\n\n\n Term Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n 1 -1.098 0.716 -1.534 0.125 3.0 -2.50 0.305\n 2 -1.098 0.716 -1.534 0.125 3.0 -2.50 0.305\n 3 0.233 0.781 0.299 0.765 0.4 -1.30 1.764\n 4 -0.595 0.647 -0.919 0.358 1.5 -1.86 0.674\n 5 -0.418 0.647 -0.645 0.519 0.9 -1.69 0.851\n 6 -5.026 2.195 -2.290 0.022 5.5 -9.33 -0.725\n\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\nf <- function(x) predict(x, type = \"response\", newdata = mtcars)\np <- hypotheses(mod, hypothesis = f)\nhead(p)\n\n\n Term Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n 1 0.25005 0.1343 1.862 0.06257 4.0 -0.0131 0.5132\n 2 0.25005 0.1343 1.862 0.06257 4.0 -0.0131 0.5132\n 3 0.55803 0.1926 2.898 0.00376 8.1 0.1806 0.9355\n 4 0.35560 0.1483 2.398 0.01648 5.9 0.0650 0.6462\n 5 0.39710 0.1550 2.562 0.01041 6.6 0.0933 0.7009\n 6 0.00652 0.0142 0.459 0.64653 0.6 -0.0213 0.0344\n\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# Complex aggregation\n# Step 1: Collapse predicted probabilities by outcome level, for each individual\n# Step 2: Take the mean of the collapsed probabilities by group and `cyl`\nlibrary(dplyr)\nlibrary(MASS)\nlibrary(dplyr)\n\ndat <- transform(mtcars, gear = factor(gear))\nmod <- polr(gear ~ factor(cyl) + hp, dat)\n\naggregation_fun <- function(x) {\n predictions(x, vcov = FALSE) |>\n mutate(group = ifelse(group %in% c(\"3\", \"4\"), \"3 & 4\", \"5\")) |>\n summarize(estimate = sum(estimate), .by = c(\"rowid\", \"cyl\", \"group\")) |>\n summarize(estimate = mean(estimate), .by = c(\"cyl\", \"group\")) |>\n rename(term = cyl)\n}\n\nhypotheses(mod, hypothesis = aggregation_fun)\n\n\n Group Term Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n 3 & 4 6 0.8390 0.0651 12.89 <0.001 123.9 0.7115 0.967\n 3 & 4 4 0.7197 0.1099 6.55 <0.001 34.0 0.5044 0.935\n 3 & 4 8 0.9283 0.0174 53.45 <0.001 Inf 0.8943 0.962\n 5 6 0.1610 0.0651 2.47 0.0134 6.2 0.0334 0.289\n 5 4 0.2803 0.1099 2.55 0.0108 6.5 0.0649 0.496\n 5 8 0.0717 0.0174 4.13 <0.001 14.7 0.0377 0.106\n\nColumns: term, group, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# Equivalence, non-inferiority, and non-superiority tests\nmod <- lm(mpg ~ hp + factor(gear), data = mtcars)\np <- predictions(mod, newdata = \"median\")\nhypotheses(p, equivalence = c(17, 18))\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 % p (NonSup) p (NonInf)\n 19.7 1 19.6 <0.001 281.3 17.7 21.6 0.951 0.00404\n p (Equiv) hp gear\n 0.951 123 3\n\nType: response \nColumns: rowid, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, hp, gear, mpg, statistic.noninf, statistic.nonsup, p.value.noninf, p.value.nonsup, p.value.equiv \n\nmfx <- avg_slopes(mod, variables = \"hp\")\nhypotheses(mfx, equivalence = c(-.1, .1))\n\n\n Estimate Std. 
Error z Pr(>|z|) S 2.5 % 97.5 % p (NonSup) p (NonInf)\n -0.0669 0.011 -6.05 <0.001 29.4 -0.0885 -0.0452 <0.001 0.00135\n p (Equiv)\n 0.00135\n\nTerm: hp\nType: response \nComparison: mean(dY/dX)\nColumns: term, contrast, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, predicted_lo, predicted_hi, predicted, statistic.noninf, statistic.nonsup, p.value.noninf, p.value.nonsup, p.value.equiv \n\ncmp <- avg_comparisons(mod, variables = \"gear\", hypothesis = \"pairwise\")\nhypotheses(cmp, equivalence = c(0, 10))\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 % p (NonSup) p (NonInf)\n -3.94 2.05 -1.92 0.0543 4.2 -7.95 0.0727 <0.001 0.973\n p (Equiv)\n 0.973\n\nTerm: (mean(4) - mean(3)) - (mean(5) - mean(3))\nType: response \nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, statistic.noninf, statistic.nonsup, p.value.noninf, p.value.nonsup, p.value.equiv \n\n# joint hypotheses: character vector\nmodel <- lm(mpg ~ as.factor(cyl) * hp, data = mtcars)\nhypotheses(model, joint = c(\"as.factor(cyl)6:hp\", \"as.factor(cyl)8:hp\"))\n\n\n\nJoint hypothesis test:\nas.factor(cyl)6:hp = 0\nas.factor(cyl)8:hp = 0\n \n F Pr(>|F|) Df 1 Df 2\n 2.11 0.142 2 26\n\nColumns: statistic, p.value, df1, df2 \n\n# joint hypotheses: regular expression\nhypotheses(model, joint = \"cyl\")\n\n\n\nJoint hypothesis test:\n as.factor(cyl)6 = 0\n as.factor(cyl)8 = 0\n as.factor(cyl)6:hp = 0\n as.factor(cyl)8:hp = 0\n \n F Pr(>|F|) Df 1 Df 2\n 5.7 0.00197 4 26\n\nColumns: statistic, p.value, df1, df2 \n\n# joint hypotheses: integer indices\nhypotheses(model, joint = 2:3)\n\n\n\nJoint hypothesis test:\n as.factor(cyl)6 = 0\n as.factor(cyl)8 = 0\n \n F Pr(>|F|) Df 1 Df 2\n 6.12 0.00665 2 26\n\nColumns: statistic, p.value, df1, df2 \n\n# joint hypotheses: different null hypotheses\nhypotheses(model, joint = 2:3, hypothesis = 1)\n\n\n\nJoint hypothesis test:\n as.factor(cyl)6 = 1\n as.factor(cyl)8 = 1\n \n F Pr(>|F|) Df 1 Df 2\n 6.84 0.00411 2 26\n\nColumns: statistic, p.value, df1, df2 \n\nhypotheses(model, joint = 2:3, hypothesis = 1:2)\n\n\n\nJoint hypothesis test:\n as.factor(cyl)6 = 1\n as.factor(cyl)8 = 2\n \n F Pr(>|F|) Df 1 Df 2\n 7.47 0.00273 2 26\n\nColumns: statistic, p.value, df1, df2 \n\n# joint hypotheses: marginaleffects object\ncmp <- avg_comparisons(model)\nhypotheses(cmp, joint = \"cyl\")\n\n\n\nJoint hypothesis test:\n cyl mean(6) - mean(4) = 0\n cyl mean(8) - mean(4) = 0\n \n F Pr(>|F|) Df 1 Df 2\n 1.6 0.221 2 26\n\nColumns: statistic, p.value, df1, df2", + "text": "Outcome predicted by a fitted model on a specified scale for a given combination of values of the predictor variables, such as their observed values, their means, or factor levels (a.k.a. 
\"reference grid\").\n\n\npredictions(): unit-level (conditional) estimates.\n\n\navg_predictions(): average (marginal) estimates.\n\n\nThe newdata argument and the datagrid() function can be used to control where statistics are evaluated in the predictor space: \"at observed values\", \"at the mean\", \"at representative values\", etc.\nSee the predictions vignette and package website for worked examples and case studies:\n\n\nhttps://marginaleffects.com/vignettes/predictions.html\n\n\nhttps://marginaleffects.com/\n\n\npredictions(\n model,\n newdata = NULL,\n variables = NULL,\n vcov = TRUE,\n conf_level = 0.95,\n type = NULL,\n by = FALSE,\n byfun = NULL,\n wts = FALSE,\n transform = NULL,\n hypothesis = NULL,\n equivalence = NULL,\n p_adjust = NULL,\n df = Inf,\n numderiv = \"fdforward\",\n ...\n)\n\navg_predictions(\n model,\n newdata = NULL,\n variables = NULL,\n vcov = TRUE,\n conf_level = 0.95,\n type = NULL,\n by = TRUE,\n byfun = NULL,\n wts = FALSE,\n transform = NULL,\n hypothesis = NULL,\n equivalence = NULL,\n p_adjust = NULL,\n df = Inf,\n numderiv = \"fdforward\",\n ...\n)\n\n\n\n\n\nmodel\n\n\nModel object\n\n\n\n\nnewdata\n\n\nGrid of predictor values at which we evaluate predictions.\n\n\nWarning: Please avoid modifying your dataset between fitting the model and calling a marginaleffects function. This can sometimes lead to unexpected results.\n\n\nNULL (default): Unit-level predictions for each observed value in the dataset (empirical distribution). The dataset is retrieved using insight::get_data(), which tries to extract data from the environment. This may produce unexpected results if the original data frame has been altered since fitting the model.\n\n\nstring:\n\n\n\"mean\": Predictions evaluated when each predictor is held at its mean or mode.\n\n\n\"median\": Predictions evaluated when each predictor is held at its median or mode.\n\n\n\"balanced\": Predictions evaluated on a balanced grid with every combination of categories and numeric variables held at their means.\n\n\n\"tukey\": Predictions evaluated at Tukey’s 5 numbers.\n\n\n\"grid\": Predictions evaluated on a grid of representative numbers (Tukey’s 5 numbers and unique values of categorical predictors).\n\n\n\n\ndatagrid() call to specify a custom grid of regressors. For example:\n\n\nnewdata = datagrid(cyl = c(4, 6)): cyl variable equal to 4 and 6 and other regressors fixed at their means or modes.\n\n\nSee the Examples section and the datagrid() documentation.\n\n\n\n\nsubset() call with a single argument to select a subset of the dataset used to fit the model, ex: newdata = subset(treatment == 1)\n\n\ndplyr::filter() call with a single argument to select a subset of the dataset used to fit the model, ex: newdata = filter(treatment == 1)\n\n\n\n\n\n\nvariables\n\n\nCounterfactual variables.\n\n\nOutput:\n\n\npredictions(): The entire dataset is replicated once for each unique combination of variables, and predictions are made.\n\n\navg_predictions(): The entire dataset is replicated, predictions are made, and they are marginalized by variables categories.\n\n\nWarning: This can be expensive in large datasets.\n\n\nWarning: Users who need \"conditional\" predictions should use the newdata argument instead of variables.\n\n\n\n\nInput:\n\n\nNULL: computes one prediction per row of newdata\n\n\nCharacter vector: the dataset is replicated once of every combination of unique values of the variables identified in variables.\n\n\nNamed list: names identify the subset of variables of interest and their values. 
For numeric variables, the variables argument supports functions and string shortcuts:\n\n\nA function which returns a numeric value\n\n\nNumeric vector: Contrast between the 2nd element and the 1st element of the x vector.\n\n\n\"iqr\": Contrast across the interquartile range of the regressor.\n\n\n\"sd\": Contrast across one standard deviation around the regressor mean.\n\n\n\"2sd\": Contrast across two standard deviations around the regressor mean.\n\n\n\"minmax\": Contrast between the maximum and the minimum values of the regressor.\n\n\n\"threenum\": mean and 1 standard deviation on both sides\n\n\n\"fivenum\": Tukey’s five numbers\n\n\n\n\n\n\n\n\n\n\nvcov\n\n\nType of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:\n\n\nFALSE: Do not compute standard errors. This can speed up computation considerably.\n\n\nTRUE: Unit-level standard errors using the default vcov(model) variance-covariance matrix.\n\n\nString which indicates the kind of uncertainty estimates to return.\n\n\nHeteroskedasticity-consistent: “HC”, “HC0”, “HC1”, “HC2”, “HC3”, “HC4”, “HC4m”, “HC5”. See ?sandwich::vcovHC\n\n\nHeteroskedasticity and autocorrelation consistent: “HAC”\n\n\nMixed-Models degrees of freedom: \"satterthwaite\", \"kenward-roger\"\n\n\nOther: “NeweyWest”, “KernHAC”, “OPG”. See the sandwich package documentation.\n\n\n\n\nOne-sided formula which indicates the name of cluster variables (e.g., ~unit_id). This formula is passed to the cluster argument of the sandwich::vcovCL function.\n\n\nSquare covariance matrix\n\n\nFunction which returns a covariance matrix (e.g., stats::vcov(model))\n\n\n\n\n\n\nconf_level\n\n\nnumeric value between 0 and 1. Confidence level to use to build a confidence interval.\n\n\n\n\ntype\n\n\nstring indicates the type (scale) of the predictions used to compute contrasts or slopes. This can differ based on the model type, but will typically be a string such as: \"response\", \"link\", \"probs\", or \"zero\". When an unsupported string is entered, the model-specific list of acceptable values is returned in an error message. When type is NULL, the first entry in the error message is used by default.\n\n\n\n\nby\n\n\nAggregate unit-level estimates (aka, marginalize, average over). Valid inputs:\n\n\nFALSE: return the original unit-level estimates.\n\n\nTRUE: aggregate estimates for each term.\n\n\nCharacter vector of column names in newdata or in the data frame produced by calling the function without the by argument.\n\n\nData frame with a by column of group labels, and merging columns shared by newdata or the data frame produced by calling the same function without the by argument.\n\n\nSee examples below.\n\n\nFor more complex aggregations, you can use the FUN argument of the hypotheses() function. See that function’s documentation and the Hypothesis Test vignettes on the marginaleffects website.\n\n\n\n\n\n\nbyfun\n\n\nA function such as mean() or sum() used to aggregate estimates within the subgroups defined by the by argument. NULL uses the mean() function. Must accept a numeric vector and return a single numeric value. This is sometimes used to take the sum or mean of predicted probabilities across outcome or predictor levels. See examples section.\n\n\n\n\nwts\n\n\nlogical, string or numeric: weights to use when computing average predictions, contrasts or slopes. These weights only affect the averaging in avg_*() or with the by argument, and not unit-level estimates. 
See ?weighted.mean\n\n\nstring: column name of the weights variable in newdata. When supplying a column name to wts, it is recommended to supply the original data (including the weights variable) explicitly to newdata.\n\n\nnumeric: vector of length equal to the number of rows in the original data or in newdata (if supplied).\n\n\nFALSE: Equal weights.\n\n\nTRUE: Extract weights from the fitted object with insight::find_weights() and use them when taking weighted averages of estimates. Warning: newdata=datagrid() returns a single average weight, which is equivalent to using wts=FALSE\n\n\n\n\n\n\ntransform\n\n\nA function applied to unit-level adjusted predictions and confidence intervals just before the function returns results. For bayesian models, this function is applied to individual draws from the posterior distribution, before computing summaries.\n\n\n\n\nhypothesis\n\n\nspecify a hypothesis test or custom contrast using a numeric value, vector, or matrix; a string equation; a string; a formula, or a function.\n\n\nNumeric:\n\n\nSingle value: the null hypothesis used in the computation of Z and p (before applying transform).\n\n\nVector: Weights to compute a linear combination of (custom contrast between) estimates. Length equal to the number of rows generated by the same function call, but without the hypothesis argument.\n\n\nMatrix: Each column is a vector of weights, as described above, used to compute a distinct linear combination of (contrast between) estimates. The column names of the matrix are used as labels in the output.\n\n\n\n\nString equation to specify linear or non-linear hypothesis tests. If the term column uniquely identifies rows, terms can be used in the formula. Otherwise, use b1, b2, etc. to identify the position of each parameter. The b* wildcard can be used to test hypotheses on all estimates. If a named vector is used, the names are used as labels in the output. Examples:\n\n\nhp = drat\n\n\nhp + drat = 12\n\n\nb1 + b2 + b3 = 0\n\n\nb* / b1 = 1\n\n\n\n\nString:\n\n\n\"pairwise\": pairwise differences between estimates in each row.\n\n\n\"reference\": differences between the estimates in each row and the estimate in the first row.\n\n\n\"sequential\": difference between an estimate and the estimate in the next row.\n\n\n\"meandev\": difference between an estimate and the mean of all estimates.\n\n\n\"meanotherdev\": difference between an estimate and the mean of all other estimates, excluding the current one.\n\n\n\"revpairwise\", \"revreference\", \"revsequential\": inverse of the corresponding hypotheses, as described above.\n\n\n\n\nFormula:\n\n\ncomparison ~ pairs | group\n\n\nLeft-hand side determines the type of comparison to conduct: difference or ratio. If the left-hand side is empty, difference is chosen.\n\n\nRight-hand side determines the pairs of estimates to compare: reference, sequential, or meandev\n\n\nOptional: Users can supply grouping variables after a vertical bar to conduct comparisons within subsets.\n\n\nExamples:\n\n\n~ reference\n\n\nratio ~ pairwise\n\n\ndifference ~ pairwise | groupid\n\n\n\n\n\n\nFunction:\n\n\nAccepts an argument x: object produced by a marginaleffects function or a data frame with column rowid and estimate\n\n\nReturns a data frame with columns term and estimate (mandatory) and rowid (optional).\n\n\nThe function can also accept optional input arguments: newdata, by, draws.\n\n\nThis function approach will not work for Bayesian models or with bootstrapping.
In those cases, it is easy to use posterior_draws() to extract and manipulate the draws directly.\n\n\n\n\nSee the Examples section below and the vignette: https://marginaleffects.com/vignettes/hypothesis.html\n\n\n\n\n\n\nequivalence\n\n\nNumeric vector of length 2: bounds used for the two-one-sided test (TOST) of equivalence, and for the non-inferiority and non-superiority tests. See Details section below.\n\n\n\n\np_adjust\n\n\nAdjust p-values for multiple comparisons: \"holm\", \"hochberg\", \"hommel\", \"bonferroni\", \"BH\", \"BY\", or \"fdr\". See stats::p.adjust\n\n\n\n\ndf\n\n\nDegrees of freedom used to compute p values and confidence intervals. A single numeric value between 1 and Inf. When df is Inf, the normal distribution is used. When df is finite, the t distribution is used. See insight::get_df for a convenient function to extract degrees of freedom. Ex: slopes(model, df = insight::get_df(model))\n\n\n\n\nnumderiv\n\n\nstring or list of strings indicating the method to use for the numeric differentiation used to compute delta method standard errors.\n\n\n\"fdforward\": finite difference method with forward differences (default)\n\n\n\"fdcenter\": finite difference method with central differences\n\n\n\"richardson\": Richardson extrapolation method\n\n\nExtra arguments can be specified by passing a list to the numderiv argument, with the name of the method first and named arguments following, ex: numderiv=list(“fdcenter”, eps = 1e-5). When an unknown argument is used, marginaleffects prints the list of valid arguments for each method.\n\n\n\n\n\n\n…\n\n\nAdditional arguments are passed to the predict() method supplied by the modeling package. These arguments are particularly useful for mixed-effects or bayesian models (see the online vignettes on the marginaleffects website). Available arguments can vary from model to model, depending on the range of supported arguments by each modeling package. See the \"Model-Specific Arguments\" section of the ?slopes documentation for a non-exhaustive list of available arguments.\n\n\n\nA data.frame with one row per observation and several columns:\n\n\nrowid: row number of the newdata data frame\n\n\ntype: prediction type, as defined by the type argument\n\n\ngroup: (optional) value of the grouped outcome (e.g., categorical outcome models)\n\n\nestimate: predicted outcome\n\n\nstd.error: standard errors computed using the delta method.\n\n\np.value: p value associated with the estimate column. The null is determined by the hypothesis argument (0 by default), and p values are computed before applying the transform argument. For models of class feglm, Gam, glm and negbin, p values are computed on the link scale by default unless the type argument is specified explicitly.\n\n\ns.value: Shannon information transforms of p values. How many consecutive \"heads\" tosses would provide the same amount of evidence (or \"surprise\") against the null hypothesis that the coin is fair? The purpose of S is to calibrate the analyst’s intuition about the strength of evidence encoded in p against a well-known physical phenomenon. See Greenland (2019) and Cole et al.
(2020).\n\n\nconf.low: lower bound of the confidence interval (or equal-tailed interval for bayesian models)\n\n\nconf.high: upper bound of the confidence interval (or equal-tailed interval for bayesian models)\n\n\nSee ?print.marginaleffects for printing options.\n\n\n\navg_predictions(): Average predictions\n\n\nStandard errors for all quantities estimated by marginaleffects can be obtained via the delta method. This requires differentiating a function with respect to the coefficients in the model using a finite difference approach. In some models, the delta method standard errors can be sensitive to various aspects of the numeric differentiation strategy, including the step size. By default, the step size is set to 1e-8, or to 1e-4 times the smallest absolute model coefficient, whichever is largest.\nmarginaleffects can delegate numeric differentiation to the numDeriv package, which allows more flexibility. To do this, users can pass arguments to the numDeriv::jacobian function through a global option. For example:\n\n\noptions(marginaleffects_numDeriv = list(method = “simple”, method.args = list(eps = 1e-6)))\n\n\noptions(marginaleffects_numDeriv = list(method = “Richardson”, method.args = list(eps = 1e-5)))\n\n\noptions(marginaleffects_numDeriv = NULL)\n\n\nSee the \"Standard Errors and Confidence Intervals\" vignette on the marginaleffects website for more details on the computation of standard errors:\nhttps://marginaleffects.com/vignettes/uncertainty.html\nNote that the inferences() function can be used to compute uncertainty estimates using a bootstrap or simulation-based inference. See the vignette:\nhttps://marginaleffects.com/vignettes/bootstrap.html\n\nSome model types allow model-specific arguments to modify the nature of marginal effects, predictions, marginal means, and contrasts. Please report other package-specific predict() arguments on Github so we can add them to the table below.\nhttps://github.com/vincentarelbundock/marginaleffects/issues\n\n\n\nPackage\n\n\nClass\n\n\nArgument\n\n\nDocumentation\n\n\n\n\nbrms\n\n\nbrmsfit\n\n\nndraws\n\n\nbrms::posterior_predict\n\n\n\n\n\n\n\n\nre_formula\n\n\nbrms::posterior_predict\n\n\n\n\nlme4\n\n\nmerMod\n\n\nre.form\n\n\nlme4::predict.merMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nlme4::predict.merMod\n\n\n\n\nglmmTMB\n\n\nglmmTMB\n\n\nre.form\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nzitype\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\nmgcv\n\n\nbam\n\n\nexclude\n\n\nmgcv::predict.bam\n\n\n\n\n\n\ngam\n\n\nexclude\n\n\nmgcv::predict.gam\n\n\n\n\nrobustlmm\n\n\nrlmerMod\n\n\nre.form\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\nMCMCglmm\n\n\nMCMCglmm\n\n\nndraws\n\n\n\n\n\n\nsampleSelection\n\n\nselection\n\n\npart\n\n\nsampleSelection::predict.selection\n\n\n\n\n\n\n\nBy default, credible intervals in bayesian models are built as equal-tailed intervals. This can be changed to a highest density interval by setting a global option:\noptions(“marginaleffects_posterior_interval” = “eti”)\noptions(“marginaleffects_posterior_interval” = “hdi”)\nBy default, the center of the posterior distribution in bayesian models is identified by the median. 
Users can use a different summary function by setting a global option:\noptions(“marginaleffects_posterior_center” = “mean”)\noptions(“marginaleffects_posterior_center” = “median”)\nWhen estimates are averaged using the by argument, the tidy() function, or the summary() function, the posterior distribution is marginalized twice over. First, we take the average across units but within each iteration of the MCMC chain, according to what the user requested in the by argument or the tidy()/summary() functions. Then, we identify the center of the resulting posterior using the function supplied to the “marginaleffects_posterior_center” option (the median by default).\n\n\\(\\theta\\) is an estimate, \\(\\sigma_\\theta\\) its estimated standard error, and \\([a, b]\\) are the bounds of the interval supplied to the equivalence argument.\nNon-inferiority:\n\n\n\\(H_0\\): \\(\\theta \\leq a\\)\n\n\n\\(H_1\\): \\(\\theta > a\\)\n\n\n\\(t=(\\theta - a)/\\sigma_\\theta\\)\n\n\np: Upper-tail probability\n\n\nNon-superiority:\n\n\n\\(H_0\\): \\(\\theta \\geq b\\)\n\n\n\\(H_1\\): \\(\\theta < b\\)\n\n\n\\(t=(\\theta - b)/\\sigma_\\theta\\)\n\n\np: Lower-tail probability\n\n\nEquivalence: Two One-Sided Tests (TOST)\n\n\np: Maximum of the non-inferiority and non-superiority p values.\n\n\nThanks to Russell V. Lenth for the excellent emmeans package and documentation which inspired this feature.\n\nThe type argument determines the scale of the predictions used to compute quantities of interest with functions from the marginaleffects package. Admissible values for type depend on the model object. When users specify an incorrect value for type, marginaleffects will raise an informative error with a list of valid type values for the specific model object. The first entry in the list in that error message is the default type.\ninvlink(link) is a special type defined by marginaleffects. It is available for some (but not all) models, and only for the predictions() function. With this link type, we first compute predictions on the link scale, then we use the inverse link function to backtransform the predictions to the response scale. This is useful for models with non-linear link functions as it can ensure that confidence intervals stay within desirable bounds, ex: 0 to 1 for a logit model. Note that an average of estimates with type=“invlink(link)” will not always be equivalent to the average of estimates with type=“response”. This type is the default when calling predictions(). It is available—but not default—when calling avg_predictions() or predictions() with the by argument.\nSome of the most common type values are:\nresponse, link, E, Ep, average, class, conditional, count, cum.prob, cumhaz, cumprob, density, detection, disp, ev, expected, expvalue, fitted, hazard, invlink(link), latent, latent_N, linear, linear.predictor, linpred, location, lp, mean, numeric, p, ppd, pr, precision, prediction, prob, probability, probs, quantile, risk, rmst, scale, survival, unconditional, utility, variance, xb, zero, zlink, zprob\n\nBehind the scenes, the arguments of marginaleffects functions are evaluated in this order:\n\n\nnewdata\n\n\nvariables\n\n\ncomparison and slopes\n\n\nby\n\n\nvcov\n\n\nhypothesis\n\n\ntransform\n\n\nThe slopes() and comparisons() functions can use parallelism to speed up computation. Operations are parallelized for the computation of standard errors, at the model coefficient level.
There is always considerable overhead when using parallel computation, mainly involved in passing the whole dataset to the different processes. Thus, parallel computation is most likely to be useful when the model includes many parameters and the dataset is relatively small.\nWarning: In many cases, parallel processing will not be useful at all.\nTo activate parallel computation, users must load the future.apply package, call the plan() function, and set a global option. For example:\n\nlibrary(future.apply)\nplan(\"multicore\", workers = 4)\noptions(marginaleffects_parallel = TRUE)\n\nslopes(model)\n\n\nTo disable parallelism in marginaleffects altogether, you can set a global option:\n\noptions(marginaleffects_parallel = FALSE)\n\n\n\nThe behavior of marginaleffects functions can be modified by setting global options.\nDisable some safety checks:\n\noptions(marginaleffects_safe = FALSE)\n\n\nOmit some columns from the printed output:\n\noptions(marginaleffects_print_omit = c(\"p.value\", \"s.value\"))\n\n\n\n\n\nGreenland S. 2019. \"Valid P-Values Behave Exactly as They Should: Some Misleading Criticisms of P-Values and Their Resolution With S-Values.\" The American Statistician. 73(S1): 106–114.\n\n\nCole, Stephen R, Jessie K Edwards, and Sander Greenland. 2020. \"Surprise!\" American Journal of Epidemiology 190 (2): 191–93. https://doi.org/10.1093/aje/kwaa136\n\n\n\nlibrary(\"marginaleffects\")\n\n\n\n# Adjusted Prediction for every row of the original dataset\nmod <- lm(mpg ~ hp + factor(cyl), data = mtcars)\npred <- predictions(mod)\nhead(pred)\n\n# Adjusted Predictions at User-Specified Values of the Regressors\npredictions(mod, newdata = datagrid(hp = c(100, 120), cyl = 4))\n\nm <- lm(mpg ~ hp + drat + factor(cyl) + factor(am), data = mtcars)\npredictions(m, newdata = datagrid(FUN_factor = unique, FUN_numeric = median))\n\n# Average Adjusted Predictions (AAP)\nlibrary(dplyr)\nmod <- lm(mpg ~ hp * am * vs, mtcars)\n\navg_predictions(mod)\n\npredictions(mod, by = \"am\")\n\n# Conditional Adjusted Predictions\nplot_predictions(mod, condition = \"hp\")\n\n# Counterfactual predictions with the `variables` argument\n# the `mtcars` dataset has 32 rows\n\nmod <- lm(mpg ~ hp + am, data = mtcars)\np <- predictions(mod)\nhead(p)\nnrow(p)\n\n# average counterfactual predictions\navg_predictions(mod, variables = \"am\")\n\n# counterfactual predictions obtained by replicating the entire dataset for different\n# values of the predictors\np <- predictions(mod, variables = list(hp = c(90, 110)))\nnrow(p)\n\n\n# hypothesis test: is the prediction in the 1st row equal to the prediction in the 2nd row?\nmod <- lm(mpg ~ wt + drat, data = mtcars)\n\npredictions(\n mod,\n newdata = datagrid(wt = 2:3),\n hypothesis = \"b1 = b2\")\n\n# same hypothesis test using row indices\npredictions(\n mod,\n newdata = datagrid(wt = 2:3),\n hypothesis = \"b1 - b2 = 0\")\n\n# same hypothesis test using numeric vector of weights\npredictions(\n mod,\n newdata = datagrid(wt = 2:3),\n hypothesis = c(1, -1))\n\n# two custom contrasts using a matrix of weights\nlc <- matrix(c(\n 1, -1,\n 2, 3),\n ncol = 2)\npredictions(\n mod,\n newdata = datagrid(wt = 2:3),\n hypothesis = lc)\n\n\n# `by` argument\nmod <- lm(mpg ~ hp * am * vs, data = mtcars)\npredictions(mod, by = c(\"am\", \"vs\"))\n\nlibrary(nnet)\nnom <- multinom(factor(gear) ~ mpg + am * vs, data = mtcars, trace = FALSE)\n\n# first few raw predictions\npredictions(nom, type = \"probs\") |> head()\n\n# average predictions\navg_predictions(nom, type = \"probs\", by = \"group\")\n\nby
<- data.frame(\n group = c(\"3\", \"4\", \"5\"),\n by = c(\"3,4\", \"3,4\", \"5\"))\n\npredictions(nom, type = \"probs\", by = by)\n\n# sum of predicted probabilities for combined response levels\nmod <- multinom(factor(cyl) ~ mpg + am, data = mtcars, trace = FALSE)\nby <- data.frame(\n by = c(\"4,6\", \"4,6\", \"8\"),\n group = as.character(c(4, 6, 8)))\npredictions(mod, newdata = \"mean\", byfun = sum, by = by)", "crumbs": [ "Model to Meaning", "Functions", - "`hypotheses`" + "`predictions`" ] }, { - "objectID": "man/hypotheses.html#non-linear-tests-for-null-hypotheses-joint-hypotheses-equivalence-non-superiority-and-non-inferiority", - "href": "man/hypotheses.html#non-linear-tests-for-null-hypotheses-joint-hypotheses-equivalence-non-superiority-and-non-inferiority", + "objectID": "man/predictions.html#predictions", + "href": "man/predictions.html#predictions", "title": "", "section": "", - "text": "Uncertainty estimates are calculated as first-order approximate standard errors for linear or non-linear functions of a vector of random variables with known or estimated covariance matrix. In that sense, hypotheses emulates the behavior of the excellent and well-established car::deltaMethod and car::linearHypothesis functions, but it supports more models; requires fewer dependencies; expands the range of tests to equivalence and superiority/inferiority; and offers convenience features like robust standard errors.\nTo learn more, read the hypothesis tests vignette, visit the package website, or scroll down this page for a full list of vignettes:\n\n\nhttps://marginaleffects.com/vignettes/hypothesis.html\n\n\nhttps://marginaleffects.com/\n\n\nWarning #1: Tests are conducted directly on the scale defined by the type argument. For some models, it can make sense to conduct hypothesis or equivalence tests on the “link” scale instead of the “response” scale which is often the default.\nWarning #2: For hypothesis tests on objects produced by the marginaleffects package, it is safer to use the hypothesis argument of the original function. Using hypotheses() may not work in certain environments, in lists, or when working programmatically with *apply style functions.\nWarning #3: The tests assume that the hypothesis expression is (approximately) normally distributed, which for non-linear functions of the parameters may not be realistic. More reliable confidence intervals can be obtained using the inferences() function with method = “boot”.\n\nhypotheses(\n model,\n hypothesis = NULL,\n vcov = NULL,\n conf_level = 0.95,\n df = NULL,\n equivalence = NULL,\n joint = FALSE,\n joint_test = \"f\",\n numderiv = \"fdforward\",\n ...\n)\n\n\n\n\n\nmodel\n\n\nModel object or object generated by the comparisons(), slopes(), or predictions() functions.\n\n\n\n\nhypothesis\n\n\nspecify a hypothesis test or custom contrast using a numeric value, vector, or matrix; a string equation; string; a formula, or a function.\n\n\nNumeric:\n\n\nSingle value: the null hypothesis used in the computation of Z and p (before applying transform).\n\n\nVector: Weights to compute a linear combination of (custom contrast between) estimates. Length equal to the number of rows generated by the same function call, but without the hypothesis argument.\n\n\nMatrix: Each column is a vector of weights, as describe above, used to compute a distinct linear combination of (contrast between) estimates. The column names of the matrix are used as labels in the output.\n\n\n\n\nString equation to specify linear or non-linear hypothesis tests. 
If the term column uniquely identifies rows, terms can be used in the formula. Otherwise, use b1, b2, etc. to identify the position of each parameter. The b* wildcard can be used to test hypotheses on all estimates. If a named vector is used, the names are used as labels in the output. Examples:\n\n\nhp = drat\n\n\nhp + drat = 12\n\n\nb1 + b2 + b3 = 0\n\n\nb* / b1 = 1\n\n\n\n\nString:\n\n\n\"pairwise\": pairwise differences between estimates in each row.\n\n\n\"reference\": differences between the estimates in each row and the estimate in the first row.\n\n\n\"sequential\": difference between an estimate and the estimate in the next row.\n\n\n\"meandev\": difference between an estimate and the mean of all estimates.\n\n\n\"meanotherdev\": difference between an estimate and the mean of all other estimates, excluding the current one.\n\n\n\"revpairwise\", \"revreference\", \"revsequential\": inverse of the corresponding hypotheses, as described above.\n\n\n\n\nFormula:\n\n\ncomparison ~ pairs | group\n\n\nLeft-hand side determines the type of comparison to conduct: difference or ratio. If the left-hand side is empty, difference is chosen.\n\n\nRight-hand side determines the pairs of estimates to compare: reference, sequential, or meandev\n\n\nOptional: Users can supply grouping variables after a vertical bar to conduct comparisons within subsets.\n\n\nExamples:\n\n\n~ reference\n\n\nratio ~ pairwise\n\n\ndifference ~ pairwise | groupid\n\n\n\n\n\n\nFunction:\n\n\nAccepts an argument x: object produced by a marginaleffects function or a data frame with column rowid and estimate\n\n\nReturns a data frame with columns term and estimate (mandatory) and rowid (optional).\n\n\nThe function can also accept optional input arguments: newdata, by, draws.\n\n\nThis function approach will not work for Bayesian models or with bootstrapping. In those cases, it is easy to use posterior_draws() to extract and manipulate the draws directly.\n\n\n\n\nSee the Examples section below and the vignette: https://marginaleffects.com/vignettes/hypothesis.html\n\n\n\n\n\n\nvcov\n\n\nType of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:\n\n\nFALSE: Do not compute standard errors. This can speed up computation considerably.\n\n\nTRUE: Unit-level standard errors using the default vcov(model) variance-covariance matrix.\n\n\nString which indicates the kind of uncertainty estimates to return.\n\n\nHeteroskedasticity-consistent: “HC”, “HC0”, “HC1”, “HC2”, “HC3”, “HC4”, “HC4m”, “HC5”. See ?sandwich::vcovHC\n\n\nHeteroskedasticity and autocorrelation consistent: “HAC”\n\n\nMixed-Models degrees of freedom: \"satterthwaite\", \"kenward-roger\"\n\n\nOther: “NeweyWest”, “KernHAC”, “OPG”. See the sandwich package documentation.\n\n\n\n\nOne-sided formula which indicates the name of cluster variables (e.g., ~unit_id). This formula is passed to the cluster argument of the sandwich::vcovCL function.\n\n\nSquare covariance matrix\n\n\nFunction which returns a covariance matrix (e.g., stats::vcov(model))\n\n\n\n\n\n\nconf_level\n\n\nnumeric value between 0 and 1. Confidence level to use to build a confidence interval.\n\n\n\n\ndf\n\n\nDegrees of freedom used to compute p values and confidence intervals. A single numeric value between 1 and Inf. When using joint_test=“f”, the df argument should be a numeric vector of length 2.\n\n\n\n\nequivalence\n\n\nNumeric vector of length 2: bounds used for the two-one-sided test (TOST) of equivalence, and for the non-inferiority and non-superiority tests.
See Details section below.\n\n\n\n\njoint\n\n\nJoint test of statistical significance. The null hypothesis value can be set using the hypothesis argument.\n\n\nFALSE: Hypotheses are not tested jointly.\n\n\nTRUE: All parameters are tested jointly.\n\n\nString: A regular expression to match parameters to be tested jointly. grep(joint, perl = TRUE)\n\n\nCharacter vector of parameter names to be tested. Characters refer to the names of the vector returned by coef(object).\n\n\nInteger vector of indices. Which parameter positions to test jointly.\n\n\n\n\n\n\njoint_test\n\n\nA character string specifying the type of test, either \"f\" or \"chisq\". The null hypothesis is set by the hypothesis argument, with default null equal to 0 for all parameters.\n\n\n\n\nnumderiv\n\n\nstring or list of strings indicating the method to use for the numeric differentiation used to compute delta method standard errors.\n\n\n\"fdforward\": finite difference method with forward differences (default)\n\n\n\"fdcenter\": finite difference method with central differences\n\n\n\"richardson\": Richardson extrapolation method\n\n\nExtra arguments can be specified by passing a list to the numderiv argument, with the name of the method first and named arguments following, ex: numderiv=list(“fdcenter”, eps = 1e-5). When an unknown argument is used, marginaleffects prints the list of valid arguments for each method.\n\n\n\n\n\n\n…\n\n\nAdditional arguments are passed to the predict() method supplied by the modeling package. These arguments are particularly useful for mixed-effects or bayesian models (see the online vignettes on the marginaleffects website). Available arguments can vary from model to model, depending on the range of supported arguments by each modeling package. See the \"Model-Specific Arguments\" section of the ?slopes documentation for a non-exhaustive list of available arguments.\n\n\n\nThe test statistic for the joint Wald test is calculated as (R * theta_hat - r)’ * inv(R * V_hat * R’) * (R * theta_hat - r) / Q, where theta_hat is the vector of estimated parameters, V_hat is the estimated covariance matrix, R is a Q x P matrix for testing Q hypotheses on P parameters, r is a Q x 1 vector for the null hypothesis, and Q is the number of rows in R. If the test is a Chi-squared test, the test statistic is not normalized.\nThe p-value is then calculated based on either the F-distribution (for F-test) or the Chi-squared distribution (for Chi-squared test). For the F-test, the degrees of freedom are Q and (n - P), where n is the sample size and P is the number of parameters. For the Chi-squared test, the degrees of freedom are Q.\n\n\\(\\theta\\) is an estimate, \\(\\sigma_\\theta\\) its estimated standard error, and \\([a, b]\\) are the bounds of the interval supplied to the equivalence argument.\nNon-inferiority:\n\n\n\\(H_0\\): \\(\\theta \\leq a\\)\n\n\n\\(H_1\\): \\(\\theta > a\\)\n\n\n\\(t=(\\theta - a)/\\sigma_\\theta\\)\n\n\np: Upper-tail probability\n\n\nNon-superiority:\n\n\n\\(H_0\\): \\(\\theta \\geq b\\)\n\n\n\\(H_1\\): \\(\\theta < b\\)\n\n\n\\(t=(\\theta - b)/\\sigma_\\theta\\)\n\n\np: Lower-tail probability\n\n\nEquivalence: Two One-Sided Tests (TOST)\n\n\np: Maximum of the non-inferiority and non-superiority p values.\n\n\nThanks to Russell V.
Lenth for the excellent emmeans package and documentation which inspired this feature.\n\n\nlibrary(\"marginaleffects\")\n\nlibrary(marginaleffects)\nmod <- lm(mpg ~ hp + wt + factor(cyl), data = mtcars)\n\nhypotheses(mod)\n\n\n Term Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n (Intercept) 35.8460 2.041 17.56 <0.001 227.0 31.8457 39.846319\n hp -0.0231 0.012 -1.93 0.0531 4.2 -0.0465 0.000306\n wt -3.1814 0.720 -4.42 <0.001 16.6 -4.5918 -1.771012\n factor(cyl)6 -3.3590 1.402 -2.40 0.0166 5.9 -6.1062 -0.611803\n factor(cyl)8 -3.1859 2.170 -1.47 0.1422 2.8 -7.4399 1.068169\n\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# Test of equality between coefficients\nhypotheses(mod, hypothesis = \"hp = wt\")\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n 3.16 0.72 4.39 <0.001 16.4 1.75 4.57\n\nTerm: hp = wt\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# Non-linear function\nhypotheses(mod, hypothesis = \"exp(hp + wt) = 0.1\")\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n -0.0594 0.0292 -2.04 0.0418 4.6 -0.117 -0.0022\n\nTerm: exp(hp + wt) = 0.1\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# Robust standard errors\nhypotheses(mod, hypothesis = \"hp = wt\", vcov = \"HC3\")\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n 3.16 0.805 3.92 <0.001 13.5 1.58 4.74\n\nTerm: hp = wt\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# b1, b2, ... shortcuts can be used to identify the position of the\n# parameters of interest in the output of\nhypotheses(mod, hypothesis = \"b2 = b3\")\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n 3.16 0.72 4.39 <0.001 16.4 1.75 4.57\n\nTerm: b2 = b3\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# wildcard\nhypotheses(mod, hypothesis = \"b* / b2 = 1\")\n\n\n Term Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n b1 / b2 = 1 -1551 764.0 -2.03 0.0423 4.6 -3048.9 -54\n b2 / b2 = 1 0 NA NA NA NA NA NA\n b3 / b2 = 1 137 78.1 1.75 0.0804 3.6 -16.6 290\n b4 / b2 = 1 144 111.0 1.30 0.1938 2.4 -73.3 362\n b5 / b2 = 1 137 151.9 0.90 0.3679 1.4 -161.0 435\n\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# term names with special characters have to be enclosed in backticks\nhypotheses(mod, hypothesis = \"`factor(cyl)6` = `factor(cyl)8`\")\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n -0.173 1.65 -0.105 0.917 0.1 -3.41 3.07\n\nTerm: `factor(cyl)6` = `factor(cyl)8`\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\nmod2 <- lm(mpg ~ hp * drat, data = mtcars)\nhypotheses(mod2, hypothesis = \"`hp:drat` = drat\")\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n -6.08 2.89 -2.1 0.0357 4.8 -11.8 -0.405\n\nTerm: `hp:drat` = drat\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# predictions(), comparisons(), and slopes()\nmod <- glm(am ~ hp + mpg, data = mtcars, family = binomial)\ncmp <- comparisons(mod, newdata = \"mean\")\nhypotheses(cmp, hypothesis = \"b1 = b2\")\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n -0.28 0.104 -2.7 0.00684 7.2 -0.483 -0.0771\n\nTerm: b1=b2\nType: response \nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\nmfx <- slopes(mod, newdata = \"mean\")\nhypotheses(cmp, hypothesis = \"b2 = 0.2\")\n\n\n Estimate Std. 
Error z Pr(>|z|) S 2.5 % 97.5 %\n 0.0938 0.109 0.857 0.391 1.4 -0.121 0.308\n\nTerm: b2=0.2\nType: response \nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\npre <- predictions(mod, newdata = datagrid(hp = 110, mpg = c(30, 35)))\nhypotheses(pre, hypothesis = \"b1 = b2\")\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n -3.57e-05 0.000172 -0.207 0.836 0.3 -0.000373 0.000302\n\nTerm: b1=b2\nType: response \nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# The `hypothesis` argument can be used to compute standard errors for fitted values\nmod <- glm(am ~ hp + mpg, data = mtcars, family = binomial)\n\nf <- function(x) predict(x, type = \"link\", newdata = mtcars)\np <- hypotheses(mod, hypothesis = f)\nhead(p)\n\n\n Term Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n 1 -1.098 0.716 -1.534 0.125 3.0 -2.50 0.305\n 2 -1.098 0.716 -1.534 0.125 3.0 -2.50 0.305\n 3 0.233 0.781 0.299 0.765 0.4 -1.30 1.764\n 4 -0.595 0.647 -0.919 0.358 1.5 -1.86 0.674\n 5 -0.418 0.647 -0.645 0.519 0.9 -1.69 0.851\n 6 -5.026 2.195 -2.290 0.022 5.5 -9.33 -0.725\n\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\nf <- function(x) predict(x, type = \"response\", newdata = mtcars)\np <- hypotheses(mod, hypothesis = f)\nhead(p)\n\n\n Term Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n 1 0.25005 0.1343 1.862 0.06257 4.0 -0.0131 0.5132\n 2 0.25005 0.1343 1.862 0.06257 4.0 -0.0131 0.5132\n 3 0.55803 0.1926 2.898 0.00376 8.1 0.1806 0.9355\n 4 0.35560 0.1483 2.398 0.01648 5.9 0.0650 0.6462\n 5 0.39710 0.1550 2.562 0.01041 6.6 0.0933 0.7009\n 6 0.00652 0.0142 0.459 0.64653 0.6 -0.0213 0.0344\n\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# Complex aggregation\n# Step 1: Collapse predicted probabilities by outcome level, for each individual\n# Step 2: Take the mean of the collapsed probabilities by group and `cyl`\nlibrary(dplyr)\nlibrary(MASS)\nlibrary(dplyr)\n\ndat <- transform(mtcars, gear = factor(gear))\nmod <- polr(gear ~ factor(cyl) + hp, dat)\n\naggregation_fun <- function(x) {\n predictions(x, vcov = FALSE) |>\n mutate(group = ifelse(group %in% c(\"3\", \"4\"), \"3 & 4\", \"5\")) |>\n summarize(estimate = sum(estimate), .by = c(\"rowid\", \"cyl\", \"group\")) |>\n summarize(estimate = mean(estimate), .by = c(\"cyl\", \"group\")) |>\n rename(term = cyl)\n}\n\nhypotheses(mod, hypothesis = aggregation_fun)\n\n\n Group Term Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n 3 & 4 6 0.8390 0.0651 12.89 <0.001 123.9 0.7115 0.967\n 3 & 4 4 0.7197 0.1099 6.55 <0.001 34.0 0.5044 0.935\n 3 & 4 8 0.9283 0.0174 53.45 <0.001 Inf 0.8943 0.962\n 5 6 0.1610 0.0651 2.47 0.0134 6.2 0.0334 0.289\n 5 4 0.2803 0.1099 2.55 0.0108 6.5 0.0649 0.496\n 5 8 0.0717 0.0174 4.13 <0.001 14.7 0.0377 0.106\n\nColumns: term, group, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# Equivalence, non-inferiority, and non-superiority tests\nmod <- lm(mpg ~ hp + factor(gear), data = mtcars)\np <- predictions(mod, newdata = \"median\")\nhypotheses(p, equivalence = c(17, 18))\n\n\n Estimate Std. 
Error z Pr(>|z|) S 2.5 % 97.5 % p (NonSup) p (NonInf)\n 19.7 1 19.6 <0.001 281.3 17.7 21.6 0.951 0.00404\n p (Equiv) hp gear\n 0.951 123 3\n\nType: response \nColumns: rowid, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, hp, gear, mpg, statistic.noninf, statistic.nonsup, p.value.noninf, p.value.nonsup, p.value.equiv \n\nmfx <- avg_slopes(mod, variables = \"hp\")\nhypotheses(mfx, equivalence = c(-.1, .1))\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 % p (NonSup) p (NonInf)\n -0.0669 0.011 -6.05 <0.001 29.4 -0.0885 -0.0452 <0.001 0.00135\n p (Equiv)\n 0.00135\n\nTerm: hp\nType: response \nComparison: mean(dY/dX)\nColumns: term, contrast, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, predicted_lo, predicted_hi, predicted, statistic.noninf, statistic.nonsup, p.value.noninf, p.value.nonsup, p.value.equiv \n\ncmp <- avg_comparisons(mod, variables = \"gear\", hypothesis = \"pairwise\")\nhypotheses(cmp, equivalence = c(0, 10))\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 % p (NonSup) p (NonInf)\n -3.94 2.05 -1.92 0.0543 4.2 -7.95 0.0727 <0.001 0.973\n p (Equiv)\n 0.973\n\nTerm: (mean(4) - mean(3)) - (mean(5) - mean(3))\nType: response \nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, statistic.noninf, statistic.nonsup, p.value.noninf, p.value.nonsup, p.value.equiv \n\n# joint hypotheses: character vector\nmodel <- lm(mpg ~ as.factor(cyl) * hp, data = mtcars)\nhypotheses(model, joint = c(\"as.factor(cyl)6:hp\", \"as.factor(cyl)8:hp\"))\n\n\n\nJoint hypothesis test:\nas.factor(cyl)6:hp = 0\nas.factor(cyl)8:hp = 0\n \n F Pr(>|F|) Df 1 Df 2\n 2.11 0.142 2 26\n\nColumns: statistic, p.value, df1, df2 \n\n# joint hypotheses: regular expression\nhypotheses(model, joint = \"cyl\")\n\n\n\nJoint hypothesis test:\n as.factor(cyl)6 = 0\n as.factor(cyl)8 = 0\n as.factor(cyl)6:hp = 0\n as.factor(cyl)8:hp = 0\n \n F Pr(>|F|) Df 1 Df 2\n 5.7 0.00197 4 26\n\nColumns: statistic, p.value, df1, df2 \n\n# joint hypotheses: integer indices\nhypotheses(model, joint = 2:3)\n\n\n\nJoint hypothesis test:\n as.factor(cyl)6 = 0\n as.factor(cyl)8 = 0\n \n F Pr(>|F|) Df 1 Df 2\n 6.12 0.00665 2 26\n\nColumns: statistic, p.value, df1, df2 \n\n# joint hypotheses: different null hypotheses\nhypotheses(model, joint = 2:3, hypothesis = 1)\n\n\n\nJoint hypothesis test:\n as.factor(cyl)6 = 1\n as.factor(cyl)8 = 1\n \n F Pr(>|F|) Df 1 Df 2\n 6.84 0.00411 2 26\n\nColumns: statistic, p.value, df1, df2 \n\nhypotheses(model, joint = 2:3, hypothesis = 1:2)\n\n\n\nJoint hypothesis test:\n as.factor(cyl)6 = 1\n as.factor(cyl)8 = 2\n \n F Pr(>|F|) Df 1 Df 2\n 7.47 0.00273 2 26\n\nColumns: statistic, p.value, df1, df2 \n\n# joint hypotheses: marginaleffects object\ncmp <- avg_comparisons(model)\nhypotheses(cmp, joint = \"cyl\")\n\n\n\nJoint hypothesis test:\n cyl mean(6) - mean(4) = 0\n cyl mean(8) - mean(4) = 0\n \n F Pr(>|F|) Df 1 Df 2\n 1.6 0.221 2 26\n\nColumns: statistic, p.value, df1, df2", + "text": "Outcome predicted by a fitted model on a specified scale for a given combination of values of the predictor variables, such as their observed values, their means, or factor levels (a.k.a. 
\"reference grid\").\n\n\npredictions(): unit-level (conditional) estimates.\n\n\navg_predictions(): average (marginal) estimates.\n\n\nThe newdata argument and the datagrid() function can be used to control where statistics are evaluated in the predictor space: \"at observed values\", \"at the mean\", \"at representative values\", etc.\nSee the predictions vignette and package website for worked examples and case studies:\n\n\nhttps://marginaleffects.com/vignettes/predictions.html\n\n\nhttps://marginaleffects.com/\n\n\npredictions(\n model,\n newdata = NULL,\n variables = NULL,\n vcov = TRUE,\n conf_level = 0.95,\n type = NULL,\n by = FALSE,\n byfun = NULL,\n wts = FALSE,\n transform = NULL,\n hypothesis = NULL,\n equivalence = NULL,\n p_adjust = NULL,\n df = Inf,\n numderiv = \"fdforward\",\n ...\n)\n\navg_predictions(\n model,\n newdata = NULL,\n variables = NULL,\n vcov = TRUE,\n conf_level = 0.95,\n type = NULL,\n by = TRUE,\n byfun = NULL,\n wts = FALSE,\n transform = NULL,\n hypothesis = NULL,\n equivalence = NULL,\n p_adjust = NULL,\n df = Inf,\n numderiv = \"fdforward\",\n ...\n)\n\n\n\n\n\nmodel\n\n\nModel object\n\n\n\n\nnewdata\n\n\nGrid of predictor values at which we evaluate predictions.\n\n\nWarning: Please avoid modifying your dataset between fitting the model and calling a marginaleffects function. This can sometimes lead to unexpected results.\n\n\nNULL (default): Unit-level predictions for each observed value in the dataset (empirical distribution). The dataset is retrieved using insight::get_data(), which tries to extract data from the environment. This may produce unexpected results if the original data frame has been altered since fitting the model.\n\n\nstring:\n\n\n\"mean\": Predictions evaluated when each predictor is held at its mean or mode.\n\n\n\"median\": Predictions evaluated when each predictor is held at its median or mode.\n\n\n\"balanced\": Predictions evaluated on a balanced grid with every combination of categories and numeric variables held at their means.\n\n\n\"tukey\": Predictions evaluated at Tukey’s 5 numbers.\n\n\n\"grid\": Predictions evaluated on a grid of representative numbers (Tukey’s 5 numbers and unique values of categorical predictors).\n\n\n\n\ndatagrid() call to specify a custom grid of regressors. For example:\n\n\nnewdata = datagrid(cyl = c(4, 6)): cyl variable equal to 4 and 6 and other regressors fixed at their means or modes.\n\n\nSee the Examples section and the datagrid() documentation.\n\n\n\n\nsubset() call with a single argument to select a subset of the dataset used to fit the model, ex: newdata = subset(treatment == 1)\n\n\ndplyr::filter() call with a single argument to select a subset of the dataset used to fit the model, ex: newdata = filter(treatment == 1)\n\n\n\n\n\n\nvariables\n\n\nCounterfactual variables.\n\n\nOutput:\n\n\npredictions(): The entire dataset is replicated once for each unique combination of variables, and predictions are made.\n\n\navg_predictions(): The entire dataset is replicated, predictions are made, and they are marginalized by variables categories.\n\n\nWarning: This can be expensive in large datasets.\n\n\nWarning: Users who need \"conditional\" predictions should use the newdata argument instead of variables.\n\n\n\n\nInput:\n\n\nNULL: computes one prediction per row of newdata\n\n\nCharacter vector: the dataset is replicated once of every combination of unique values of the variables identified in variables.\n\n\nNamed list: names identify the subset of variables of interest and their values. 
For numeric variables, the variables argument supports functions and string shortcuts:\n\n\nA function which returns a numeric value\n\n\nNumeric vector: Contrast between the 2nd element and the 1st element of the x vector.\n\n\n\"iqr\": Contrast across the interquartile range of the regressor.\n\n\n\"sd\": Contrast across one standard deviation around the regressor mean.\n\n\n\"2sd\": Contrast across two standard deviations around the regressor mean.\n\n\n\"minmax\": Contrast between the maximum and the minimum values of the regressor.\n\n\n\"threenum\": mean and 1 standard deviation on both sides\n\n\n\"fivenum\": Tukey’s five numbers\n\n\n\n\n\n\n\n\n\n\nvcov\n\n\nType of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:\n\n\nFALSE: Do not compute standard errors. This can speed up computation considerably.\n\n\nTRUE: Unit-level standard errors using the default vcov(model) variance-covariance matrix.\n\n\nString which indicates the kind of uncertainty estimates to return.\n\n\nHeteroskedasticity-consistent: “HC”, “HC0”, “HC1”, “HC2”, “HC3”, “HC4”, “HC4m”, “HC5”. See ?sandwich::vcovHC\n\n\nHeteroskedasticity and autocorrelation consistent: “HAC”\n\n\nMixed-Models degrees of freedom: \"satterthwaite\", \"kenward-roger\"\n\n\nOther: “NeweyWest”, “KernHAC”, “OPG”. See the sandwich package documentation.\n\n\n\n\nOne-sided formula which indicates the name of cluster variables (e.g., ~unit_id). This formula is passed to the cluster argument of the sandwich::vcovCL function.\n\n\nSquare covariance matrix\n\n\nFunction which returns a covariance matrix (e.g., stats::vcov(model))\n\n\n\n\n\n\nconf_level\n\n\nnumeric value between 0 and 1. Confidence level to use to build a confidence interval.\n\n\n\n\ntype\n\n\nstring indicates the type (scale) of the predictions used to compute contrasts or slopes. This can differ based on the model type, but will typically be a string such as: \"response\", \"link\", \"probs\", or \"zero\". When an unsupported string is entered, the model-specific list of acceptable values is returned in an error message. When type is NULL, the first entry in the error message is used by default.\n\n\n\n\nby\n\n\nAggregate unit-level estimates (aka, marginalize, average over). Valid inputs:\n\n\nFALSE: return the original unit-level estimates.\n\n\nTRUE: aggregate estimates for each term.\n\n\nCharacter vector of column names in newdata or in the data frame produced by calling the function without the by argument.\n\n\nData frame with a by column of group labels, and merging columns shared by newdata or the data frame produced by calling the same function without the by argument.\n\n\nSee examples below.\n\n\nFor more complex aggregations, you can use the FUN argument of the hypotheses() function. See that function’s documentation and the Hypothesis Test vignettes on the marginaleffects website.\n\n\n\n\n\n\nbyfun\n\n\nA function such as mean() or sum() used to aggregate estimates within the subgroups defined by the by argument. NULL uses the mean() function. Must accept a numeric vector and return a single numeric value. This is sometimes used to take the sum or mean of predicted probabilities across outcome or predictor levels. See examples section.\n\n\n\n\nwts\n\n\nlogical, string or numeric: weights to use when computing average predictions, contrasts or slopes. These weights only affect the averaging in avg_*() or with the by argument, and not unit-level estimates. 
See ?weighted.mean\n\n\nstring: column name of the weights variable in newdata. When supplying a column name to wts, it is recommended to supply the original data (including the weights variable) explicitly to newdata.\n\n\nnumeric: vector of length equal to the number of rows in the original data or in newdata (if supplied).\n\n\nFALSE: Equal weights.\n\n\nTRUE: Extract weights from the fitted object with insight::find_weights() and use them when taking weighted averages of estimates. Warning: newdata=datagrid() returns a single average weight, which is equivalent to using wts=FALSE\n\n\n\n\n\n\ntransform\n\n\nA function applied to unit-level adjusted predictions and confidence intervals just before the function returns results. For Bayesian models, this function is applied to individual draws from the posterior distribution, before computing summaries.\n\n\n\n\nhypothesis\n\n\nspecify a hypothesis test or custom contrast using a numeric value, vector, or matrix, a string equation, a string, a formula, or a function.\n\n\nNumeric:\n\n\nSingle value: the null hypothesis used in the computation of Z and p (before applying transform).\n\n\nVector: Weights to compute a linear combination of (custom contrast between) estimates. Length equal to the number of rows generated by the same function call, but without the hypothesis argument.\n\n\nMatrix: Each column is a vector of weights, as described above, used to compute a distinct linear combination of (contrast between) estimates. The column names of the matrix are used as labels in the output.\n\n\n\n\nString equation to specify linear or non-linear hypothesis tests. If the term column uniquely identifies rows, terms can be used in the formula. Otherwise, use b1, b2, etc. to identify the position of each parameter. The b* wildcard can be used to test hypotheses on all estimates. If a named vector is used, the names are used as labels in the output. Examples:\n\n\nhp = drat\n\n\nhp + drat = 12\n\n\nb1 + b2 + b3 = 0\n\n\nb* / b1 = 1\n\n\n\n\nString:\n\n\n\"pairwise\": pairwise differences between estimates in each row.\n\n\n\"reference\": differences between the estimates in each row and the estimate in the first row.\n\n\n\"sequential\": difference between an estimate and the estimate in the next row.\n\n\n\"meandev\": difference between an estimate and the mean of all estimates.\n\n\n\"meanotherdev\": difference between an estimate and the mean of all other estimates, excluding the current one.\n\n\n\"revpairwise\", \"revreference\", \"revsequential\": inverse of the corresponding hypotheses, as described above.\n\n\n\n\nFormula:\n\n\ncomparison ~ pairs | group\n\n\nLeft-hand side determines the type of comparison to conduct: difference or ratio. If the left-hand side is empty, difference is chosen.\n\n\nRight-hand side determines the pairs of estimates to compare: reference, sequential, or meandev\n\n\nOptional: Users can supply grouping variables after a vertical bar to conduct comparisons within subsets.\n\n\nExamples:\n\n\n~ reference\n\n\nratio ~ pairwise\n\n\ndifference ~ pairwise | groupid\n\n\n\n\n\n\nFunction:\n\n\nAccepts an argument x: object produced by a marginaleffects function or a data frame with column rowid and estimate\n\n\nReturns a data frame with columns term and estimate (mandatory) and rowid (optional).\n\n\nThe function can also accept optional input arguments: newdata, by, draws.\n\n\nThis function approach will not work for Bayesian models or with bootstrapping.
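To make the function interface concrete, here is a minimal sketch that reproduces the \"meandev\" contrast by hand (the dev_fun name is purely illustrative, not part of the package API):\n\nlibrary(marginaleffects)\nmod <- lm(mpg ~ wt + drat, data = mtcars)\n# x is the data frame of estimates produced by predictions();\n# the output must contain term and estimate columns\ndev_fun <- function(x) {\n data.frame(\n term = paste0(\"dev\", seq_along(x$estimate)),\n estimate = x$estimate - mean(x$estimate))\n}\npredictions(mod, newdata = datagrid(wt = 2:3), hypothesis = dev_fun)\n\nAs noted above, this function interface is not available for Bayesian models or bootstrap inference.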
In those cases, it is easy to use posterior_draws() to extract and manipulate the draws directly.\n\n\n\n\nSee the Examples section below and the vignette: https://marginaleffects.com/vignettes/hypothesis.html\n\n\n\n\n\n\nequivalence\n\n\nNumeric vector of length 2: bounds used for the two-one-sided test (TOST) of equivalence, and for the non-inferiority and non-superiority tests. See Details section below.\n\n\n\n\np_adjust\n\n\nAdjust p-values for multiple comparisons: \"holm\", \"hochberg\", \"hommel\", \"bonferroni\", \"BH\", \"BY\", or \"fdr\". See stats::p.adjust\n\n\n\n\ndf\n\n\nDegrees of freedom used to compute p values and confidence intervals. A single numeric value between 1 and Inf. When df is Inf, the normal distribution is used. When df is finite, the t distribution is used. See insight::get_df for a convenient function to extract degrees of freedom. Ex: slopes(model, df = insight::get_df(model))\n\n\n\n\nnumderiv\n\n\nstring or list of strings indicating the method to use for the numeric differentiation used to compute delta method standard errors.\n\n\n\"fdforward\": finite difference method with forward differences\n\n\n\"fdcenter\": finite difference method with central differences (default)\n\n\n\"richardson\": Richardson extrapolation method\n\n\nExtra arguments can be specified by passing a list to the numDeriv argument, with the name of the method first and named arguments following, ex: numderiv=list(\"fdcenter\", eps = 1e-5). When an unknown argument is used, marginaleffects prints the list of valid arguments for each method.\n\n\n\n\n\n\n…\n\n\nAdditional arguments are passed to the predict() method supplied by the modeling package. These arguments are particularly useful for mixed-effects or Bayesian models (see the online vignettes on the marginaleffects website). Available arguments can vary from model to model, depending on the range of supported arguments by each modeling package. See the \"Model-Specific Arguments\" section of the ?slopes documentation for a non-exhaustive list of available arguments.\n\n\n\nA data.frame with one row per observation and several columns:\n\n\nrowid: row number of the newdata data frame\n\n\ntype: prediction type, as defined by the type argument\n\n\ngroup: (optional) value of the grouped outcome (e.g., categorical outcome models)\n\n\nestimate: predicted outcome\n\n\nstd.error: standard errors computed using the delta method.\n\n\np.value: p value associated with the estimate column. The null is determined by the hypothesis argument (0 by default), and p values are computed before applying the transform argument. For models of class feglm, Gam, glm and negbin, p values are computed on the link scale by default unless the type argument is specified explicitly.\n\n\ns.value: Shannon information transforms of p values. How many consecutive \"heads\" tosses would provide the same amount of evidence (or \"surprise\") against the null hypothesis that the coin is fair? The purpose of S is to calibrate the analyst’s intuition about the strength of evidence encoded in p against a well-known physical phenomenon. See Greenland (2019) and Cole et al.
(2020).\n\n\nconf.low: lower bound of the confidence interval (or equal-tailed interval for bayesian models)\n\n\nconf.high: upper bound of the confidence interval (or equal-tailed interval for bayesian models)\n\n\nSee ?print.marginaleffects for printing options.\n\n\n\navg_predictions(): Average predictions\n\n\nStandard errors for all quantities estimated by marginaleffects can be obtained via the delta method. This requires differentiating a function with respect to the coefficients in the model using a finite difference approach. In some models, the delta method standard errors can be sensitive to various aspects of the numeric differentiation strategy, including the step size. By default, the step size is set to 1e-8, or to 1e-4 times the smallest absolute model coefficient, whichever is largest.\nmarginaleffects can delegate numeric differentiation to the numDeriv package, which allows more flexibility. To do this, users can pass arguments to the numDeriv::jacobian function through a global option. For example:\n\n\noptions(marginaleffects_numDeriv = list(method = “simple”, method.args = list(eps = 1e-6)))\n\n\noptions(marginaleffects_numDeriv = list(method = “Richardson”, method.args = list(eps = 1e-5)))\n\n\noptions(marginaleffects_numDeriv = NULL)\n\n\nSee the \"Standard Errors and Confidence Intervals\" vignette on the marginaleffects website for more details on the computation of standard errors:\nhttps://marginaleffects.com/vignettes/uncertainty.html\nNote that the inferences() function can be used to compute uncertainty estimates using a bootstrap or simulation-based inference. See the vignette:\nhttps://marginaleffects.com/vignettes/bootstrap.html\n\nSome model types allow model-specific arguments to modify the nature of marginal effects, predictions, marginal means, and contrasts. Please report other package-specific predict() arguments on Github so we can add them to the table below.\nhttps://github.com/vincentarelbundock/marginaleffects/issues\n\n\n\nPackage\n\n\nClass\n\n\nArgument\n\n\nDocumentation\n\n\n\n\nbrms\n\n\nbrmsfit\n\n\nndraws\n\n\nbrms::posterior_predict\n\n\n\n\n\n\n\n\nre_formula\n\n\nbrms::posterior_predict\n\n\n\n\nlme4\n\n\nmerMod\n\n\nre.form\n\n\nlme4::predict.merMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nlme4::predict.merMod\n\n\n\n\nglmmTMB\n\n\nglmmTMB\n\n\nre.form\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nzitype\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\nmgcv\n\n\nbam\n\n\nexclude\n\n\nmgcv::predict.bam\n\n\n\n\n\n\ngam\n\n\nexclude\n\n\nmgcv::predict.gam\n\n\n\n\nrobustlmm\n\n\nrlmerMod\n\n\nre.form\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\nMCMCglmm\n\n\nMCMCglmm\n\n\nndraws\n\n\n\n\n\n\nsampleSelection\n\n\nselection\n\n\npart\n\n\nsampleSelection::predict.selection\n\n\n\n\n\n\n\nBy default, credible intervals in bayesian models are built as equal-tailed intervals. This can be changed to a highest density interval by setting a global option:\noptions(“marginaleffects_posterior_interval” = “eti”)\noptions(“marginaleffects_posterior_interval” = “hdi”)\nBy default, the center of the posterior distribution in bayesian models is identified by the median. 
Users can use a different summary function by setting a global option:\noptions(\"marginaleffects_posterior_center\" = \"mean\")\noptions(\"marginaleffects_posterior_center\" = \"median\")\nWhen estimates are averaged using the by argument, the tidy() function, or the summary() function, the posterior distribution is marginalized in two steps. First, we take the average across units but within each iteration of the MCMC chain, according to what the user requested in the by argument or the tidy()/summary() functions. Then, we identify the center of the resulting posterior using the function supplied to the \"marginaleffects_posterior_center\" option (the median by default).\n\n\\(\\theta\\) is an estimate, \\(\\sigma_\\theta\\) its estimated standard error, and \\([a, b]\\) are the bounds of the interval supplied to the equivalence argument.\nNon-inferiority:\n\n\n\\(H_0\\): \\(\\theta \\leq a\\)\n\n\n\\(H_1\\): \\(\\theta > a\\)\n\n\n\\(t=(\\theta - a)/\\sigma_\\theta\\)\n\n\np: Upper-tail probability\n\n\nNon-superiority:\n\n\n\\(H_0\\): \\(\\theta \\geq b\\)\n\n\n\\(H_1\\): \\(\\theta < b\\)\n\n\n\\(t=(\\theta - b)/\\sigma_\\theta\\)\n\n\np: Lower-tail probability\n\n\nEquivalence: Two One-Sided Tests (TOST)\n\n\np: Maximum of the non-inferiority and non-superiority p values.\n\n\nThanks to Russell V. Lenth for the excellent emmeans package and documentation which inspired this feature.\n\nThe type argument determines the scale of the predictions used to compute quantities of interest with functions from the marginaleffects package. Admissible values for type depend on the model object. When users specify an incorrect value for type, marginaleffects will raise an informative error with a list of valid type values for the specific model object. The first entry in the list in that error message is the default type.\ninvlink(link) is a special type defined by marginaleffects. It is available for some (but not all) models, and only for the predictions() function. With this link type, we first compute predictions on the link scale, then we use the inverse link function to backtransform the predictions to the response scale. This is useful for models with non-linear link functions as it can ensure that confidence intervals stay within desirable bounds, ex: 0 to 1 for a logit model. Note that an average of estimates with type=\"invlink(link)\" will not always be equivalent to the average of estimates with type=\"response\". This type is the default when calling predictions(). It is available, but not the default, when calling avg_predictions() or predictions() with the by argument.\nSome of the most common type values are:\nresponse, link, E, Ep, average, class, conditional, count, cum.prob, cumhaz, cumprob, density, detection, disp, ev, expected, expvalue, fitted, hazard, invlink(link), latent, latent_N, linear, linear.predictor, linpred, location, lp, mean, numeric, p, ppd, pr, precision, prediction, prob, probability, probs, quantile, risk, rmst, scale, survival, unconditional, utility, variance, xb, zero, zlink, zprob\n\nBehind the scenes, the arguments of marginaleffects functions are evaluated in this order:\n\n\nnewdata\n\n\nvariables\n\n\ncomparison and slopes\n\n\nby\n\n\nvcov\n\n\nhypothesis\n\n\ntransform\n\n\nThe slopes() and comparisons() functions can use parallelism to speed up computation. Operations are parallelized for the computation of standard errors, at the model coefficient level.
There is always considerable overhead when using parallel computation, mainly involved in passing the whole dataset to the different processes. Thus, parallel computation is most likely to be useful when the model includes many parameters and the dataset is relatively small.\nWarning: In many cases, parallel processing will not be useful at all.\nTo activate parallel computation, users must load the future.apply package, call the plan() function, and set a global option. For example:\n\nlibrary(future.apply)\nplan(\"multicore\", workers = 4)\noptions(marginaleffects_parallel = TRUE)\n\nslopes(model)\n\n\nTo disable parallelism in marginaleffects altogether, you can set a global option:\n\noptions(marginaleffects_parallel = FALSE)\n\n\n\nThe behavior of marginaleffects functions can be modified by setting global options.\nDisable some safety checks:\n\noptions(marginaleffects_safe = FALSE)\n\n\nOmit some columns from the printed output:\n\noptions(marginaleffects_print_omit = c(\"p.value\", \"s.value\"))\n\n\n\n\n\nGreenland S. 2019. \"Valid P-Values Behave Exactly as They Should: Some Misleading Criticisms of P-Values and Their Resolution With S-Values.\" The American Statistician. 73(S1): 106–114.\n\n\nCole, Stephen R, Jessie K Edwards, and Sander Greenland. 2020. \"Surprise!\" American Journal of Epidemiology 190 (2): 191–93. https://doi.org/10.1093/aje/kwaa136\n\n\n\nlibrary(\"marginaleffects\")\n\n\n\n# Adjusted Prediction for every row of the original dataset\nmod <- lm(mpg ~ hp + factor(cyl), data = mtcars)\npred <- predictions(mod)\nhead(pred)\n\n# Adjusted Predictions at User-Specified Values of the Regressors\npredictions(mod, newdata = datagrid(hp = c(100, 120), cyl = 4))\n\nm <- lm(mpg ~ hp + drat + factor(cyl) + factor(am), data = mtcars)\npredictions(m, newdata = datagrid(FUN_factor = unique, FUN_numeric = median))\n\n# Average Adjusted Predictions (AAP)\nlibrary(dplyr)\nmod <- lm(mpg ~ hp * am * vs, mtcars)\n\navg_predictions(mod)\n\npredictions(mod, by = \"am\")\n\n# Conditional Adjusted Predictions\nplot_predictions(mod, condition = \"hp\")\n\n# Counterfactual predictions with the `variables` argument\n# the `mtcars` dataset has 32 rows\n\nmod <- lm(mpg ~ hp + am, data = mtcars)\np <- predictions(mod)\nhead(p)\nnrow(p)\n\n# average counterfactual predictions\navg_predictions(mod, variables = \"am\")\n\n# counterfactual predictions obtained by replicating the entire dataset for\n# different values of the predictors\np <- predictions(mod, variables = list(hp = c(90, 110)))\nnrow(p)\n\n\n# hypothesis test: is the prediction in the 1st row equal to the prediction in the 2nd row?\nmod <- lm(mpg ~ wt + drat, data = mtcars)\n\npredictions(\n mod,\n newdata = datagrid(wt = 2:3),\n hypothesis = \"b1 = b2\")\n\n# same hypothesis test using row indices\npredictions(\n mod,\n newdata = datagrid(wt = 2:3),\n hypothesis = \"b1 - b2 = 0\")\n\n# same hypothesis test using numeric vector of weights\npredictions(\n mod,\n newdata = datagrid(wt = 2:3),\n hypothesis = c(1, -1))\n\n# two custom contrasts using a matrix of weights\nlc <- matrix(c(\n 1, -1,\n 2, 3),\n ncol = 2)\npredictions(\n mod,\n newdata = datagrid(wt = 2:3),\n hypothesis = lc)\n\n\n# `by` argument\nmod <- lm(mpg ~ hp * am * vs, data = mtcars)\npredictions(mod, by = c(\"am\", \"vs\"))\n\nlibrary(nnet)\nnom <- multinom(factor(gear) ~ mpg + am * vs, data = mtcars, trace = FALSE)\n\n# first few raw predictions\npredictions(nom, type = \"probs\") |> head()\n\n# average predictions\navg_predictions(nom, type = \"probs\", by = \"group\")\n\nby
<- data.frame(\n group = c(\"3\", \"4\", \"5\"),\n by = c(\"3,4\", \"3,4\", \"5\"))\n\npredictions(nom, type = \"probs\", by = by)\n\n# sum of predicted probabilities for combined response levels\nmod <- multinom(factor(cyl) ~ mpg + am, data = mtcars, trace = FALSE)\nby <- data.frame(\n by = c(\"4,6\", \"4,6\", \"8\"),\n group = as.character(c(4, 6, 8)))\npredictions(mod, newdata = \"mean\", byfun = sum, by = by)", "crumbs": [ "Model to Meaning", "Functions", - "`hypotheses`" + "`predictions`" ] }, { - "objectID": "man/inferences.html", - "href": "man/inferences.html", + "objectID": "man/datagrid.html", + "href": "man/datagrid.html", "title": "", "section": "", - "text": "Warning: This function is experimental. It may be renamed, the user interface may change, or the functionality may migrate to arguments in other marginaleffects functions.\nApply this function to a marginaleffects object to change the inferential method used to compute uncertainty estimates.\n\ninferences(\n x,\n method,\n R = 1000,\n conf_type = \"perc\",\n conformal_test = NULL,\n conformal_calibration = NULL,\n conformal_score = \"residual_abs\",\n ...\n)\n\n\n\n\n\nx\n\n\nObject produced by one of the core marginaleffects functions.\n\n\n\n\nmethod\n\n\nString\n\n\n\"delta\": delta method standard errors\n\n\n\"boot\" package\n\n\n\"fwb\": fractional weighted bootstrap\n\n\n\"rsample\" package\n\n\n\"simulation\" from a multivariate normal distribution (Krinsky & Robb, 1986)\n\n\n\"mi\" multiple imputation for missing data\n\n\n\"conformal_split\": prediction intervals using split conformal prediction (see Angelopoulos & Bates, 2022)\n\n\n\"conformal_cv+\": prediction intervals using cross-validation+ conformal prediction (see Barber et al., 2020)\n\n\n\n\n\n\nR\n\n\nNumber of resamples, simulations, or cross-validation folds.\n\n\n\n\nconf_type\n\n\nString: type of bootstrap interval to construct.\n\n\nboot: \"perc\", \"norm\", \"basic\", or \"bca\"\n\n\nfwb: \"perc\", \"norm\", \"basic\", \"bc\", or \"bca\"\n\n\nrsample: \"perc\" or \"bca\"\n\n\nsimulation: argument ignored.\n\n\n\n\n\n\nconformal_test\n\n\nData frame of test data for conformal prediction.\n\n\n\n\nconformal_calibration\n\n\nData frame of calibration data for split conformal prediction (method=“conformal_split).\n\n\n\n\nconformal_score\n\n\nString. Warning: The type argument in predictions() must generate predictions which are on the same scale as the outcome variable. 
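As a brief illustration, a minimal sketch of the split-conformal workflow (the specific row splits of mtcars are purely illustrative):\n\nlibrary(marginaleffects)\nmod <- lm(mpg ~ hp + wt, data = mtcars[1:16, ])\n# 90% prediction intervals calibrated on held-out rows\npredictions(mod, conf_level = 0.9) |>\n inferences(\n method = \"conformal_split\",\n conformal_calibration = mtcars[17:24, ],\n conformal_test = mtcars[25:32, ])\n\nIn every case, the conformal score must be computed from predictions on the outcome scale.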
Typically, this means that type must be \"response\" or \"probs\".\n\n\n\"residual_abs\" or \"residual_sq\" for regression tasks (numeric outcome)\n\n\n\"softmax\" for classification tasks (when predictions() returns a group columns, such as multinomial or ordinal logit models.\n\n\n\n\n\n\n…\n\n\n\n\nIf method=“boot”, additional arguments are passed to boot::boot().\n\n\nIf method=“fwb”, additional arguments are passed to fwb::fwb().\n\n\nIf method=“rsample”, additional arguments are passed to rsample::bootstraps().\n\n\nAdditional arguments are ignored for all other methods.\n\n\n\n\n\nWhen method=“simulation”, we conduct simulation-based inference following the method discussed in Krinsky & Robb (1986):\n\n\nDraw R sets of simulated coefficients from a multivariate normal distribution with mean equal to the original model’s estimated coefficients and variance equal to the model’s variance-covariance matrix (classical, \"HC3\", or other).\n\n\nUse the R sets of coefficients to compute R sets of estimands: predictions, comparisons, slopes, or hypotheses.\n\n\nTake quantiles of the resulting distribution of estimands to obtain a confidence interval and the standard deviation of simulated estimates to estimate the standard error.\n\n\nWhen method=“fwb”, drawn weights are supplied to the model fitting function’s weights argument; if the model doesn’t accept non-integer weights, this method should not be used. If weights were included in the original model fit, they are extracted by weights() and multiplied by the drawn weights. These weights are supplied to the wts argument of the estimation function (e.g., comparisons()).\n\nA marginaleffects object with simulation or bootstrap resamples and objects attached.\n\nKrinsky, I., and A. L. Robb. 1986. “On Approximating the Statistical Properties of Elasticities.” Review of Economics and Statistics 68 (4): 715–9.\nKing, Gary, Michael Tomz, and Jason Wittenberg. \"Making the most of statistical analyses: Improving interpretation and presentation.\" American journal of political science (2000): 347-361\nDowd, Bryan E., William H. Greene, and Edward C. Norton. \"Computation of standard errors.\" Health services research 49.2 (2014): 731-750.\nAngelopoulos, Anastasios N., and Stephen Bates. 2022. \"A Gentle Introduction to Conformal Prediction and Distribution-Free Uncertainty Quantification.\" arXiv. https://doi.org/10.48550/arXiv.2107.07511.\nBarber, Rina Foygel, Emmanuel J. Candes, Aaditya Ramdas, and Ryan J. Tibshirani. 2020. “Predictive Inference with the Jackknife+.” arXiv. http://arxiv.org/abs/1905.02928.\n\n\nlibrary(\"marginaleffects\")\n\nlibrary(marginaleffects)\nlibrary(magrittr)\nset.seed(1024)\nmod <- lm(Sepal.Length ~ Sepal.Width * Species, data = iris)\n\n# bootstrap\navg_predictions(mod, by = \"Species\") %>%\n inferences(method = \"boot\")\n\navg_predictions(mod, by = \"Species\") %>%\n inferences(method = \"rsample\")\n\n# Fractional (bayesian) bootstrap\navg_slopes(mod, by = \"Species\") %>%\n inferences(method = \"fwb\") %>%\n posterior_draws(\"rvar\") %>%\n data.frame()\n\n# Simulation-based inference\nslopes(mod) %>%\n inferences(method = \"simulation\") %>%\n head()", + "text": "Generate a data grid of user-specified values for use in the newdata argument of the predictions(), comparisons(), and slopes() functions. This is useful to define where in the predictor space we want to evaluate the quantities of interest. 
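A quick sketch of the idea, before the hypothetical example that follows:\n\nlibrary(marginaleffects)\nmod <- lm(mpg ~ hp + wt, data = mtcars)\n# predicted mpg at hp = 100 and 200, with wt held at its mean\npredictions(mod, newdata = datagrid(hp = c(100, 200)))\n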
Ex: the predicted outcome or slope for a 37 year old college graduate.\n\ndatagrid(\n ...,\n model = NULL,\n newdata = NULL,\n by = NULL,\n grid_type = \"mean_or_mode\",\n response = FALSE,\n FUN_character = NULL,\n FUN_factor = NULL,\n FUN_logical = NULL,\n FUN_numeric = NULL,\n FUN_integer = NULL,\n FUN_binary = NULL,\n FUN_other = NULL\n)\n\n\n\n\n\n…\n\n\nnamed arguments with vectors of values or functions for user-specified variables.\n\n\nFunctions are applied to the variable in the model dataset or newdata, and must return a vector of the appropriate type.\n\n\nCharacter vectors are automatically transformed to factors if necessary. The output will include all combinations of these variables (see Examples below).\n\n\n\n\n\n\nmodel\n\n\nModel object\n\n\n\n\nnewdata\n\n\ndata.frame (one and only one of the model and newdata arguments can be used.)\n\n\n\n\nby\n\n\ncharacter vector with grouping variables within which FUN_* functions are applied to create \"sub-grids\" with unspecified variables.\n\n\n\n\ngrid_type\n\n\ncharacter. Determines the functions to apply to each variable. The defaults can be overridden by defining individual variables explicitly in …, or by supplying a function to one of the FUN_* arguments.\n\n\n\"mean_or_mode\": Character, factor, logical, and binary variables are set to their modes. Numeric, integer, and other variables are set to their means.\n\n\n\"balanced\": All unique levels of character, factor, logical, and binary variables are preserved. Numeric, integer, and other variables are set to their means. Warning: When there are many variables and many levels per variable, a balanced grid can be very large. In those cases, it is better to use grid_type=\"mean_or_mode\" and to specify the unique levels of a subset of named variables explicitly.\n\n\n\"counterfactual\": the entire dataset is duplicated for each combination of the variable values specified in …. Variables not explicitly supplied to datagrid() are set to their observed values in the original dataset.\n\n\n\n\n\n\nresponse\n\n\nLogical. Should the response variable be included in the grid, even if it is not specified explicitly.\n\n\n\n\nFUN_character\n\n\nthe function to be applied to character variables.\n\n\n\n\nFUN_factor\n\n\nthe function to be applied to factor variables. This only applies if the variable in the original data is a factor. For variables converted to factor in a model-fitting formula, for example, FUN_character is used.\n\n\n\n\nFUN_logical\n\n\nthe function to be applied to logical variables.\n\n\n\n\nFUN_numeric\n\n\nthe function to be applied to numeric variables.\n\n\n\n\nFUN_integer\n\n\nthe function to be applied to integer variables.\n\n\n\n\nFUN_binary\n\n\nthe function to be applied to binary variables.\n\n\n\n\nFUN_other\n\n\nthe function to be applied to other variable types.\n\n\n\nIf datagrid is used in a predictions(), comparisons(), or slopes() call as the newdata argument, the model is automatically inserted in the model argument of the datagrid() call, and users do not need to specify either the model or newdata arguments. The same behavior will occur when the value supplied to newdata= is a function call which starts with \"datagrid\". This is intended to allow users to create convenience shortcuts like:\nlibrary(marginaleffects)\nmod <- lm(mpg ~ am + vs + factor(cyl) + hp, mtcars)\ndatagrid_bal <- function(...)
datagrid(..., grid_type = \"balanced\")\npredictions(mod, newdata = datagrid_bal(cyl = 4))\n\nIf users supply a model, the data used to fit that model is retrieved using the insight::get_data function.\n\nA data.frame in which each row corresponds to one combination of the named predictors supplied by the user via the … dots. Variables which are not explicitly defined are held at their mean or mode.\n\n\nlibrary(\"marginaleffects\")\n\n# The output only has 2 rows, and all the variables except `hp` are at their\n# mean or mode.\ndatagrid(newdata = mtcars, hp = c(100, 110))\n\n mpg cyl disp drat wt qsec vs am gear carb hp\n1 20.09062 6.1875 230.7219 3.596563 3.21725 17.84875 0 0 3.6875 2.8125 100\n2 20.09062 6.1875 230.7219 3.596563 3.21725 17.84875 0 0 3.6875 2.8125 110\n rowid\n1 1\n2 2\n\n# We get the same result by feeding a model instead of a data.frame\nmod <- lm(mpg ~ hp, mtcars)\ndatagrid(model = mod, hp = c(100, 110))\n\n hp rowid\n1 100 1\n2 110 2\n\n# Use in `marginaleffects` to compute \"Typical Marginal Effects\". When used\n# in `slopes()` or `predictions()` we do not need to specify the\n# `model` or `newdata` arguments.\nslopes(mod, newdata = datagrid(hp = c(100, 110)))\n\n\n Term hp Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n hp 100 -0.0682 0.0101 -6.74 <0.001 35.9 -0.0881 -0.0484\n hp 110 -0.0682 0.0101 -6.74 <0.001 35.9 -0.0881 -0.0484\n\nType: response \nColumns: rowid, term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, hp, predicted_lo, predicted_hi, predicted, mpg \n\n# datagrid accepts functions\ndatagrid(hp = range, cyl = unique, newdata = mtcars)\n\n mpg disp drat wt qsec vs am gear carb hp cyl rowid\n1 20.09062 230.7219 3.596563 3.21725 17.84875 0 0 3.6875 2.8125 52 6 1\n2 20.09062 230.7219 3.596563 3.21725 17.84875 0 0 3.6875 2.8125 52 4 2\n3 20.09062 230.7219 3.596563 3.21725 17.84875 0 0 3.6875 2.8125 52 8 3\n4 20.09062 230.7219 3.596563 3.21725 17.84875 0 0 3.6875 2.8125 335 6 4\n5 20.09062 230.7219 3.596563 3.21725 17.84875 0 0 3.6875 2.8125 335 4 5\n6 20.09062 230.7219 3.596563 3.21725 17.84875 0 0 3.6875 2.8125 335 8 6\n\ncomparisons(mod, newdata = datagrid(hp = fivenum))\n\n\n Term hp Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n hp 52 -0.0682 0.0101 -6.74 <0.001 35.9 -0.0881 -0.0484\n hp 96 -0.0682 0.0101 -6.74 <0.001 35.9 -0.0881 -0.0484\n hp 123 -0.0682 0.0101 -6.74 <0.001 35.9 -0.0881 -0.0484\n hp 180 -0.0682 0.0101 -6.74 <0.001 35.9 -0.0881 -0.0484\n hp 335 -0.0682 0.0101 -6.74 <0.001 35.9 -0.0881 -0.0484\n\nType: response \nComparison: +1\nColumns: rowid, term, contrast, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, hp, predicted_lo, predicted_hi, predicted, mpg \n\n# The full dataset is duplicated with each observation given counterfactual\n# values of 100 and 110 for the `hp` variable.
The original `mtcars` includes\n# 32 rows, so the resulting dataset includes 64 rows.\ndg <- datagrid(newdata = mtcars, hp = c(100, 110), grid_type = \"counterfactual\")\nnrow(dg)\n\n[1] 64\n\n# We get the same result by feeding a model instead of a data.frame\nmod <- lm(mpg ~ hp, mtcars)\ndg <- datagrid(model = mod, hp = c(100, 110), grid_type = \"counterfactual\")\nnrow(dg)\n\n[1] 64", "crumbs": [ "Model to Meaning", "Functions", - "`inferences`" + "`datagrid`" ] }, { - "objectID": "man/inferences.html#experimental-bootstrap-conformal-and-simulation-based-inference", - "href": "man/inferences.html#experimental-bootstrap-conformal-and-simulation-based-inference", + "objectID": "man/datagrid.html#data-grids", + "href": "man/datagrid.html#data-grids", "title": "", "section": "", - "text": "Warning: This function is experimental. It may be renamed, the user interface may change, or the functionality may migrate to arguments in other marginaleffects functions.\nApply this function to a marginaleffects object to change the inferential method used to compute uncertainty estimates.\n\ninferences(\n x,\n method,\n R = 1000,\n conf_type = \"perc\",\n conformal_test = NULL,\n conformal_calibration = NULL,\n conformal_score = \"residual_abs\",\n ...\n)\n\n\n\n\n\nx\n\n\nObject produced by one of the core marginaleffects functions.\n\n\n\n\nmethod\n\n\nString\n\n\n\"delta\": delta method standard errors\n\n\n\"boot\" package\n\n\n\"fwb\": fractional weighted bootstrap\n\n\n\"rsample\" package\n\n\n\"simulation\" from a multivariate normal distribution (Krinsky & Robb, 1986)\n\n\n\"mi\" multiple imputation for missing data\n\n\n\"conformal_split\": prediction intervals using split conformal prediction (see Angelopoulos & Bates, 2022)\n\n\n\"conformal_cv+\": prediction intervals using cross-validation+ conformal prediction (see Barber et al., 2020)\n\n\n\n\n\n\nR\n\n\nNumber of resamples, simulations, or cross-validation folds.\n\n\n\n\nconf_type\n\n\nString: type of bootstrap interval to construct.\n\n\nboot: \"perc\", \"norm\", \"basic\", or \"bca\"\n\n\nfwb: \"perc\", \"norm\", \"basic\", \"bc\", or \"bca\"\n\n\nrsample: \"perc\" or \"bca\"\n\n\nsimulation: argument ignored.\n\n\n\n\n\n\nconformal_test\n\n\nData frame of test data for conformal prediction.\n\n\n\n\nconformal_calibration\n\n\nData frame of calibration data for split conformal prediction (method=“conformal_split).\n\n\n\n\nconformal_score\n\n\nString. Warning: The type argument in predictions() must generate predictions which are on the same scale as the outcome variable. 
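As a brief illustration, a minimal sketch of the split-conformal workflow (the specific row splits of mtcars are purely illustrative):\n\nlibrary(marginaleffects)\nmod <- lm(mpg ~ hp + wt, data = mtcars[1:16, ])\n# 90% prediction intervals calibrated on held-out rows\npredictions(mod, conf_level = 0.9) |>\n inferences(\n method = \"conformal_split\",\n conformal_calibration = mtcars[17:24, ],\n conformal_test = mtcars[25:32, ])\n\nIn every case, the conformal score must be computed from predictions on the outcome scale.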
Typically, this means that type must be \"response\" or \"probs\".\n\n\n\"residual_abs\" or \"residual_sq\" for regression tasks (numeric outcome)\n\n\n\"softmax\" for classification tasks (when predictions() returns a group columns, such as multinomial or ordinal logit models.\n\n\n\n\n\n\n…\n\n\n\n\nIf method=“boot”, additional arguments are passed to boot::boot().\n\n\nIf method=“fwb”, additional arguments are passed to fwb::fwb().\n\n\nIf method=“rsample”, additional arguments are passed to rsample::bootstraps().\n\n\nAdditional arguments are ignored for all other methods.\n\n\n\n\n\nWhen method=“simulation”, we conduct simulation-based inference following the method discussed in Krinsky & Robb (1986):\n\n\nDraw R sets of simulated coefficients from a multivariate normal distribution with mean equal to the original model’s estimated coefficients and variance equal to the model’s variance-covariance matrix (classical, \"HC3\", or other).\n\n\nUse the R sets of coefficients to compute R sets of estimands: predictions, comparisons, slopes, or hypotheses.\n\n\nTake quantiles of the resulting distribution of estimands to obtain a confidence interval and the standard deviation of simulated estimates to estimate the standard error.\n\n\nWhen method=“fwb”, drawn weights are supplied to the model fitting function’s weights argument; if the model doesn’t accept non-integer weights, this method should not be used. If weights were included in the original model fit, they are extracted by weights() and multiplied by the drawn weights. These weights are supplied to the wts argument of the estimation function (e.g., comparisons()).\n\nA marginaleffects object with simulation or bootstrap resamples and objects attached.\n\nKrinsky, I., and A. L. Robb. 1986. “On Approximating the Statistical Properties of Elasticities.” Review of Economics and Statistics 68 (4): 715–9.\nKing, Gary, Michael Tomz, and Jason Wittenberg. \"Making the most of statistical analyses: Improving interpretation and presentation.\" American journal of political science (2000): 347-361\nDowd, Bryan E., William H. Greene, and Edward C. Norton. \"Computation of standard errors.\" Health services research 49.2 (2014): 731-750.\nAngelopoulos, Anastasios N., and Stephen Bates. 2022. \"A Gentle Introduction to Conformal Prediction and Distribution-Free Uncertainty Quantification.\" arXiv. https://doi.org/10.48550/arXiv.2107.07511.\nBarber, Rina Foygel, Emmanuel J. Candes, Aaditya Ramdas, and Ryan J. Tibshirani. 2020. “Predictive Inference with the Jackknife+.” arXiv. http://arxiv.org/abs/1905.02928.\n\n\nlibrary(\"marginaleffects\")\n\nlibrary(marginaleffects)\nlibrary(magrittr)\nset.seed(1024)\nmod <- lm(Sepal.Length ~ Sepal.Width * Species, data = iris)\n\n# bootstrap\navg_predictions(mod, by = \"Species\") %>%\n inferences(method = \"boot\")\n\navg_predictions(mod, by = \"Species\") %>%\n inferences(method = \"rsample\")\n\n# Fractional (bayesian) bootstrap\navg_slopes(mod, by = \"Species\") %>%\n inferences(method = \"fwb\") %>%\n posterior_draws(\"rvar\") %>%\n data.frame()\n\n# Simulation-based inference\nslopes(mod) %>%\n inferences(method = \"simulation\") %>%\n head()", + "text": "Generate a data grid of user-specified values for use in the newdata argument of the predictions(), comparisons(), and slopes() functions. This is useful to define where in the predictor space we want to evaluate the quantities of interest. 
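A quick sketch of the idea, before the hypothetical example that follows:\n\nlibrary(marginaleffects)\nmod <- lm(mpg ~ hp + wt, data = mtcars)\n# predicted mpg at hp = 100 and 200, with wt held at its mean\npredictions(mod, newdata = datagrid(hp = c(100, 200)))\n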
Ex: the predicted outcome or slope for a 37 year old college graduate.\n\ndatagrid(\n ...,\n model = NULL,\n newdata = NULL,\n by = NULL,\n grid_type = \"mean_or_mode\",\n response = FALSE,\n FUN_character = NULL,\n FUN_factor = NULL,\n FUN_logical = NULL,\n FUN_numeric = NULL,\n FUN_integer = NULL,\n FUN_binary = NULL,\n FUN_other = NULL\n)\n\n\n\n\n\n…\n\n\nnamed arguments with vectors of values or functions for user-specified variables.\n\n\nFunctions are applied to the variable in the model dataset or newdata, and must return a vector of the appropriate type.\n\n\nCharacter vectors are automatically transformed to factors if necessary. The output will include all combinations of these variables (see Examples below).\n\n\n\n\n\n\nmodel\n\n\nModel object\n\n\n\n\nnewdata\n\n\ndata.frame (one and only one of the model and newdata arguments can be used.)\n\n\n\n\nby\n\n\ncharacter vector with grouping variables within which FUN_* functions are applied to create \"sub-grids\" with unspecified variables.\n\n\n\n\ngrid_type\n\n\ncharacter. Determines the functions to apply to each variable. The defaults can be overridden by defining individual variables explicitly in …, or by supplying a function to one of the FUN_* arguments.\n\n\n\"mean_or_mode\": Character, factor, logical, and binary variables are set to their modes. Numeric, integer, and other variables are set to their means.\n\n\n\"balanced\": All unique levels of character, factor, logical, and binary variables are preserved. Numeric, integer, and other variables are set to their means. Warning: When there are many variables and many levels per variable, a balanced grid can be very large. In those cases, it is better to use grid_type=\"mean_or_mode\" and to specify the unique levels of a subset of named variables explicitly.\n\n\n\"counterfactual\": the entire dataset is duplicated for each combination of the variable values specified in …. Variables not explicitly supplied to datagrid() are set to their observed values in the original dataset.\n\n\n\n\n\n\nresponse\n\n\nLogical. Should the response variable be included in the grid, even if it is not specified explicitly.\n\n\n\n\nFUN_character\n\n\nthe function to be applied to character variables.\n\n\n\n\nFUN_factor\n\n\nthe function to be applied to factor variables. This only applies if the variable in the original data is a factor. For variables converted to factor in a model-fitting formula, for example, FUN_character is used.\n\n\n\n\nFUN_logical\n\n\nthe function to be applied to logical variables.\n\n\n\n\nFUN_numeric\n\n\nthe function to be applied to numeric variables.\n\n\n\n\nFUN_integer\n\n\nthe function to be applied to integer variables.\n\n\n\n\nFUN_binary\n\n\nthe function to be applied to binary variables.\n\n\n\n\nFUN_other\n\n\nthe function to be applied to other variable types.\n\n\n\nIf datagrid is used in a predictions(), comparisons(), or slopes() call as the newdata argument, the model is automatically inserted in the model argument of the datagrid() call, and users do not need to specify either the model or newdata arguments. The same behavior will occur when the value supplied to newdata= is a function call which starts with \"datagrid\". This is intended to allow users to create convenience shortcuts like:\nlibrary(marginaleffects)\nmod <- lm(mpg ~ am + vs + factor(cyl) + hp, mtcars)\ndatagrid_bal <- function(...)
datagrid(..., grid_type = \"balanced\")\npredictions(mod, newdata = datagrid_bal(cyl = 4))\n\nIf users supply a model, the data used to fit that model is retrieved using the insight::get_data function.\n\nA data.frame in which each row corresponds to one combination of the named predictors supplied by the user via the … dots. Variables which are not explicitly defined are held at their mean or mode.\n\n\nlibrary(\"marginaleffects\")\n\n# The output only has 2 rows, and all the variables except `hp` are at their\n# mean or mode.\ndatagrid(newdata = mtcars, hp = c(100, 110))\n\n mpg cyl disp drat wt qsec vs am gear carb hp\n1 20.09062 6.1875 230.7219 3.596563 3.21725 17.84875 0 0 3.6875 2.8125 100\n2 20.09062 6.1875 230.7219 3.596563 3.21725 17.84875 0 0 3.6875 2.8125 110\n rowid\n1 1\n2 2\n\n# We get the same result by feeding a model instead of a data.frame\nmod <- lm(mpg ~ hp, mtcars)\ndatagrid(model = mod, hp = c(100, 110))\n\n hp rowid\n1 100 1\n2 110 2\n\n# Use in `marginaleffects` to compute \"Typical Marginal Effects\". When used\n# in `slopes()` or `predictions()` we do not need to specify the\n# `model` or `newdata` arguments.\nslopes(mod, newdata = datagrid(hp = c(100, 110)))\n\n\n Term hp Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n hp 100 -0.0682 0.0101 -6.74 <0.001 35.9 -0.0881 -0.0484\n hp 110 -0.0682 0.0101 -6.74 <0.001 35.9 -0.0881 -0.0484\n\nType: response \nColumns: rowid, term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, hp, predicted_lo, predicted_hi, predicted, mpg \n\n# datagrid accepts functions\ndatagrid(hp = range, cyl = unique, newdata = mtcars)\n\n mpg disp drat wt qsec vs am gear carb hp cyl rowid\n1 20.09062 230.7219 3.596563 3.21725 17.84875 0 0 3.6875 2.8125 52 6 1\n2 20.09062 230.7219 3.596563 3.21725 17.84875 0 0 3.6875 2.8125 52 4 2\n3 20.09062 230.7219 3.596563 3.21725 17.84875 0 0 3.6875 2.8125 52 8 3\n4 20.09062 230.7219 3.596563 3.21725 17.84875 0 0 3.6875 2.8125 335 6 4\n5 20.09062 230.7219 3.596563 3.21725 17.84875 0 0 3.6875 2.8125 335 4 5\n6 20.09062 230.7219 3.596563 3.21725 17.84875 0 0 3.6875 2.8125 335 8 6\n\ncomparisons(mod, newdata = datagrid(hp = fivenum))\n\n\n Term hp Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n hp 52 -0.0682 0.0101 -6.74 <0.001 35.9 -0.0881 -0.0484\n hp 96 -0.0682 0.0101 -6.74 <0.001 35.9 -0.0881 -0.0484\n hp 123 -0.0682 0.0101 -6.74 <0.001 35.9 -0.0881 -0.0484\n hp 180 -0.0682 0.0101 -6.74 <0.001 35.9 -0.0881 -0.0484\n hp 335 -0.0682 0.0101 -6.74 <0.001 35.9 -0.0881 -0.0484\n\nType: response \nComparison: +1\nColumns: rowid, term, contrast, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, hp, predicted_lo, predicted_hi, predicted, mpg \n\n# The full dataset is duplicated with each observation given counterfactual\n# values of 100 and 110 for the `hp` variable.
The original `mtcars` includes\n# 32 rows, so the resulting dataset includes 64 rows.\ndg <- datagrid(newdata = mtcars, hp = c(100, 110), grid_type = \"counterfactual\")\nnrow(dg)\n\n[1] 64\n\n# We get the same result by feeding a model instead of a data.frame\nmod <- lm(mpg ~ hp, mtcars)\ndg <- datagrid(model = mod, hp = c(100, 110), grid_type = \"counterfactual\")\nnrow(dg)\n\n[1] 64", "crumbs": [ "Model to Meaning", "Functions", - "`inferences`" + "`datagrid`" ] }, { - "objectID": "man/slopes.html", - "href": "man/slopes.html", + "objectID": "man/posterior_draws.html", + "href": "man/posterior_draws.html", "title": "", "section": "", - "text": "Partial derivative of the regression equation with respect to a regressor of interest.\n\n\nslopes(): unit-level (conditional) estimates.\n\n\navg_slopes(): average (marginal) estimates.\n\n\nThe newdata argument and the datagrid() function can be used to control where statistics are evaluated in the predictor space: \"at observed values\", \"at the mean\", \"at representative values\", etc.\nSee the slopes vignette and package website for worked examples and case studies:\n\n\nhttps://marginaleffects.com/vignettes/slopes.html\n\n\nhttps://marginaleffects.com/\n\n\nslopes(\n model,\n newdata = NULL,\n variables = NULL,\n type = NULL,\n by = FALSE,\n vcov = TRUE,\n conf_level = 0.95,\n slope = \"dydx\",\n wts = FALSE,\n hypothesis = NULL,\n equivalence = NULL,\n p_adjust = NULL,\n df = Inf,\n eps = NULL,\n numderiv = \"fdforward\",\n ...\n)\n\navg_slopes(\n model,\n newdata = NULL,\n variables = NULL,\n type = NULL,\n by = TRUE,\n vcov = TRUE,\n conf_level = 0.95,\n slope = \"dydx\",\n wts = FALSE,\n hypothesis = NULL,\n equivalence = NULL,\n p_adjust = NULL,\n df = Inf,\n eps = NULL,\n numderiv = \"fdforward\",\n ...\n)\n\n\n\n\n\nmodel\n\n\nModel object\n\n\n\n\nnewdata\n\n\nGrid of predictor values at which we evaluate the slopes.\n\n\nWarning: Please avoid modifying your dataset between fitting the model and calling a marginaleffects function. This can sometimes lead to unexpected results.\n\n\nNULL (default): Unit-level slopes for each observed value in the dataset (empirical distribution). The dataset is retrieved using insight::get_data(), which tries to extract data from the environment. This may produce unexpected results if the original data frame has been altered since fitting the model.\n\n\ndatagrid() call to specify a custom grid of regressors. 
For example:\n\n\nnewdata = datagrid(cyl = c(4, 6)): cyl variable equal to 4 and 6 and other regressors fixed at their means or modes.\n\n\nSee the Examples section and the datagrid() documentation.\n\n\n\n\nsubset() call with a single argument to select a subset of the dataset used to fit the model, ex: newdata = subset(treatment == 1)\n\n\ndplyr::filter() call with a single argument to select a subset of the dataset used to fit the model, ex: newdata = filter(treatment == 1)\n\n\nstring:\n\n\n\"mean\": Slopes evaluated when each predictor is held at its mean or mode.\n\n\n\"median\": Slopes evaluated when each predictor is held at its median or mode.\n\n\n\"balanced\": Slopes evaluated on a balanced grid with every combination of categories and numeric variables held at their means.\n\n\n\"tukey\": Slopes evaluated at Tukey’s 5 numbers.\n\n\n\"grid\": Slopes evaluated on a grid of representative numbers (Tukey’s 5 numbers and unique values of categorical predictors).\n\n\n\n\n\n\n\n\nvariables\n\n\nFocal variables\n\n\nNULL: compute slopes or comparisons for all the variables in the model object (can be slow).\n\n\nCharacter vector: subset of variables (usually faster).\n\n\n\n\n\n\ntype\n\n\nstring indicates the type (scale) of the predictions used to compute contrasts or slopes. This can differ based on the model type, but will typically be a string such as: \"response\", \"link\", \"probs\", or \"zero\". When an unsupported string is entered, the model-specific list of acceptable values is returned in an error message. When type is NULL, the first entry in the error message is used by default.\n\n\n\n\nby\n\n\nAggregate unit-level estimates (aka, marginalize, average over). Valid inputs:\n\n\nFALSE: return the original unit-level estimates.\n\n\nTRUE: aggregate estimates for each term.\n\n\nCharacter vector of column names in newdata or in the data frame produced by calling the function without the by argument.\n\n\nData frame with a by column of group labels, and merging columns shared by newdata or the data frame produced by calling the same function without the by argument.\n\n\nSee examples below.\n\n\nFor more complex aggregations, you can use the FUN argument of the hypotheses() function. See that function’s documentation and the Hypothesis Test vignettes on the marginaleffects website.\n\n\n\n\n\n\nvcov\n\n\nType of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:\n\n\nFALSE: Do not compute standard errors. This can speed up computation considerably.\n\n\nTRUE: Unit-level standard errors using the default vcov(model) variance-covariance matrix.\n\n\nString which indicates the kind of uncertainty estimates to return.\n\n\nHeteroskedasticity-consistent: “HC”, “HC0”, “HC1”, “HC2”, “HC3”, “HC4”, “HC4m”, “HC5”. See ?sandwich::vcovHC\n\n\nHeteroskedasticity and autocorrelation consistent: “HAC”\n\n\nMixed-Models degrees of freedom: \"satterthwaite\", \"kenward-roger\"\n\n\nOther: “NeweyWest”, “KernHAC”, “OPG”. See the sandwich package documentation.\n\n\n\n\nOne-sided formula which indicates the name of cluster variables (e.g., ~unit_id). This formula is passed to the cluster argument of the sandwich::vcovCL function.\n\n\nSquare covariance matrix\n\n\nFunction which returns a covariance matrix (e.g., stats::vcov(model))\n\n\n\n\n\n\nconf_level\n\n\nnumeric value between 0 and 1. 
Confidence level to use to build a confidence interval.\n\n\n\n\nslope\n\n\nstring indicates the type of slope or (semi-)elasticity to compute:\n\n\n\"dydx\": dY/dX\n\n\n\"eyex\": dY/dX * Y / X\n\n\n\"eydx\": dY/dX * Y\n\n\n\"dyex\": dY/dX / X\n\n\nY is the predicted value of the outcome; X is the observed value of the predictor.\n\n\n\n\n\n\nwts\n\n\nlogical, string or numeric: weights to use when computing average predictions, contrasts or slopes. These weights only affect the averaging in avg_*() or with the by argument, and not unit-level estimates. See ?weighted.mean\n\n\nstring: column name of the weights variable in newdata. When supplying a column name to wts, it is recommended to supply the original data (including the weights variable) explicitly to newdata.\n\n\nnumeric: vector of length equal to the number of rows in the original data or in newdata (if supplied).\n\n\nFALSE: Equal weights.\n\n\nTRUE: Extract weights from the fitted object with insight::find_weights() and use them when taking weighted averages of estimates. Warning: newdata=datagrid() returns a single average weight, which is equivalent to using wts=FALSE\n\n\n\n\n\n\nhypothesis\n\n\nspecify a hypothesis test or custom contrast using a numeric value, vector, or matrix; a string equation; a string; a formula; or a function.\n\n\nNumeric:\n\n\nSingle value: the null hypothesis used in the computation of Z and p (before applying transform).\n\n\nVector: Weights to compute a linear combination of (custom contrast between) estimates. Length equal to the number of rows generated by the same function call, but without the hypothesis argument.\n\n\nMatrix: Each column is a vector of weights, as described above, used to compute a distinct linear combination of (contrast between) estimates. 
If the left-hand side is empty, difference is chosen.\n\n\nRight-hand side determines the pairs of estimates to compare: reference, sequential, or meandev.\n\n\nOptional: Users can supply grouping variables after a vertical bar to conduct comparisons within subsets.\n\n\nExamples:\n\n\n~ reference\n\n\nratio ~ pairwise\n\n\ndifference ~ pairwise | groupid\n\n\n\n\n\n\nFunction:\n\n\nAccepts an argument x: object produced by a marginaleffects function or a data frame with column rowid and estimate\n\n\nReturns a data frame with columns term and estimate (mandatory) and rowid (optional).\n\n\nThe function can also accept optional input arguments: newdata, by, draws.\n\n\nThis function approach will not work for Bayesian models or with bootstrapping. In those cases, it is easy to use posterior_draws() to extract and manipulate the draws directly.\n\n\n\n\nSee the Examples section below and the vignette: https://marginaleffects.com/vignettes/hypothesis.html\n\n\n\n\n\n\nequivalence\n\n\nNumeric vector of length 2: bounds used for the two-one-sided test (TOST) of equivalence, and for the non-inferiority and non-superiority tests. See Details section below.\n\n\n\n\np_adjust\n\n\nAdjust p-values for multiple comparisons: \"holm\", \"hochberg\", \"hommel\", \"bonferroni\", \"BH\", \"BY\", or \"fdr\". See stats::p.adjust\n\n\n\n\ndf\n\n\nDegrees of freedom used to compute p values and confidence intervals. A single numeric value between 1 and Inf. When df is Inf, the normal distribution is used. When df is finite, the t distribution is used. See insight::get_df for a convenient function to extract degrees of freedom. Ex: slopes(model, df = insight::get_df(model))\n\n\n\n\neps\n\n\nNULL or numeric value which determines the step size to use when calculating numerical derivatives: (f(x+eps)-f(x))/eps. When eps is NULL, the step size is 0.0001 multiplied by the difference between the maximum and minimum values of the variable with respect to which we are taking the derivative. Changing eps may be necessary to avoid numerical problems in certain models.\n\n\n\n\nnumderiv\n\n\nstring or list of strings indicating the method to use for the numeric differentiation used to compute delta method standard errors.\n\n\n\"fdforward\": finite difference method with forward differences (default)\n\n\n\"fdcenter\": finite difference method with central differences\n\n\n\"richardson\": Richardson extrapolation method\n\n\nExtra arguments can be specified by passing a list to the numderiv argument, with the name of the method first and named arguments following, ex: numderiv=list(\"fdcenter\", eps = 1e-5). When an unknown argument is used, marginaleffects prints the list of valid arguments for each method.\n\n\n\n\n\n\n…\n\n\nAdditional arguments are passed to the predict() method supplied by the modeling package. These arguments are particularly useful for mixed-effects or bayesian models (see the online vignettes on the marginaleffects website). Available arguments can vary from model to model, depending on the range of supported arguments by each modeling package. See the \"Model-Specific Arguments\" section of the ?slopes documentation for a non-exhaustive list of available arguments.\n\n\n\nA \"slope\" or \"marginal effect\" is the partial derivative of the regression equation with respect to a variable in the model. This function uses automatic differentiation to compute slopes for a vast array of models, including non-linear models with transformations (e.g., polynomials). 
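A brief hedged sketch of this point (the quadratic specification on mtcars is illustrative and not part of these docs; output not shown): in a model with a polynomial term, the slope of the outcome with respect to the focal variable differs across observations, and slopes() reports those unit-level values while avg_slopes() averages them.\n\nlibrary(\"marginaleffects\")\n\n# quadratic term: the slope of mpg with respect to hp varies with hp\nmod_quad <- lm(mpg ~ hp + I(hp^2), data = mtcars)\n\n# unit-level slopes differ across observations...\nslopes(mod_quad, variables = \"hp\")\n\n# ...and can be averaged into a single summary\navg_slopes(mod_quad, variables = \"hp\")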
Uncertainty estimates are computed using the delta method.\nNumerical derivatives for the slopes function are calculated using a simple epsilon difference approach: \\(\\partial Y / \\partial X = (f(X + \\varepsilon/2) - f(X-\\varepsilon/2)) / \\varepsilon\\), where f is the predict() method associated with the model class, and \\(\\varepsilon\\) is determined by the eps argument.\n\nA data.frame with one row per observation (per term/group) and several columns:\n\n\nrowid: row number of the newdata data frame\n\n\ntype: prediction type, as defined by the type argument\n\n\ngroup: (optional) value of the grouped outcome (e.g., categorical outcome models)\n\n\nterm: the variable whose marginal effect is computed\n\n\ndydx: slope of the outcome with respect to the term, for a given combination of predictor values\n\n\nstd.error: standard errors computed via the delta method.\n\n\np.value: p value associated with the estimate column. The null is determined by the hypothesis argument (0 by default), and p values are computed before applying the transform argument. For models of class feglm, Gam, glm and negbin, p values are computed on the link scale by default unless the type argument is specified explicitly.\n\n\ns.value: Shannon information transforms of p values. How many consecutive \"heads\" tosses would provide the same amount of evidence (or \"surprise\") against the null hypothesis that the coin is fair? The purpose of S is to calibrate the analyst’s intuition about the strength of evidence encoded in p against a well-known physical phenomenon. See Greenland (2019) and Cole et al. (2020).\n\n\nconf.low: lower bound of the confidence interval (or equal-tailed interval for bayesian models)\n\n\nconf.high: upper bound of the confidence interval (or equal-tailed interval for bayesian models)\n\n\nSee ?print.marginaleffects for printing options.\n\n\n\navg_slopes(): Average slopes\n\n\nStandard errors for all quantities estimated by marginaleffects can be obtained via the delta method. This requires differentiating a function with respect to the coefficients in the model using a finite difference approach. In some models, the delta method standard errors can be sensitive to various aspects of the numeric differentiation strategy, including the step size. By default, the step size is set to 1e-8, or to 1e-4 times the smallest absolute model coefficient, whichever is largest.\nmarginaleffects can delegate numeric differentiation to the numDeriv package, which allows more flexibility. To do this, users can pass arguments to the numDeriv::jacobian function through a global option. For example:\n\n\noptions(marginaleffects_numDeriv = list(method = \"simple\", method.args = list(eps = 1e-6)))\n\n\noptions(marginaleffects_numDeriv = list(method = \"Richardson\", method.args = list(eps = 1e-5)))\n\n\noptions(marginaleffects_numDeriv = NULL)\n\n\nSee the \"Standard Errors and Confidence Intervals\" vignette on the marginaleffects website for more details on the computation of standard errors:\nhttps://marginaleffects.com/vignettes/uncertainty.html\nNote that the inferences() function can be used to compute uncertainty estimates using bootstrap or simulation-based inference. See the vignette:\nhttps://marginaleffects.com/vignettes/bootstrap.html\n\nSome model types allow model-specific arguments to modify the nature of marginal effects, predictions, marginal means, and contrasts. 
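For instance, a minimal sketch of passing such an argument through the … dots (assumes the lme4 package; the model and the use of cyl as a grouping factor are illustrative, not taken from these docs):\n\nlibrary(\"marginaleffects\")\nlibrary(\"lme4\")\n\nmod_mixed <- lmer(mpg ~ hp + (1 | cyl), data = mtcars)\n\n# re.form = NA is forwarded to lme4::predict.merMod (see the table below),\n# so slopes are computed from population-level predictions\navg_slopes(mod_mixed, variables = \"hp\", re.form = NA)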
Please report other package-specific predict() arguments on Github so we can add them to the table below.\nhttps://github.com/vincentarelbundock/marginaleffects/issues\n\n\n\nPackage\n\n\nClass\n\n\nArgument\n\n\nDocumentation\n\n\n\n\nbrms\n\n\nbrmsfit\n\n\nndraws\n\n\nbrms::posterior_predict\n\n\n\n\n\n\n\n\nre_formula\n\n\nbrms::posterior_predict\n\n\n\n\nlme4\n\n\nmerMod\n\n\nre.form\n\n\nlme4::predict.merMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nlme4::predict.merMod\n\n\n\n\nglmmTMB\n\n\nglmmTMB\n\n\nre.form\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nzitype\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\nmgcv\n\n\nbam\n\n\nexclude\n\n\nmgcv::predict.bam\n\n\n\n\n\n\ngam\n\n\nexclude\n\n\nmgcv::predict.gam\n\n\n\n\nrobustlmm\n\n\nrlmerMod\n\n\nre.form\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\nMCMCglmm\n\n\nMCMCglmm\n\n\nndraws\n\n\n\n\n\n\nsampleSelection\n\n\nselection\n\n\npart\n\n\nsampleSelection::predict.selection\n\n\n\n\n\n\n\nBy default, credible intervals in bayesian models are built as equal-tailed intervals. This can be changed to a highest density interval by setting a global option:\noptions(\"marginaleffects_posterior_interval\" = \"eti\")\noptions(\"marginaleffects_posterior_interval\" = \"hdi\")\nBy default, the center of the posterior distribution in bayesian models is identified by the median. Users can use a different summary function by setting a global option:\noptions(\"marginaleffects_posterior_center\" = \"mean\")\noptions(\"marginaleffects_posterior_center\" = \"median\")\nWhen estimates are averaged using the by argument, the tidy() function, or the summary() function, the posterior distribution is marginalized twice over. First, we take the average across units but within each iteration of the MCMC chain, according to what the user requested in the by argument or the tidy()/summary() functions. Then, we identify the center of the resulting posterior using the function supplied to the \"marginaleffects_posterior_center\" option (the median by default).\n\n\\(\\theta\\) is an estimate, \\(\\sigma_\\theta\\) its estimated standard error, and \\([a, b]\\) are the bounds of the interval supplied to the equivalence argument.\nNon-inferiority:\n\n\n\\(H_0\\): \\(\\theta \\leq a\\)\n\n\n\\(H_1\\): \\(\\theta > a\\)\n\n\n\\(t=(\\theta - a)/\\sigma_\\theta\\)\n\n\np: Upper-tail probability\n\n\nNon-superiority:\n\n\n\\(H_0\\): \\(\\theta \\geq b\\)\n\n\n\\(H_1\\): \\(\\theta < b\\)\n\n\n\\(t=(\\theta - b)/\\sigma_\\theta\\)\n\n\np: Lower-tail probability\n\n\nEquivalence: Two One-Sided Tests (TOST)\n\n\np: Maximum of the non-inferiority and non-superiority p values.\n\n\nThanks to Russell V. Lenth for the excellent emmeans package and documentation which inspired this feature.\n\nThe type argument determines the scale of the predictions used to compute quantities of interest with functions from the marginaleffects package. Admissible values for type depend on the model object. When users specify an incorrect value for type, marginaleffects will raise an informative error with a list of valid type values for the specific model object. The first entry in the list in that error message is the default type.\ninvlink(link) is a special type defined by marginaleffects. It is available for some (but not all) models, and only for the predictions() function. 
With this link type, we first compute predictions on the link scale, then we use the inverse link function to backtransform the predictions to the response scale. This is useful for models with non-linear link functions as it can ensure that confidence intervals stay within desirable bounds, ex: 0 to 1 for a logit model. Note that an average of estimates with type=\"invlink(link)\" will not always be equivalent to the average of estimates with type=\"response\". This type is the default when calling predictions(). It is available—but not default—when calling avg_predictions() or predictions() with the by argument.\nSome of the most common type values are:\nresponse, link, E, Ep, average, class, conditional, count, cum.prob, cumhaz, cumprob, density, detection, disp, ev, expected, expvalue, fitted, hazard, invlink(link), latent, latent_N, linear, linear.predictor, linpred, location, lp, mean, numeric, p, ppd, pr, precision, prediction, prob, probability, probs, quantile, risk, rmst, scale, survival, unconditional, utility, variance, xb, zero, zlink, zprob\n\nThe slopes() and comparisons() functions can use parallelism to speed up computation. Operations are parallelized for the computation of standard errors, at the model coefficient level. There is always considerable overhead when using parallel computation, mainly involved in passing the whole dataset to the different processes. Thus, parallel computation is most likely to be useful when the model includes many parameters and the dataset is relatively small.\nWarning: In many cases, parallel processing will not be useful at all.\nTo activate parallel computation, users must load the future.apply package, call the plan() function, and set a global option. For example:\n\nlibrary(future.apply)\nplan(\"multicore\", workers = 4)\noptions(marginaleffects_parallel = TRUE)\n\nslopes(model)\n\n\nTo disable parallelism in marginaleffects altogether, you can set a global option:\n\noptions(marginaleffects_parallel = FALSE)\n\n\n\nBehind the scenes, the arguments of marginaleffects functions are evaluated in this order:\n\n\nnewdata\n\n\nvariables\n\n\ncomparison and slopes\n\n\nby\n\n\nvcov\n\n\nhypothesis\n\n\ntransform\n\n\nThe behavior of marginaleffects functions can be modified by setting global options.\nDisable some safety checks:\n\noptions(marginaleffects_safe = FALSE)\n\n\nOmit some columns from the printed output:\n\noptions(marginaleffects_print_omit = c(\"p.value\", \"s.value\"))\n\n\n\n\n\nGreenland S. 2019. \"Valid P-Values Behave Exactly as They Should: Some Misleading Criticisms of P-Values and Their Resolution With S-Values.\" The American Statistician. 73(S1): 106–114.\n\n\nCole, Stephen R, Jessie K Edwards, and Sander Greenland. 2020. \"Surprise!\" American Journal of Epidemiology 190 (2): 191–93. 
https://doi.org/10.1093/aje/kwaa136\n\n\n\nlibrary(\"marginaleffects\")\n\n\n\n# Unit-level (conditional) Marginal Effects\nmod <- glm(am ~ hp * wt, data = mtcars, family = binomial)\nmfx <- slopes(mod)\nhead(mfx)\n\n# Average Marginal Effect (AME)\navg_slopes(mod, by = TRUE)\n\n\n# Marginal Effect at the Mean (MEM)\nslopes(mod, newdata = datagrid())\n\n# Marginal Effect at User-Specified Values\n# Variables not explicitly included in `datagrid()` are held at their means\nslopes(mod, newdata = datagrid(hp = c(100, 110)))\n\n# Group-Average Marginal Effects (G-AME)\n# Calculate marginal effects for each observation, and then take the average\n# marginal effect within each subset of observations with different observed\n# values for the `cyl` variable:\nmod2 <- lm(mpg ~ hp * cyl, data = mtcars)\navg_slopes(mod2, variables = \"hp\", by = \"cyl\")\n\n# Marginal Effects at User-Specified Values (counterfactual)\n# Variables not explicitly included in `datagrid()` are held at their\n# original values, and the whole dataset is duplicated once for each\n# combination of the values in `datagrid()`\nmfx <- slopes(mod,\n newdata = datagrid(\n hp = c(100, 110),\n grid_type = \"counterfactual\"))\nhead(mfx)\n\n# Heteroskedasticity robust standard errors\nmfx <- slopes(mod, vcov = sandwich::vcovHC(mod))\nhead(mfx)\n\n# hypothesis test: is the `hp` marginal effect at the mean equal to the `drat` marginal effect\nmod <- lm(mpg ~ wt + drat, data = mtcars)\n\nslopes(\n mod,\n newdata = \"mean\",\n hypothesis = \"wt = drat\")\n\n# same hypothesis test using row indices\nslopes(\n mod,\n newdata = \"mean\",\n hypothesis = \"b1 - b2 = 0\")\n\n# same hypothesis test using numeric vector of weights\nslopes(\n mod,\n newdata = \"mean\",\n hypothesis = c(1, -1))\n\n# two custom contrasts using a matrix of weights\nlc <- matrix(\n c(\n 1, -1,\n 2, 3),\n ncol = 2)\ncolnames(lc) <- c(\"Contrast A\", \"Contrast B\")\nslopes(\n mod,\n newdata = \"mean\",\n hypothesis = lc)", + "text": "Extract Posterior Draws or Bootstrap Resamples from marginaleffects Objects\n\n\n\nposterior_draws(x, shape = \"long\")\n\n\n\n\n\n\n\nx\n\n\nAn object produced by a marginaleffects package function, such as predictions(), avg_slopes(), hypotheses(), etc.\n\n\n\n\nshape\n\n\nstring indicating the shape of the output format:\n\n\n\"long\": long format data frame\n\n\n\"DxP\": Matrix with draws as rows and parameters as columns\n\n\n\"PxD\": Matrix with draws as rows and parameters as columns\n\n\n\"rvar\": Random variable datatype (see posterior package documentation).\n\n\n\n\n\n\n\n\nA data.frame with drawid and draw columns.", "crumbs": [ "Model to Meaning", "Functions", - "`slopes`" + "`posterior_draws`" ] }, { - "objectID": "man/slopes.html#slopes-aka-partial-derivatives-marginal-effects-or-trends", - "href": "man/slopes.html#slopes-aka-partial-derivatives-marginal-effects-or-trends", + "objectID": "man/posterior_draws.html#extract-posterior-draws-or-bootstrap-resamples-from-marginaleffects-objects", + "href": "man/posterior_draws.html#extract-posterior-draws-or-bootstrap-resamples-from-marginaleffects-objects", "title": "", "section": "", - "text": "Partial derivative of the regression equation with respect to a regressor of interest.\n\n\nslopes(): unit-level (conditional) estimates.\n\n\navg_slopes(): average (marginal) estimates.\n\n\nThe newdata argument and the datagrid() function can be used to control where statistics are evaluated in the predictor space: \"at observed values\", \"at the mean\", \"at representative values\", 
etc.\nSee the slopes vignette and package website for worked examples and case studies:\n\n\nhttps://marginaleffects.com/vignettes/slopes.html\n\n\nhttps://marginaleffects.com/\n\n\nslopes(\n model,\n newdata = NULL,\n variables = NULL,\n type = NULL,\n by = FALSE,\n vcov = TRUE,\n conf_level = 0.95,\n slope = \"dydx\",\n wts = FALSE,\n hypothesis = NULL,\n equivalence = NULL,\n p_adjust = NULL,\n df = Inf,\n eps = NULL,\n numderiv = \"fdforward\",\n ...\n)\n\navg_slopes(\n model,\n newdata = NULL,\n variables = NULL,\n type = NULL,\n by = TRUE,\n vcov = TRUE,\n conf_level = 0.95,\n slope = \"dydx\",\n wts = FALSE,\n hypothesis = NULL,\n equivalence = NULL,\n p_adjust = NULL,\n df = Inf,\n eps = NULL,\n numderiv = \"fdforward\",\n ...\n)\n\n\n\n\n\nmodel\n\n\nModel object\n\n\n\n\nnewdata\n\n\nGrid of predictor values at which we evaluate the slopes.\n\n\nWarning: Please avoid modifying your dataset between fitting the model and calling a marginaleffects function. This can sometimes lead to unexpected results.\n\n\nNULL (default): Unit-level slopes for each observed value in the dataset (empirical distribution). The dataset is retrieved using insight::get_data(), which tries to extract data from the environment. This may produce unexpected results if the original data frame has been altered since fitting the model.\n\n\ndatagrid() call to specify a custom grid of regressors. For example:\n\n\nnewdata = datagrid(cyl = c(4, 6)): cyl variable equal to 4 and 6 and other regressors fixed at their means or modes.\n\n\nSee the Examples section and the datagrid() documentation.\n\n\n\n\nsubset() call with a single argument to select a subset of the dataset used to fit the model, ex: newdata = subset(treatment == 1)\n\n\ndplyr::filter() call with a single argument to select a subset of the dataset used to fit the model, ex: newdata = filter(treatment == 1)\n\n\nstring:\n\n\n\"mean\": Slopes evaluated when each predictor is held at its mean or mode.\n\n\n\"median\": Slopes evaluated when each predictor is held at its median or mode.\n\n\n\"balanced\": Slopes evaluated on a balanced grid with every combination of categories and numeric variables held at their means.\n\n\n\"tukey\": Slopes evaluated at Tukey’s 5 numbers.\n\n\n\"grid\": Slopes evaluated on a grid of representative numbers (Tukey’s 5 numbers and unique values of categorical predictors).\n\n\n\n\n\n\n\n\nvariables\n\n\nFocal variables\n\n\nNULL: compute slopes or comparisons for all the variables in the model object (can be slow).\n\n\nCharacter vector: subset of variables (usually faster).\n\n\n\n\n\n\ntype\n\n\nstring indicates the type (scale) of the predictions used to compute contrasts or slopes. This can differ based on the model type, but will typically be a string such as: \"response\", \"link\", \"probs\", or \"zero\". When an unsupported string is entered, the model-specific list of acceptable values is returned in an error message. When type is NULL, the first entry in the error message is used by default.\n\n\n\n\nby\n\n\nAggregate unit-level estimates (aka, marginalize, average over). 
Valid inputs:\n\n\nFALSE: return the original unit-level estimates.\n\n\nTRUE: aggregate estimates for each term.\n\n\nCharacter vector of column names in newdata or in the data frame produced by calling the function without the by argument.\n\n\nData frame with a by column of group labels, and merging columns shared by newdata or the data frame produced by calling the same function without the by argument.\n\n\nSee examples below.\n\n\nFor more complex aggregations, you can use the FUN argument of the hypotheses() function. See that function’s documentation and the Hypothesis Test vignettes on the marginaleffects website.\n\n\n\n\n\n\nvcov\n\n\nType of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:\n\n\nFALSE: Do not compute standard errors. This can speed up computation considerably.\n\n\nTRUE: Unit-level standard errors using the default vcov(model) variance-covariance matrix.\n\n\nString which indicates the kind of uncertainty estimates to return.\n\n\nHeteroskedasticity-consistent: “HC”, “HC0”, “HC1”, “HC2”, “HC3”, “HC4”, “HC4m”, “HC5”. See ?sandwich::vcovHC\n\n\nHeteroskedasticity and autocorrelation consistent: “HAC”\n\n\nMixed-Models degrees of freedom: \"satterthwaite\", \"kenward-roger\"\n\n\nOther: “NeweyWest”, “KernHAC”, “OPG”. See the sandwich package documentation.\n\n\n\n\nOne-sided formula which indicates the name of cluster variables (e.g., ~unit_id). This formula is passed to the cluster argument of the sandwich::vcovCL function.\n\n\nSquare covariance matrix\n\n\nFunction which returns a covariance matrix (e.g., stats::vcov(model))\n\n\n\n\n\n\nconf_level\n\n\nnumeric value between 0 and 1. Confidence level to use to build a confidence interval.\n\n\n\n\nslope\n\n\nstring indicates the type of slope or (semi-)elasticity to compute:\n\n\n\"dydx\": dY/dX\n\n\n\"eyex\": dY/dX * Y / X\n\n\n\"eydx\": dY/dX * Y\n\n\n\"dyex\": dY/dX / X\n\n\nY is the predicted value of the outcome; X is the observed value of the predictor.\n\n\n\n\n\n\nwts\n\n\nlogical, string or numeric: weights to use when computing average predictions, contrasts or slopes. These weights only affect the averaging in avg_*() or with the by argument, and not unit-level estimates. See ?weighted.mean\n\n\nstring: column name of the weights variable in newdata. When supplying a column name to wts, it is recommended to supply the original data (including the weights variable) explicitly to newdata.\n\n\nnumeric: vector of length equal to the number of rows in the original data or in newdata (if supplied).\n\n\nFALSE: Equal weights.\n\n\nTRUE: Extract weights from the fitted object with insight::find_weights() and use them when taking weighted averages of estimates. Warning: newdata=datagrid() returns a single average weight, which is equivalent to using wts=FALSE\n\n\n\n\n\n\nhypothesis\n\n\nspecify a hypothesis test or custom contrast using a numeric value, vector, or matrix; a string equation; string; a formula, or a function.\n\n\nNumeric:\n\n\nSingle value: the null hypothesis used in the computation of Z and p (before applying transform).\n\n\nVector: Weights to compute a linear combination of (custom contrast between) estimates. Length equal to the number of rows generated by the same function call, but without the hypothesis argument.\n\n\nMatrix: Each column is a vector of weights, as describe above, used to compute a distinct linear combination of (contrast between) estimates. 
The column names of the matrix are used as labels in the output.\n\n\n\n\nString equation to specify linear or non-linear hypothesis tests. If the term column uniquely identifies rows, terms can be used in the formula. Otherwise, use b1, b2, etc. to identify the position of each parameter. The b* wildcard can be used to test hypotheses on all estimates. If a named vector is used, the names are used as labels in the output. Examples:\n\n\nhp = drat\n\n\nhp + drat = 12\n\n\nb1 + b2 + b3 = 0\n\n\nb* / b1 = 1\n\n\n\n\nString:\n\n\n\"pairwise\": pairwise differences between estimates in each row.\n\n\n\"reference\": differences between the estimates in each row and the estimate in the first row.\n\n\n\"sequential\": difference between an estimate and the estimate in the next row.\n\n\n\"meandev\": difference between an estimate and the mean of all estimates.\n\n\n\"meanotherdev\": difference between an estimate and the mean of all other estimates, excluding the current one.\n\n\n\"revpairwise\", \"revreference\", \"revsequential\": inverse of the corresponding hypotheses, as described above.\n\n\n\n\nFormula:\n\n\ncomparison ~ pairs | group\n\n\nLeft-hand side determines the type of comparison to conduct: difference or ratio. If the left-hand side is empty, difference is chosen.\n\n\nRight-hand side determines the pairs of estimates to compare: reference, sequential, or meandev\n\n\nOptional: Users can supply grouping variables after a vertical bar to conduct comparisons withing subsets.\n\n\nExamples:\n\n\n~ reference\n\n\nratio ~ pairwise\n\n\ndifference ~ pairwise | groupid\n\n\n\n\n\n\nFunction:\n\n\nAccepts an argument x: object produced by a marginaleffects function or a data frame with column rowid and estimate\n\n\nReturns a data frame with columns term and estimate (mandatory) and rowid (optional).\n\n\nThe function can also accept optional input arguments: newdata, by, draws.\n\n\nThis function approach will not work for Bayesian models or with bootstrapping. In those cases, it is easy to use posterior_draws() to extract and manipulate the draws directly.\n\n\n\n\nSee the Examples section below and the vignette: https://marginaleffects.com/vignettes/hypothesis.html\n\n\n\n\n\n\nequivalence\n\n\nNumeric vector of length 2: bounds used for the two-one-sided test (TOST) of equivalence, and for the non-inferiority and non-superiority tests. See Details section below.\n\n\n\n\np_adjust\n\n\nAdjust p-values for multiple comparisons: \"holm\", \"hochberg\", \"hommel\", \"bonferroni\", \"BH\", \"BY\", or \"fdr\". See stats::p.adjust\n\n\n\n\ndf\n\n\nDegrees of freedom used to compute p values and confidence intervals. A single numeric value between 1 and Inf. When df is Inf, the normal distribution is used. When df is finite, the t distribution is used. See insight::get_df for a convenient function to extract degrees of freedom. Ex: slopes(model, df = insight::get_df(model))\n\n\n\n\neps\n\n\nNULL or numeric value which determines the step size to use when calculating numerical derivatives: (f(x+eps)-f(x))/eps. When eps is NULL, the step size is 0.0001 multiplied by the difference between the maximum and minimum values of the variable with respect to which we are taking the derivative. 
Changing eps may be necessary to avoid numerical problems in certain models.\n\n\n\n\nnumderiv\n\n\nstring or list of strings indicating the method to use to for the numeric differentiation used in to compute delta method standard errors.\n\n\n\"fdforward\": finite difference method with forward differences\n\n\n\"fdcenter\": finite difference method with central differences (default)\n\n\n\"richardson\": Richardson extrapolation method\n\n\nExtra arguments can be specified by passing a list to the numDeriv argument, with the name of the method first and named arguments following, ex: numderiv=list(“fdcenter”, eps = 1e-5). When an unknown argument is used, marginaleffects prints the list of valid arguments for each method.\n\n\n\n\n\n\n…\n\n\nAdditional arguments are passed to the predict() method supplied by the modeling package.These arguments are particularly useful for mixed-effects or bayesian models (see the online vignettes on the marginaleffects website). Available arguments can vary from model to model, depending on the range of supported arguments by each modeling package. See the \"Model-Specific Arguments\" section of the ?slopes documentation for a non-exhaustive list of available arguments.\n\n\n\nA \"slope\" or \"marginal effect\" is the partial derivative of the regression equation with respect to a variable in the model. This function uses automatic differentiation to compute slopes for a vast array of models, including non-linear models with transformations (e.g., polynomials). Uncertainty estimates are computed using the delta method.\nNumerical derivatives for the slopes function are calculated using a simple epsilon difference approach: \\(\\partial Y / \\partial X = (f(X + \\varepsilon/2) - f(X-\\varepsilon/2)) / \\varepsilon\\), where f is the predict() method associated with the model class, and \\(\\varepsilon\\) is determined by the eps argument.\n\nA data.frame with one row per observation (per term/group) and several columns:\n\n\nrowid: row number of the newdata data frame\n\n\ntype: prediction type, as defined by the type argument\n\n\ngroup: (optional) value of the grouped outcome (e.g., categorical outcome models)\n\n\nterm: the variable whose marginal effect is computed\n\n\ndydx: slope of the outcome with respect to the term, for a given combination of predictor values\n\n\nstd.error: standard errors computed by via the delta method.\n\n\np.value: p value associated to the estimate column. The null is determined by the hypothesis argument (0 by default), and p values are computed before applying the transform argument. For models of class feglm, Gam, glm and negbin, p values are computed on the link scale by default unless the type argument is specified explicitly.\n\n\ns.value: Shannon information transforms of p values. How many consecutive \"heads\" tosses would provide the same amount of evidence (or \"surprise\") against the null hypothesis that the coin is fair? The purpose of S is to calibrate the analyst’s intuition about the strength of evidence encoded in p against a well-known physical phenomenon. See Greenland (2019) and Cole et al. (2020).\n\n\nconf.low: lower bound of the confidence interval (or equal-tailed interval for bayesian models)\n\n\nconf.high: upper bound of the confidence interval (or equal-tailed interval for bayesian models)\n\n\nSee ?print.marginaleffects for printing options.\n\n\n\navg_slopes(): Average slopes\n\n\nStandard errors for all quantities estimated by marginaleffects can be obtained via the delta method. 
This requires differentiating a function with respect to the coefficients in the model using a finite difference approach. In some models, the delta method standard errors can be sensitive to various aspects of the numeric differentiation strategy, including the step size. By default, the step size is set to 1e-8, or to 1e-4 times the smallest absolute model coefficient, whichever is largest.\nmarginaleffects can delegate numeric differentiation to the numDeriv package, which allows more flexibility. To do this, users can pass arguments to the numDeriv::jacobian function through a global option. For example:\n\n\noptions(marginaleffects_numDeriv = list(method = “simple”, method.args = list(eps = 1e-6)))\n\n\noptions(marginaleffects_numDeriv = list(method = “Richardson”, method.args = list(eps = 1e-5)))\n\n\noptions(marginaleffects_numDeriv = NULL)\n\n\nSee the \"Standard Errors and Confidence Intervals\" vignette on the marginaleffects website for more details on the computation of standard errors:\nhttps://marginaleffects.com/vignettes/uncertainty.html\nNote that the inferences() function can be used to compute uncertainty estimates using a bootstrap or simulation-based inference. See the vignette:\nhttps://marginaleffects.com/vignettes/bootstrap.html\n\nSome model types allow model-specific arguments to modify the nature of marginal effects, predictions, marginal means, and contrasts. Please report other package-specific predict() arguments on Github so we can add them to the table below.\nhttps://github.com/vincentarelbundock/marginaleffects/issues\n\n\n\nPackage\n\n\nClass\n\n\nArgument\n\n\nDocumentation\n\n\n\n\nbrms\n\n\nbrmsfit\n\n\nndraws\n\n\nbrms::posterior_predict\n\n\n\n\n\n\n\n\nre_formula\n\n\nbrms::posterior_predict\n\n\n\n\nlme4\n\n\nmerMod\n\n\nre.form\n\n\nlme4::predict.merMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nlme4::predict.merMod\n\n\n\n\nglmmTMB\n\n\nglmmTMB\n\n\nre.form\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nzitype\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\nmgcv\n\n\nbam\n\n\nexclude\n\n\nmgcv::predict.bam\n\n\n\n\n\n\ngam\n\n\nexclude\n\n\nmgcv::predict.gam\n\n\n\n\nrobustlmm\n\n\nrlmerMod\n\n\nre.form\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\nMCMCglmm\n\n\nMCMCglmm\n\n\nndraws\n\n\n\n\n\n\nsampleSelection\n\n\nselection\n\n\npart\n\n\nsampleSelection::predict.selection\n\n\n\n\n\n\n\nBy default, credible intervals in bayesian models are built as equal-tailed intervals. This can be changed to a highest density interval by setting a global option:\noptions(“marginaleffects_posterior_interval” = “eti”)\noptions(“marginaleffects_posterior_interval” = “hdi”)\nBy default, the center of the posterior distribution in bayesian models is identified by the median. Users can use a different summary function by setting a global option:\noptions(“marginaleffects_posterior_center” = “mean”)\noptions(“marginaleffects_posterior_center” = “median”)\nWhen estimates are averaged using the by argument, the tidy() function, or the summary() function, the posterior distribution is marginalized twice over. First, we take the average across units but within each iteration of the MCMC chain, according to what the user requested in by argument or tidy()/summary() functions. 
Then, we identify the center of the resulting posterior using the function supplied to the “marginaleffects_posterior_center” option (the median by default).\n\n\\(\\theta\\) is an estimate, \\(\\sigma_\\theta\\) its estimated standard error, and \\([a, b]\\) are the bounds of the interval supplied to the equivalence argument.\nNon-inferiority:\n\n\n\\(H_0\\): \\(\\theta \\leq a\\)\n\n\n\\(H_1\\): \\(\\theta > a\\)\n\n\n\\(t=(\\theta - a)/\\sigma_\\theta\\)\n\n\np: Upper-tail probability\n\n\nNon-superiority:\n\n\n\\(H_0\\): \\(\\theta \\geq b\\)\n\n\n\\(H_1\\): \\(\\theta < b\\)\n\n\n\\(t=(\\theta - b)/\\sigma_\\theta\\)\n\n\np: Lower-tail probability\n\n\nEquivalence: Two One-Sided Tests (TOST)\n\n\np: Maximum of the non-inferiority and non-superiority p values.\n\n\nThanks to Russell V. Lenth for the excellent emmeans package and documentation which inspired this feature.\n\nThe type argument determines the scale of the predictions used to compute quantities of interest with functions from the marginaleffects package. Admissible values for type depend on the model object. When users specify an incorrect value for type, marginaleffects will raise an informative error with a list of valid type values for the specific model object. The first entry in the list in that error message is the default type.\nThe invlink(link) is a special type defined by marginaleffects. It is available for some (but not all) models, and only for the predictions() function. With this link type, we first compute predictions on the link scale, then we use the inverse link function to backtransform the predictions to the response scale. This is useful for models with non-linear link functions as it can ensure that confidence intervals stay within desirable bounds, ex: 0 to 1 for a logit model. Note that an average of estimates with type=“invlink(link)” will not always be equivalent to the average of estimates with type=“response”. This type is default when calling predictions(). It is available—but not default—when calling avg_predictions() or predictions() with the by argument.\nSome of the most common type values are:\nresponse, link, E, Ep, average, class, conditional, count, cum.prob, cumhaz, cumprob, density, detection, disp, ev, expected, expvalue, fitted, hazard, invlink(link), latent, latent_N, linear, linear.predictor, linpred, location, lp, mean, numeric, p, ppd, pr, precision, prediction, prob, probability, probs, quantile, risk, rmst, scale, survival, unconditional, utility, variance, xb, zero, zlink, zprob\n\nThe slopes() and comparisons() functions can use parallelism to speed up computation. Operations are parallelized for the computation of standard errors, at the model coefficient level. There is always considerable overhead when using parallel computation, mainly involved in passing the whole dataset to the different processes. Thus, parallel computation is most likely to be useful when the model includes many parameters and the dataset is relatively small.\nWarning: In many cases, parallel processing will not be useful at all.\nTo activate parallel computation, users must load the future.apply package, call plan() function, and set a global option. 
For example:\n\nlibrary(future.apply)\nplan(\"multicore\", workers = 4)\noptions(marginaleffects_parallel = TRUE)\n\nslopes(model)\n\n\nTo disable parallelism in marginaleffects altogether, you can set a global option:\n\noptions(marginaleffects_parallel = FALSE)\n\n\n\nBehind the scenes, the arguments of marginaleffects functions are evaluated in this order:\n\n\nnewdata\n\n\nvariables\n\n\ncomparison and slopes\n\n\nby\n\n\nvcov\n\n\nhypothesis\n\n\ntransform\n\n\nThe behavior of marginaleffects functions can be modified by setting global options.\nDisable some safety checks:\n\noptions(marginaleffects_safe = FALSE)\n\n\nOmit some columns from the printed output:\n\noptions(marginaleffects_print_omit = c(\"p.value\", \"s.value\"))`\n\n\n\n\n\nGreenland S. 2019. \"Valid P-Values Behave Exactly as They Should: Some Misleading Criticisms of P-Values and Their Resolution With S-Values.\" The American Statistician. 73(S1): 106–114.\n\n\nCole, Stephen R, Jessie K Edwards, and Sander Greenland. 2020. \"Surprise!\" American Journal of Epidemiology 190 (2): 191–93. https://doi.org/10.1093/aje/kwaa136\n\n\n\nlibrary(\"marginaleffects\")\n\n\n\n# Unit-level (conditional) Marginal Effects\nmod <- glm(am ~ hp * wt, data = mtcars, family = binomial)\nmfx <- slopes(mod)\nhead(mfx)\n\n# Average Marginal Effect (AME)\navg_slopes(mod, by = TRUE)\n\n\n# Marginal Effect at the Mean (MEM)\nslopes(mod, newdata = datagrid())\n\n# Marginal Effect at User-Specified Values\n# Variables not explicitly included in `datagrid()` are held at their means\nslopes(mod, newdata = datagrid(hp = c(100, 110)))\n\n# Group-Average Marginal Effects (G-AME)\n# Calculate marginal effects for each observation, and then take the average\n# marginal effect within each subset of observations with different observed\n# values for the `cyl` variable:\nmod2 <- lm(mpg ~ hp * cyl, data = mtcars)\navg_slopes(mod2, variables = \"hp\", by = \"cyl\")\n\n# Marginal Effects at User-Specified Values (counterfactual)\n# Variables not explicitly included in `datagrid()` are held at their\n# original values, and the whole dataset is duplicated once for each\n# combination of the values in `datagrid()`\nmfx <- slopes(mod,\n newdata = datagrid(\n hp = c(100, 110),\n grid_type = \"counterfactual\"))\nhead(mfx)\n\n# Heteroskedasticity robust standard errors\nmfx <- slopes(mod, vcov = sandwich::vcovHC(mod))\nhead(mfx)\n\n# hypothesis test: is the `hp` marginal effect at the mean equal to the `drat` marginal effect\nmod <- lm(mpg ~ wt + drat, data = mtcars)\n\nslopes(\n mod,\n newdata = \"mean\",\n hypothesis = \"wt = drat\")\n\n# same hypothesis test using row indices\nslopes(\n mod,\n newdata = \"mean\",\n hypothesis = \"b1 - b2 = 0\")\n\n# same hypothesis test using numeric vector of weights\nslopes(\n mod,\n newdata = \"mean\",\n hypothesis = c(1, -1))\n\n# two custom contrasts using a matrix of weights\nlc <- matrix(\n c(\n 1, -1,\n 2, 3),\n ncol = 2)\ncolnames(lc) <- c(\"Contrast A\", \"Contrast B\")\nslopes(\n mod,\n newdata = \"mean\",\n hypothesis = lc)", + "text": "Extract Posterior Draws or Bootstrap Resamples from marginaleffects Objects\n\n\n\nposterior_draws(x, shape = \"long\")\n\n\n\n\n\n\n\nx\n\n\nAn object produced by a marginaleffects package function, such as predictions(), avg_slopes(), hypotheses(), etc.\n\n\n\n\nshape\n\n\nstring indicating the shape of the output format:\n\n\n\"long\": long format data frame\n\n\n\"DxP\": Matrix with draws as rows and parameters as columns\n\n\n\"PxD\": Matrix with draws as rows and parameters as 
columns\n\n\n\"rvar\": Random variable datatype (see posterior package documentation).\n\n\n\n\n\n\n\n\nA data.frame with drawid and draw columns.", "crumbs": [ "Model to Meaning", "Functions", - "`slopes`" + "`posterior_draws`" ] }, { - "objectID": "man/plot_predictions.html", - "href": "man/plot_predictions.html", + "objectID": "man/plot_comparisons.html", + "href": "man/plot_comparisons.html", "title": "", "section": "", - "text": "Plot predictions on the y-axis against values of one or more predictors (x-axis, colors/shapes, and facets).\nThe by argument is used to plot marginal predictions, that is, predictions made on the original data, but averaged by subgroups. This is analogous to using the by argument in the predictions() function.\nThe condition argument is used to plot conditional predictions, that is, predictions made on a user-specified grid. This is analogous to using the newdata argument and datagrid() function in a predictions() call. All variables whose values are not specified explicitly are treated as usual by datagrid(), that is, they are held at their mean or mode (or rounded mean for integers). This includes grouping variables in mixed-effects models, so analysts who fit such models may want to specify the groups of interest using the condition argument, or supply model-specific arguments to compute population-level estimates. See details below.\nSee the \"Plots\" vignette and website for tutorials and information on how to customize plots:\n\n\nhttps://marginaleffects.com/vignettes/plot.html\n\n\nhttps://marginaleffects.com\n\n\nplot_predictions(\n model,\n condition = NULL,\n by = NULL,\n newdata = NULL,\n type = NULL,\n vcov = NULL,\n conf_level = 0.95,\n wts = FALSE,\n transform = NULL,\n points = 0,\n rug = FALSE,\n gray = FALSE,\n draw = TRUE,\n ...\n)\n\n\n\n\n\nmodel\n\n\nModel object\n\n\n\n\ncondition\n\n\nConditional predictions\n\n\nCharacter vector (max length 4): Names of the predictors to display.\n\n\nNamed list (max length 4): List names correspond to predictors. List elements can be:\n\n\nNumeric vector\n\n\nFunction which returns a numeric vector or a set of unique categorical values\n\n\nShortcut strings for common reference values: \"minmax\", \"quartile\", \"threenum\"\n\n\n\n\n1: x-axis. 2: color/shape. 3: facet (wrap if no fourth variable, otherwise cols of grid). 4: facet (rows of grid).\n\n\nNumeric variables in positions 2 and 3 are summarized by Tukey’s five numbers ?stats::fivenum\n\n\n\n\n\n\nby\n\n\nMarginal predictions\n\n\nCharacter vector (max length 3): Names of the categorical predictors to marginalize across.\n\n\n1: x-axis. 2: color. 3: facets.\n\n\n\n\n\n\nnewdata\n\n\nWhen newdata is NULL, the grid is determined by the condition argument. When newdata is not NULL, the argument behaves in the same way as in the predictions() function.\n\n\n\n\ntype\n\n\nstring indicates the type (scale) of the predictions used to compute contrasts or slopes. This can differ based on the model type, but will typically be a string such as: \"response\", \"link\", \"probs\", or \"zero\". When an unsupported string is entered, the model-specific list of acceptable values is returned in an error message. When type is NULL, the first entry in the error message is used by default.\n\n\n\n\nvcov\n\n\nType of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:\n\n\nFALSE: Do not compute standard errors. 
This can speed up computation considerably.\n\n\nTRUE: Unit-level standard errors using the default vcov(model) variance-covariance matrix.\n\n\nString which indicates the kind of uncertainty estimates to return.\n\n\nHeteroskedasticity-consistent: \"HC\", \"HC0\", \"HC1\", \"HC2\", \"HC3\", \"HC4\", \"HC4m\", \"HC5\". See ?sandwich::vcovHC\n\n\nHeteroskedasticity and autocorrelation consistent: \"HAC\"\n\n\nMixed-Models degrees of freedom: \"satterthwaite\", \"kenward-roger\"\n\n\nOther: \"NeweyWest\", \"KernHAC\", \"OPG\". See the sandwich package documentation.\n\n\n\n\nOne-sided formula which indicates the name of cluster variables (e.g., ~unit_id). This formula is passed to the cluster argument of the sandwich::vcovCL function.\n\n\nSquare covariance matrix\n\n\nFunction which returns a covariance matrix (e.g., stats::vcov(model))\n\n\n\n\n\n\nconf_level\n\n\nnumeric value between 0 and 1. Confidence level to use to build a confidence interval.\n\n\n\n\nwts\n\n\nlogical, string or numeric: weights to use when computing average predictions, contrasts or slopes. These weights only affect the averaging in avg_*() or with the by argument, and not unit-level estimates. See ?weighted.mean\n\n\nstring: column name of the weights variable in newdata. When supplying a column name to wts, it is recommended to supply the original data (including the weights variable) explicitly to newdata.\n\n\nnumeric: vector of length equal to the number of rows in the original data or in newdata (if supplied).\n\n\nFALSE: Equal weights.\n\n\nTRUE: Extract weights from the fitted object with insight::find_weights() and use them when taking weighted averages of estimates. Warning: newdata=datagrid() returns a single average weight, which is equivalent to using wts=FALSE\n\n\n\n\n\n\ntransform\n\n\nA function applied to unit-level adjusted predictions and confidence intervals just before the function returns results. For bayesian models, this function is applied to individual draws from the posterior distribution, before computing summaries.\n\n\n\n\npoints\n\n\nNumber between 0 and 1 which controls the transparency of raw data points. 0 (default) does not display any points. Warning: The points displayed are raw data, so the resulting plot is not a \"partial residual plot.\"\n\n\n\n\nrug\n\n\nTRUE displays tick marks on the axes to mark the distribution of raw data.\n\n\n\n\ngray\n\n\nlogical. TRUE: grayscale plot. FALSE (default): color plot.\n\n\n\n\ndraw\n\n\nTRUE returns a ggplot2 plot. FALSE returns a data.frame of the underlying data.\n\n\n\n\n…\n\n\nAdditional arguments are passed to the predict() method supplied by the modeling package. These arguments are particularly useful for mixed-effects or bayesian models (see the online vignettes on the marginaleffects website). Available arguments can vary from model to model, depending on the range of supported arguments by each modeling package. See the \"Model-Specific Arguments\" section of the ?slopes documentation for a non-exhaustive list of available arguments.\n\n\n\nA ggplot2 object or data frame (if draw=FALSE)\n\nSome model types allow model-specific arguments to modify the nature of marginal effects, predictions, marginal means, and contrasts. 
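As a small sketch of the two return modes described above (the model mirrors the examples at the end of this page; output not shown):\n\nlibrary(\"marginaleffects\")\n\nmod <- lm(mpg ~ hp * wt, data = mtcars)\n\n# default: a ggplot2 object\nplot_predictions(mod, condition = c(\"hp\", \"wt\"))\n\n# draw = FALSE: the underlying plotting data, useful for building custom plots\nplot_predictions(mod, condition = c(\"hp\", \"wt\"), draw = FALSE)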
Please report other package-specific predict() arguments on Github so we can add them to the table below.\nhttps://github.com/vincentarelbundock/marginaleffects/issues\n\n\n\nPackage\n\n\nClass\n\n\nArgument\n\n\nDocumentation\n\n\n\n\nbrms\n\n\nbrmsfit\n\n\nndraws\n\n\nbrms::posterior_predict\n\n\n\n\n\n\n\n\nre_formula\n\n\nbrms::posterior_predict\n\n\n\n\nlme4\n\n\nmerMod\n\n\nre.form\n\n\nlme4::predict.merMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nlme4::predict.merMod\n\n\n\n\nglmmTMB\n\n\nglmmTMB\n\n\nre.form\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nzitype\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\nmgcv\n\n\nbam\n\n\nexclude\n\n\nmgcv::predict.bam\n\n\n\n\n\n\ngam\n\n\nexclude\n\n\nmgcv::predict.gam\n\n\n\n\nrobustlmm\n\n\nrlmerMod\n\n\nre.form\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\nMCMCglmm\n\n\nMCMCglmm\n\n\nndraws\n\n\n\n\n\n\nsampleSelection\n\n\nselection\n\n\npart\n\n\nsampleSelection::predict.selection\n\n\n\n\n\n\n\nThe type argument determines the scale of the predictions used to compute quantities of interest with functions from the marginaleffects package. Admissible values for type depend on the model object. When users specify an incorrect value for type, marginaleffects will raise an informative error with a list of valid type values for the specific model object. The first entry in the list in that error message is the default type.\nThe invlink(link) is a special type defined by marginaleffects. It is available for some (but not all) models, and only for the predictions() function. With this link type, we first compute predictions on the link scale, then we use the inverse link function to backtransform the predictions to the response scale. This is useful for models with non-linear link functions as it can ensure that confidence intervals stay within desirable bounds, ex: 0 to 1 for a logit model. Note that an average of estimates with type=“invlink(link)” will not always be equivalent to the average of estimates with type=“response”. This type is default when calling predictions(). It is available—but not default—when calling avg_predictions() or predictions() with the by argument.\nSome of the most common type values are:\nresponse, link, E, Ep, average, class, conditional, count, cum.prob, cumhaz, cumprob, density, detection, disp, ev, expected, expvalue, fitted, hazard, invlink(link), latent, latent_N, linear, linear.predictor, linpred, location, lp, mean, numeric, p, ppd, pr, precision, prediction, prob, probability, probs, quantile, risk, rmst, scale, survival, unconditional, utility, variance, xb, zero, zlink, zprob\n\n\nlibrary(\"marginaleffects\")\n\nmod <- lm(mpg ~ hp + wt, data = mtcars)\nplot_predictions(mod, condition = \"wt\")\n\n\n\n\n\n\nmod <- lm(mpg ~ hp * wt * am, data = mtcars)\nplot_predictions(mod, condition = c(\"hp\", \"wt\"))\n\n\n\n\n\n\nplot_predictions(mod, condition = list(\"hp\", wt = \"threenum\"))\n\n\n\n\n\n\nplot_predictions(mod, condition = list(\"hp\", wt = range))", + "text": "Plot comparisons on the y-axis against values of one or more predictors (x-axis, colors/shapes, and facets).\nThe by argument is used to plot marginal comparisons, that is, comparisons made on the original data, but averaged by subgroups. This is analogous to using the by argument in the comparisons() function.\nThe condition argument is used to plot conditional comparisons, that is, comparisons made on a user-specified grid. 
This is analogous to using the newdata argument and datagrid() function in a comparisons() call. All variables whose values are not specified explicitly are treated as usual by datagrid(), that is, they are held at their mean or mode (or rounded mean for integers). This includes grouping variables in mixed-effects models, so analysts who fit such models may want to specify the groups of interest using the condition argument, or supply model-specific arguments to compute population-level estimates. See details below.\nSee the \"Plots\" vignette and website for tutorials and information on how to customize plots:\n\n\nhttps://marginaleffects.com/vignettes/plot.html\n\n\nhttps://marginaleffects.com\n\n\nplot_comparisons(\n model,\n variables = NULL,\n condition = NULL,\n by = NULL,\n newdata = NULL,\n type = NULL,\n vcov = NULL,\n conf_level = 0.95,\n wts = FALSE,\n comparison = \"difference\",\n transform = NULL,\n rug = FALSE,\n gray = FALSE,\n draw = TRUE,\n ...\n)\n\n\n\n\n\nmodel\n\n\nModel object\n\n\n\n\nvariables\n\n\nName of the variable whose contrast we want to plot on the y-axis.\n\n\n\n\ncondition\n\n\nConditional slopes\n\n\nCharacter vector (max length 4): Names of the predictors to display.\n\n\nNamed list (max length 4): List names correspond to predictors. List elements can be:\n\n\nNumeric vector\n\n\nFunction which returns a numeric vector or a set of unique categorical values\n\n\nShortcut strings for common reference values: \"minmax\", \"quartile\", \"threenum\"\n\n\n\n\n1: x-axis. 2: color/shape. 3: facet (wrap if no fourth variable, otherwise cols of grid). 4: facet (rows of grid).\n\n\nNumeric variables in positions 2 and 3 are summarized by Tukey’s five numbers ?stats::fivenum.\n\n\n\n\n\n\nby\n\n\nAggregate unit-level estimates (aka, marginalize, average over). Valid inputs:\n\n\nFALSE: return the original unit-level estimates.\n\n\nTRUE: aggregate estimates for each term.\n\n\nCharacter vector of column names in newdata or in the data frame produced by calling the function without the by argument.\n\n\nData frame with a by column of group labels, and merging columns shared by newdata or the data frame produced by calling the same function without the by argument.\n\n\nSee examples below.\n\n\nFor more complex aggregations, you can use the FUN argument of the hypotheses() function. See that function’s documentation and the Hypothesis Test vignettes on the marginaleffects website.\n\n\n\n\n\n\nnewdata\n\n\nWhen newdata is NULL, the grid is determined by the condition argument. When newdata is not NULL, the argument behaves in the same way as in the comparisons() function.\n\n\n\n\ntype\n\n\nstring indicates the type (scale) of the predictions used to compute contrasts or slopes. This can differ based on the model type, but will typically be a string such as: \"response\", \"link\", \"probs\", or \"zero\". When an unsupported string is entered, the model-specific list of acceptable values is returned in an error message. When type is NULL, the first entry in the error message is used by default.\n\n\n\n\nvcov\n\n\nType of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:\n\n\nFALSE: Do not compute standard errors. This can speed up computation considerably.\n\n\nTRUE: Unit-level standard errors using the default vcov(model) variance-covariance matrix.\n\n\nString which indicates the kind of uncertainty estimates to return.\n\n\nHeteroskedasticity-consistent: “HC”, “HC0”, “HC1”, “HC2”, “HC3”, “HC4”, “HC4m”, “HC5”. 
See ?sandwich::vcovHC\n\n\nHeteroskedasticity and autocorrelation consistent: “HAC”\n\n\nMixed-Models degrees of freedom: \"satterthwaite\", \"kenward-roger\"\n\n\nOther: “NeweyWest”, “KernHAC”, “OPG”. See the sandwich package documentation.\n\n\n\n\nOne-sided formula which indicates the name of cluster variables (e.g., ~unit_id). This formula is passed to the cluster argument of the sandwich::vcovCL function.\n\n\nSquare covariance matrix\n\n\nFunction which returns a covariance matrix (e.g., stats::vcov(model))\n\n\n\n\n\n\nconf_level\n\n\nnumeric value between 0 and 1. Confidence level to use to build a confidence interval.\n\n\n\n\nwts\n\n\nlogical, string or numeric: weights to use when computing average predictions, contrasts or slopes. These weights only affect the averaging in avg_*() or with the by argument, and not unit-level estimates. See ?weighted.mean\n\n\nstring: column name of the weights variable in newdata. When supplying a column name to wts, it is recommended to supply the original data (including the weights variable) explicitly to newdata.\n\n\nnumeric: vector of length equal to the number of rows in the original data or in newdata (if supplied).\n\n\nFALSE: Equal weights.\n\n\nTRUE: Extract weights from the fitted object with insight::find_weights() and use them when taking weighted averages of estimates. Warning: newdata=datagrid() returns a single average weight, which is equivalent to using wts=FALSE\n\n\n\n\n\n\ncomparison\n\n\nHow should pairs of predictions be compared? Difference, ratio, odds ratio, or user-defined functions.\n\n\nstring: shortcuts to common contrast functions.\n\n\nSupported shortcut strings: difference, differenceavg, differenceavgwts, dydx, eyex, eydx, dyex, dydxavg, eyexavg, eydxavg, dyexavg, dydxavgwts, eyexavgwts, eydxavgwts, dyexavgwts, ratio, ratioavg, ratioavgwts, lnratio, lnratioavg, lnratioavgwts, lnor, lnoravg, lnoravgwts, lift, liftavg, liftavgwts, expdydx, expdydxavg, expdydxavgwts\n\n\nSee the Comparisons section below for definitions of each transformation.\n\n\n\n\nfunction: accepts two equal-length numeric vectors of adjusted predictions (hi and lo) and returns a vector of contrasts of the same length, or a unique numeric value.\n\n\nSee the Transformations section below for examples of valid functions.\n\n\n\n\n\n\n\n\ntransform\n\n\nstring or function. Transformation applied to unit-level estimates and confidence intervals just before the function returns results. Functions must accept a vector and return a vector of the same length. Supported string shortcuts: \"exp\", \"ln\"\n\n\n\n\nrug\n\n\nTRUE displays tick marks on the axes to mark the distribution of raw data.\n\n\n\n\ngray\n\n\nLogical. TRUE for a grayscale plot; FALSE (the default) for a color plot.\n\n\n\n\ndraw\n\n\nTRUE returns a ggplot2 plot. FALSE returns a data.frame of the underlying data.\n\n\n\n\n…\n\n\nAdditional arguments are passed to the predict() method supplied by the modeling package. These arguments are particularly useful for mixed-effects or bayesian models (see the online vignettes on the marginaleffects website). Available arguments can vary from model to model, depending on the range of supported arguments by each modeling package. See the \"Model-Specific Arguments\" section of the ?slopes documentation for a non-exhaustive list of available arguments.\n\n\n\nA ggplot2 object\n\nSome model types allow model-specific arguments to modify the nature of marginal effects, predictions, marginal means, and contrasts. 
Please report other package-specific predict() arguments on Github so we can add them to the table below.\nhttps://github.com/vincentarelbundock/marginaleffects/issues\n\n\n\nPackage\n\n\nClass\n\n\nArgument\n\n\nDocumentation\n\n\n\n\nbrms\n\n\nbrmsfit\n\n\nndraws\n\n\nbrms::posterior_predict\n\n\n\n\n\n\n\n\nre_formula\n\n\nbrms::posterior_predict\n\n\n\n\nlme4\n\n\nmerMod\n\n\nre.form\n\n\nlme4::predict.merMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nlme4::predict.merMod\n\n\n\n\nglmmTMB\n\n\nglmmTMB\n\n\nre.form\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nzitype\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\nmgcv\n\n\nbam\n\n\nexclude\n\n\nmgcv::predict.bam\n\n\n\n\n\n\ngam\n\n\nexclude\n\n\nmgcv::predict.gam\n\n\n\n\nrobustlmm\n\n\nrlmerMod\n\n\nre.form\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\nMCMCglmm\n\n\nMCMCglmm\n\n\nndraws\n\n\n\n\n\n\nsampleSelection\n\n\nselection\n\n\npart\n\n\nsampleSelection::predict.selection\n\n\n\n\n\n\n\n\nlibrary(\"marginaleffects\")\n\nmod <- lm(mpg ~ hp * drat * factor(am), data = mtcars)\n\nplot_comparisons(mod, variables = \"hp\", condition = \"drat\")\n\n\n\n\n\n\nplot_comparisons(mod, variables = \"hp\", condition = c(\"drat\", \"am\"))\n\n\n\n\n\n\nplot_comparisons(mod, variables = \"hp\", condition = list(\"am\", \"drat\" = 3:5))\n\n\n\n\n\n\nplot_comparisons(mod, variables = \"am\", condition = list(\"hp\", \"drat\" = range))\n\n\n\n\n\n\nplot_comparisons(mod, variables = \"am\", condition = list(\"hp\", \"drat\" = \"threenum\"))", "crumbs": [ "Model to Meaning", "Functions", - "`plot_predictions`" + "`plot_comparisons`" ] }, { - "objectID": "man/plot_predictions.html#plot-conditional-or-marginal-predictions", - "href": "man/plot_predictions.html#plot-conditional-or-marginal-predictions", + "objectID": "man/plot_comparisons.html#plot-conditional-or-marginal-comparisons", + "href": "man/plot_comparisons.html#plot-conditional-or-marginal-comparisons", "title": "", "section": "", - "text": "Plot predictions on the y-axis against values of one or more predictors (x-axis, colors/shapes, and facets).\nThe by argument is used to plot marginal predictions, that is, predictions made on the original data, but averaged by subgroups. This is analogous to using the by argument in the predictions() function.\nThe condition argument is used to plot conditional predictions, that is, predictions made on a user-specified grid. This is analogous to using the newdata argument and datagrid() function in a predictions() call. All variables whose values are not specified explicitly are treated as usual by datagrid(), that is, they are held at their mean or mode (or rounded mean for integers). This includes grouping variables in mixed-effects models, so analysts who fit such models may want to specify the groups of interest using the condition argument, or supply model-specific arguments to compute population-level estimates. 
See details below.\nSee the \"Plots\" vignette and website for tutorials and information on how to customize plots:\n\n\nhttps://marginaleffects.com/vignettes/plot.html\n\n\nhttps://marginaleffects.com\n\n\nplot_predictions(\n model,\n condition = NULL,\n by = NULL,\n newdata = NULL,\n type = NULL,\n vcov = NULL,\n conf_level = 0.95,\n wts = FALSE,\n transform = NULL,\n points = 0,\n rug = FALSE,\n gray = FALSE,\n draw = TRUE,\n ...\n)\n\n\n\n\n\nmodel\n\n\nModel object\n\n\n\n\ncondition\n\n\nConditional predictions\n\n\nCharacter vector (max length 4): Names of the predictors to display.\n\n\nNamed list (max length 4): List names correspond to predictors. List elements can be:\n\n\nNumeric vector\n\n\nFunction which returns a numeric vector or a set of unique categorical values\n\n\nShortcut strings for common reference values: \"minmax\", \"quartile\", \"threenum\"\n\n\n\n\n1: x-axis. 2: color/shape. 3: facet (wrap if no fourth variable, otherwise cols of grid). 4: facet (rows of grid).\n\n\nNumeric variables in positions 2 and 3 are summarized by Tukey’s five numbers ?stats::fivenum\n\n\n\n\n\n\nby\n\n\nMarginal predictions\n\n\nCharacter vector (max length 3): Names of the categorical predictors to marginalize across.\n\n\n1: x-axis. 2: color. 3: facets.\n\n\n\n\n\n\nnewdata\n\n\nWhen newdata is NULL, the grid is determined by the condition argument. When newdata is not NULL, the argument behaves in the same way as in the predictions() function.\n\n\n\n\ntype\n\n\nstring indicates the type (scale) of the predictions used to compute contrasts or slopes. This can differ based on the model type, but will typically be a string such as: \"response\", \"link\", \"probs\", or \"zero\". When an unsupported string is entered, the model-specific list of acceptable values is returned in an error message. When type is NULL, the first entry in the error message is used by default.\n\n\n\n\nvcov\n\n\nType of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:\n\n\nFALSE: Do not compute standard errors. This can speed up computation considerably.\n\n\nTRUE: Unit-level standard errors using the default vcov(model) variance-covariance matrix.\n\n\nString which indicates the kind of uncertainty estimates to return.\n\n\nHeteroskedasticity-consistent: “HC”, “HC0”, “HC1”, “HC2”, “HC3”, “HC4”, “HC4m”, “HC5”. See ?sandwich::vcovHC\n\n\nHeteroskedasticity and autocorrelation consistent: “HAC”\n\n\nMixed-Models degrees of freedom: \"satterthwaite\", \"kenward-roger\"\n\n\nOther: “NeweyWest”, “KernHAC”, “OPG”. See the sandwich package documentation.\n\n\n\n\nOne-sided formula which indicates the name of cluster variables (e.g., ~unit_id). This formula is passed to the cluster argument of the sandwich::vcovCL function.\n\n\nSquare covariance matrix\n\n\nFunction which returns a covariance matrix (e.g., stats::vcov(model))\n\n\n\n\n\n\nconf_level\n\n\nnumeric value between 0 and 1. Confidence level to use to build a confidence interval.\n\n\n\n\nwts\n\n\nlogical, string or numeric: weights to use when computing average predictions, contrasts or slopes. These weights only affect the averaging in avg_*() or with the by argument, and not unit-level estimates. See ?weighted.mean\n\n\nstring: column name of the weights variable in newdata. 
When supplying a column name to wts, it is recommended to supply the original data (including the weights variable) explicitly to newdata.\n\n\nnumeric: vector of length equal to the number of rows in the original data or in newdata (if supplied).\n\n\nFALSE: Equal weights.\n\n\nTRUE: Extract weights from the fitted object with insight::find_weights() and use them when taking weighted averages of estimates. Warning: newdata=datagrid() returns a single average weight, which is equivalent to using wts=FALSE\n\n\n\n\n\n\ntransform\n\n\nA function applied to unit-level adjusted predictions and confidence intervals just before the function returns results. For bayesian models, this function is applied to individual draws from the posterior distribution, before computing summaries.\n\n\n\n\npoints\n\n\nNumber between 0 and 1 which controls the transparency of raw data points. 0 (default) does not display any points. Warning: The points displayed are raw data, so the resulting plot is not a \"partial residual plot.\"\n\n\n\n\nrug\n\n\nTRUE displays tick marks on the axes to mark the distribution of raw data.\n\n\n\n\ngray\n\n\nLogical. TRUE for a grayscale plot; FALSE (the default) for a color plot.\n\n\n\n\ndraw\n\n\nTRUE returns a ggplot2 plot. FALSE returns a data.frame of the underlying data.\n\n\n\n\n…\n\n\nAdditional arguments are passed to the predict() method supplied by the modeling package. These arguments are particularly useful for mixed-effects or bayesian models (see the online vignettes on the marginaleffects website). Available arguments can vary from model to model, depending on the range of supported arguments by each modeling package. See the \"Model-Specific Arguments\" section of the ?slopes documentation for a non-exhaustive list of available arguments.\n\n\n\nA ggplot2 object or data frame (if draw=FALSE)\n\nSome model types allow model-specific arguments to modify the nature of marginal effects, predictions, marginal means, and contrasts. Please report other package-specific predict() arguments on Github so we can add them to the table below.\nhttps://github.com/vincentarelbundock/marginaleffects/issues\n\n\n\nPackage\n\n\nClass\n\n\nArgument\n\n\nDocumentation\n\n\n\n\nbrms\n\n\nbrmsfit\n\n\nndraws\n\n\nbrms::posterior_predict\n\n\n\n\n\n\n\n\nre_formula\n\n\nbrms::posterior_predict\n\n\n\n\nlme4\n\n\nmerMod\n\n\nre.form\n\n\nlme4::predict.merMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nlme4::predict.merMod\n\n\n\n\nglmmTMB\n\n\nglmmTMB\n\n\nre.form\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nzitype\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\nmgcv\n\n\nbam\n\n\nexclude\n\n\nmgcv::predict.bam\n\n\n\n\n\n\ngam\n\n\nexclude\n\n\nmgcv::predict.gam\n\n\n\n\nrobustlmm\n\n\nrlmerMod\n\n\nre.form\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\nMCMCglmm\n\n\nMCMCglmm\n\n\nndraws\n\n\n\n\n\n\nsampleSelection\n\n\nselection\n\n\npart\n\n\nsampleSelection::predict.selection\n\n\n\n\n\n\n\nThe type argument determines the scale of the predictions used to compute quantities of interest with functions from the marginaleffects package. Admissible values for type depend on the model object. When users specify an incorrect value for type, marginaleffects will raise an informative error with a list of valid type values for the specific model object. The first entry in the list in that error message is the default type.\nThe invlink(link) is a special type defined by marginaleffects. 
It is available for some (but not all) models, and only for the predictions() function. With this link type, we first compute predictions on the link scale, then we use the inverse link function to backtransform the predictions to the response scale. This is useful for models with non-linear link functions as it can ensure that confidence intervals stay within desirable bounds, ex: 0 to 1 for a logit model. Note that an average of estimates with type=“invlink(link)” will not always be equivalent to the average of estimates with type=“response”. This type is default when calling predictions(). It is available—but not default—when calling avg_predictions() or predictions() with the by argument.\nSome of the most common type values are:\nresponse, link, E, Ep, average, class, conditional, count, cum.prob, cumhaz, cumprob, density, detection, disp, ev, expected, expvalue, fitted, hazard, invlink(link), latent, latent_N, linear, linear.predictor, linpred, location, lp, mean, numeric, p, ppd, pr, precision, prediction, prob, probability, probs, quantile, risk, rmst, scale, survival, unconditional, utility, variance, xb, zero, zlink, zprob\n\n\nlibrary(\"marginaleffects\")\n\nmod <- lm(mpg ~ hp + wt, data = mtcars)\nplot_predictions(mod, condition = \"wt\")\n\n\n\n\n\n\nmod <- lm(mpg ~ hp * wt * am, data = mtcars)\nplot_predictions(mod, condition = c(\"hp\", \"wt\"))\n\n\n\n\n\n\nplot_predictions(mod, condition = list(\"hp\", wt = \"threenum\"))\n\n\n\n\n\n\nplot_predictions(mod, condition = list(\"hp\", wt = range))", + "text": "Plot comparisons on the y-axis against values of one or more predictors (x-axis, colors/shapes, and facets).\nThe by argument is used to plot marginal comparisons, that is, comparisons made on the original data, but averaged by subgroups. This is analogous to using the by argument in the comparisons() function.\nThe condition argument is used to plot conditional comparisons, that is, comparisons made on a user-specified grid. This is analogous to using the newdata argument and datagrid() function in a comparisons() call. All variables whose values are not specified explicitly are treated as usual by datagrid(), that is, they are held at their mean or mode (or rounded mean for integers). This includes grouping variables in mixed-effects models, so analysts who fit such models may want to specify the groups of interest using the condition argument, or supply model-specific arguments to compute population-level estimates. See details below.\nSee the \"Plots\" vignette and website for tutorials and information on how to customize plots:\n\n\nhttps://marginaleffects.com/vignettes/plot.html\n\n\nhttps://marginaleffects.com\n\n\nplot_comparisons(\n model,\n variables = NULL,\n condition = NULL,\n by = NULL,\n newdata = NULL,\n type = NULL,\n vcov = NULL,\n conf_level = 0.95,\n wts = FALSE,\n comparison = \"difference\",\n transform = NULL,\n rug = FALSE,\n gray = FALSE,\n draw = TRUE,\n ...\n)\n\n\n\n\n\nmodel\n\n\nModel object\n\n\n\n\nvariables\n\n\nName of the variable whose contrast we want to plot on the y-axis.\n\n\n\n\ncondition\n\n\nConditional slopes\n\n\nCharacter vector (max length 4): Names of the predictors to display.\n\n\nNamed list (max length 4): List names correspond to predictors. List elements can be:\n\n\nNumeric vector\n\n\nFunction which returns a numeric vector or a set of unique categorical values\n\n\nShortcut strings for common reference values: \"minmax\", \"quartile\", \"threenum\"\n\n\n\n\n1: x-axis. 2: color/shape. 
3: facet (wrap if no fourth variable, otherwise cols of grid). 4: facet (rows of grid).\n\n\nNumeric variables in positions 2 and 3 are summarized by Tukey’s five numbers ?stats::fivenum.\n\n\n\n\n\n\nby\n\n\nAggregate unit-level estimates (aka, marginalize, average over). Valid inputs:\n\n\nFALSE: return the original unit-level estimates.\n\n\nTRUE: aggregate estimates for each term.\n\n\nCharacter vector of column names in newdata or in the data frame produced by calling the function without the by argument.\n\n\nData frame with a by column of group labels, and merging columns shared by newdata or the data frame produced by calling the same function without the by argument.\n\n\nSee examples below.\n\n\nFor more complex aggregations, you can use the FUN argument of the hypotheses() function. See that function’s documentation and the Hypothesis Test vignettes on the marginaleffects website.\n\n\n\n\n\n\nnewdata\n\n\nWhen newdata is NULL, the grid is determined by the condition argument. When newdata is not NULL, the argument behaves in the same way as in the comparisons() function.\n\n\n\n\ntype\n\n\nstring indicates the type (scale) of the predictions used to compute contrasts or slopes. This can differ based on the model type, but will typically be a string such as: \"response\", \"link\", \"probs\", or \"zero\". When an unsupported string is entered, the model-specific list of acceptable values is returned in an error message. When type is NULL, the first entry in the error message is used by default.\n\n\n\n\nvcov\n\n\nType of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:\n\n\nFALSE: Do not compute standard errors. This can speed up computation considerably.\n\n\nTRUE: Unit-level standard errors using the default vcov(model) variance-covariance matrix.\n\n\nString which indicates the kind of uncertainty estimates to return.\n\n\nHeteroskedasticity-consistent: “HC”, “HC0”, “HC1”, “HC2”, “HC3”, “HC4”, “HC4m”, “HC5”. See ?sandwich::vcovHC\n\n\nHeteroskedasticity and autocorrelation consistent: “HAC”\n\n\nMixed-Models degrees of freedom: \"satterthwaite\", \"kenward-roger\"\n\n\nOther: “NeweyWest”, “KernHAC”, “OPG”. See the sandwich package documentation.\n\n\n\n\nOne-sided formula which indicates the name of cluster variables (e.g., ~unit_id). This formula is passed to the cluster argument of the sandwich::vcovCL function.\n\n\nSquare covariance matrix\n\n\nFunction which returns a covariance matrix (e.g., stats::vcov(model))\n\n\n\n\n\n\nconf_level\n\n\nnumeric value between 0 and 1. Confidence level to use to build a confidence interval.\n\n\n\n\nwts\n\n\nlogical, string or numeric: weights to use when computing average predictions, contrasts or slopes. These weights only affect the averaging in avg_*() or with the by argument, and not unit-level estimates. See ?weighted.mean\n\n\nstring: column name of the weights variable in newdata. When supplying a column name to wts, it is recommended to supply the original data (including the weights variable) explicitly to newdata.\n\n\nnumeric: vector of length equal to the number of rows in the original data or in newdata (if supplied).\n\n\nFALSE: Equal weights.\n\n\nTRUE: Extract weights from the fitted object with insight::find_weights() and use them when taking weighted averages of estimates. Warning: newdata=datagrid() returns a single average weight, which is equivalent to using wts=FALSE\n\n\n\n\n\n\ncomparison\n\n\nHow should pairs of predictions be compared? 
Difference, ratio, odds ratio, or user-defined functions.\n\n\nstring: shortcuts to common contrast functions.\n\n\nSupported shortcut strings: difference, differenceavg, differenceavgwts, dydx, eyex, eydx, dyex, dydxavg, eyexavg, eydxavg, dyexavg, dydxavgwts, eyexavgwts, eydxavgwts, dyexavgwts, ratio, ratioavg, ratioavgwts, lnratio, lnratioavg, lnratioavgwts, lnor, lnoravg, lnoravgwts, lift, liftavg, liftavgwts, expdydx, expdydxavg, expdydxavgwts\n\n\nSee the Comparisons section below for definitions of each transformation.\n\n\n\n\nfunction: accepts two equal-length numeric vectors of adjusted predictions (hi and lo) and returns a vector of contrasts of the same length, or a unique numeric value.\n\n\nSee the Transformations section below for examples of valid functions.\n\n\n\n\n\n\n\n\ntransform\n\n\nstring or function. Transformation applied to unit-level estimates and confidence intervals just before the function returns results. Functions must accept a vector and return a vector of the same length. Supported string shortcuts: \"exp\", \"ln\"\n\n\n\n\nrug\n\n\nTRUE displays tick marks on the axes to mark the distribution of raw data.\n\n\n\n\ngray\n\n\nLogical. TRUE for a grayscale plot; FALSE (the default) for a color plot.\n\n\n\n\ndraw\n\n\nTRUE returns a ggplot2 plot. FALSE returns a data.frame of the underlying data.\n\n\n\n\n…\n\n\nAdditional arguments are passed to the predict() method supplied by the modeling package. These arguments are particularly useful for mixed-effects or bayesian models (see the online vignettes on the marginaleffects website). Available arguments can vary from model to model, depending on the range of supported arguments by each modeling package. See the \"Model-Specific Arguments\" section of the ?slopes documentation for a non-exhaustive list of available arguments.\n\n\n\nA ggplot2 object\n\nSome model types allow model-specific arguments to modify the nature of marginal effects, predictions, marginal means, and contrasts. 
Please report other package-specific predict() arguments on Github so we can add them to the table below.\nhttps://github.com/vincentarelbundock/marginaleffects/issues\n\n\n\nPackage\n\n\nClass\n\n\nArgument\n\n\nDocumentation\n\n\n\n\nbrms\n\n\nbrmsfit\n\n\nndraws\n\n\nbrms::posterior_predict\n\n\n\n\n\n\n\n\nre_formula\n\n\nbrms::posterior_predict\n\n\n\n\nlme4\n\n\nmerMod\n\n\nre.form\n\n\nlme4::predict.merMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nlme4::predict.merMod\n\n\n\n\nglmmTMB\n\n\nglmmTMB\n\n\nre.form\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nzitype\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\nmgcv\n\n\nbam\n\n\nexclude\n\n\nmgcv::predict.bam\n\n\n\n\n\n\ngam\n\n\nexclude\n\n\nmgcv::predict.gam\n\n\n\n\nrobustlmm\n\n\nrlmerMod\n\n\nre.form\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\nMCMCglmm\n\n\nMCMCglmm\n\n\nndraws\n\n\n\n\n\n\nsampleSelection\n\n\nselection\n\n\npart\n\n\nsampleSelection::predict.selection\n\n\n\n\n\n\n\n\nlibrary(\"marginaleffects\")\n\nmod <- lm(mpg ~ hp * drat * factor(am), data = mtcars)\n\nplot_comparisons(mod, variables = \"hp\", condition = \"drat\")\n\n\n\n\n\n\nplot_comparisons(mod, variables = \"hp\", condition = c(\"drat\", \"am\"))\n\n\n\n\n\n\nplot_comparisons(mod, variables = \"hp\", condition = list(\"am\", \"drat\" = 3:5))\n\n\n\n\n\n\nplot_comparisons(mod, variables = \"am\", condition = list(\"hp\", \"drat\" = range))\n\n\n\n\n\n\nplot_comparisons(mod, variables = \"am\", condition = list(\"hp\", \"drat\" = \"threenum\"))", "crumbs": [ "Model to Meaning", "Functions", - "`plot_predictions`" + "`plot_comparisons`" ] }, { - "objectID": "man/plot_slopes.html", - "href": "man/plot_slopes.html", + "objectID": "man/comparisons.html", + "href": "man/comparisons.html", "title": "", "section": "", - "text": "Plot slopes on the y-axis against values of one or more predictors (x-axis, colors/shapes, and facets).\nThe by argument is used to plot marginal slopes, that is, slopes made on the original data, but averaged by subgroups. This is analogous to using the by argument in the slopes() function.\nThe condition argument is used to plot conditional slopes, that is, slopes computed on a user-specified grid. This is analogous to using the newdata argument and datagrid() function in a slopes() call. All variables whose values are not specified explicitly are treated as usual by datagrid(), that is, they are held at their mean or mode (or rounded mean for integers). This includes grouping variables in mixed-effects models, so analysts who fit such models may want to specify the groups of interest using the condition argument, or supply model-specific arguments to compute population-level estimates. See details below. 
See the \"Plots\" vignette and website for tutorials and information on how to customize plots:\n\n\nhttps://marginaleffects.com/vignettes/plot.html\n\n\nhttps://marginaleffects.com\n\n\nplot_slopes(\n model,\n variables = NULL,\n condition = NULL,\n by = NULL,\n newdata = NULL,\n type = NULL,\n vcov = NULL,\n conf_level = 0.95,\n wts = FALSE,\n slope = \"dydx\",\n rug = FALSE,\n gray = FALSE,\n draw = TRUE,\n ...\n)\n\n\n\n\n\nmodel\n\n\nModel object\n\n\n\n\nvariables\n\n\nName of the variable whose marginal effect (slope) we want to plot on the y-axis.\n\n\n\n\ncondition\n\n\nConditional slopes\n\n\nCharacter vector (max length 4): Names of the predictors to display.\n\n\nNamed list (max length 4): List names correspond to predictors. List elements can be:\n\n\nNumeric vector\n\n\nFunction which returns a numeric vector or a set of unique categorical values\n\n\nShortcut strings for common reference values: \"minmax\", \"quartile\", \"threenum\"\n\n\n\n\n1: x-axis. 2: color/shape. 3: facet (wrap if no fourth variable, otherwise cols of grid). 4: facet (rows of grid).\n\n\nNumeric variables in positions 2 and 3 are summarized by Tukey’s five numbers ?stats::fivenum.\n\n\n\n\n\n\nby\n\n\nAggregate unit-level estimates (aka, marginalize, average over). Valid inputs:\n\n\nFALSE: return the original unit-level estimates.\n\n\nTRUE: aggregate estimates for each term.\n\n\nCharacter vector of column names in newdata or in the data frame produced by calling the function without the by argument.\n\n\nData frame with a by column of group labels, and merging columns shared by newdata or the data frame produced by calling the same function without the by argument.\n\n\nSee examples below.\n\n\nFor more complex aggregations, you can use the FUN argument of the hypotheses() function. See that function’s documentation and the Hypothesis Test vignettes on the marginaleffects website.\n\n\n\n\n\n\nnewdata\n\n\nWhen newdata is NULL, the grid is determined by the condition argument. When newdata is not NULL, the argument behaves in the same way as in the slopes() function.\n\n\n\n\ntype\n\n\nstring indicates the type (scale) of the predictions used to compute contrasts or slopes. This can differ based on the model type, but will typically be a string such as: \"response\", \"link\", \"probs\", or \"zero\". When an unsupported string is entered, the model-specific list of acceptable values is returned in an error message. When type is NULL, the first entry in the error message is used by default.\n\n\n\n\nvcov\n\n\nType of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:\n\n\nFALSE: Do not compute standard errors. This can speed up computation considerably.\n\n\nTRUE: Unit-level standard errors using the default vcov(model) variance-covariance matrix.\n\n\nString which indicates the kind of uncertainty estimates to return.\n\n\nHeteroskedasticity-consistent: “HC”, “HC0”, “HC1”, “HC2”, “HC3”, “HC4”, “HC4m”, “HC5”. See ?sandwich::vcovHC\n\n\nHeteroskedasticity and autocorrelation consistent: “HAC”\n\n\nMixed-Models degrees of freedom: \"satterthwaite\", \"kenward-roger\"\n\n\nOther: “NeweyWest”, “KernHAC”, “OPG”. See the sandwich package documentation.\n\n\n\n\nOne-sided formula which indicates the name of cluster variables (e.g., ~unit_id). 
This formula is passed to the cluster argument of the sandwich::vcovCL function.\n\n\nSquare covariance matrix\n\n\nFunction which returns a covariance matrix (e.g., stats::vcov(model))\n\n\n\n\n\n\nconf_level\n\n\nnumeric value between 0 and 1. Confidence level to use to build a confidence interval.\n\n\n\n\nwts\n\n\nlogical, string or numeric: weights to use when computing average predictions, contrasts or slopes. These weights only affect the averaging in avg_*() or with the by argument, and not unit-level estimates. See ?weighted.mean\n\n\nstring: column name of the weights variable in newdata. When supplying a column name to wts, it is recommended to supply the original data (including the weights variable) explicitly to newdata.\n\n\nnumeric: vector of length equal to the number of rows in the original data or in newdata (if supplied).\n\n\nFALSE: Equal weights.\n\n\nTRUE: Extract weights from the fitted object with insight::find_weights() and use them when taking weighted averages of estimates. Warning: newdata=datagrid() returns a single average weight, which is equivalent to using wts=FALSE\n\n\n\n\n\n\nslope\n\n\nstring indicates the type of slope or (semi-)elasticity to compute:\n\n\n\"dydx\": dY/dX\n\n\n\"eyex\": dY/dX * X / Y\n\n\n\"eydx\": (dY/dX) / Y\n\n\n\"dyex\": (dY/dX) * X\n\n\nY is the predicted value of the outcome; X is the observed value of the predictor. (These definitions match the dydx, eyex, eydx, and dyex rows of the transformation table in the comparisons() documentation.)\n\n\n\n\n\n\nrug\n\n\nTRUE displays tick marks on the axes to mark the distribution of raw data.\n\n\n\n\ngray\n\n\nLogical. TRUE for a grayscale plot; FALSE (the default) for a color plot.\n\n\n\n\ndraw\n\n\nTRUE returns a ggplot2 plot. FALSE returns a data.frame of the underlying data.\n\n\n\n\n…\n\n\nAdditional arguments are passed to the predict() method supplied by the modeling package. These arguments are particularly useful for mixed-effects or bayesian models (see the online vignettes on the marginaleffects website). Available arguments can vary from model to model, depending on the range of supported arguments by each modeling package. See the \"Model-Specific Arguments\" section of the ?slopes documentation for a non-exhaustive list of available arguments.\n\n\n\nA ggplot2 object\n\nSome model types allow model-specific arguments to modify the nature of marginal effects, predictions, marginal means, and contrasts. 
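For example, with a mixed-effects model, the re.form argument listed in the table below can be forwarded through the ... argument to request population-level estimates. A minimal sketch, assuming the lme4 package is installed (re.form = NA is simply passed through to lme4::predict.merMod; the model and variables are toy choices):\nlibrary(\"marginaleffects\")\nlibrary(\"lme4\")\n\n# toy example: random intercepts by cylinder count\nmod <- lmer(mpg ~ hp + (1 | cyl), data = mtcars)\n\n# re.form = NA ignores the random effects, so the plotted slopes are population-level\nplot_slopes(mod, variables = \"hp\", condition = \"hp\", re.form = NA)\n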
Please report other package-specific predict() arguments on Github so we can add them to the table below.\nhttps://github.com/vincentarelbundock/marginaleffects/issues\n\n\n\nPackage\n\n\nClass\n\n\nArgument\n\n\nDocumentation\n\n\n\n\nbrms\n\n\nbrmsfit\n\n\nndraws\n\n\nbrms::posterior_predict\n\n\n\n\n\n\n\n\nre_formula\n\n\nbrms::posterior_predict\n\n\n\n\nlme4\n\n\nmerMod\n\n\nre.form\n\n\nlme4::predict.merMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nlme4::predict.merMod\n\n\n\n\nglmmTMB\n\n\nglmmTMB\n\n\nre.form\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nzitype\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\nmgcv\n\n\nbam\n\n\nexclude\n\n\nmgcv::predict.bam\n\n\n\n\n\n\ngam\n\n\nexclude\n\n\nmgcv::predict.gam\n\n\n\n\nrobustlmm\n\n\nrlmerMod\n\n\nre.form\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\nMCMCglmm\n\n\nMCMCglmm\n\n\nndraws\n\n\n\n\n\n\nsampleSelection\n\n\nselection\n\n\npart\n\n\nsampleSelection::predict.selection\n\n\n\n\n\n\n\n\nlibrary(\"marginaleffects\")\n\nlibrary(marginaleffects)\nmod <- lm(mpg ~ hp * drat * factor(am), data = mtcars)\n\nplot_slopes(mod, variables = \"hp\", condition = \"drat\")\n\n\n\n\n\n\nplot_slopes(mod, variables = \"hp\", condition = c(\"drat\", \"am\"))\n\n\n\n\n\n\nplot_slopes(mod, variables = \"hp\", condition = list(\"am\", \"drat\" = 3:5))\n\n\n\n\n\n\nplot_slopes(mod, variables = \"am\", condition = list(\"hp\", \"drat\" = range))\n\n\n\n\n\n\nplot_slopes(mod, variables = \"am\", condition = list(\"hp\", \"drat\" = \"threenum\"))", + "text": "Predict the outcome variable at different regressor values (e.g., college graduates vs. others), and compare those predictions by computing a difference, ratio, or some other function. comparisons() can return many quantities of interest, such as contrasts, differences, risk ratios, changes in log odds, lift, slopes, elasticities, etc.\n\n\ncomparisons(): unit-level (conditional) estimates.\n\n\navg_comparisons(): average (marginal) estimates.\n\n\nvariables identifies the focal regressors whose \"effect\" we are interested in. comparison determines how predictions with different regressor values are compared (difference, ratio, odds, etc.). 
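A minimal sketch of these two workflows on the built-in mtcars data (the model and the focal variable are arbitrary choices for illustration):\nlibrary(\"marginaleffects\")\n\nmod <- glm(vs ~ hp + wt, data = mtcars, family = binomial)\n\n# unit-level (conditional) estimates: one row per observation\ncomparisons(mod, variables = \"hp\")\n\n# average (marginal) estimate: one summary row for the focal variable\navg_comparisons(mod, variables = \"hp\")\n\n# the comparison argument switches the scale, e.g. to a ratio of predictions\navg_comparisons(mod, variables = \"hp\", comparison = \"ratio\")\n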
The newdata argument and the datagrid() function control where statistics are evaluated in the predictor space: \"at observed values\", \"at the mean\", \"at representative values\", etc.\nSee the comparisons vignette and package website for worked examples and case studies:\n\n\nhttps://marginaleffects.com/vignettes/comparisons.html\n\n\nhttps://marginaleffects.com/\n\n\ncomparisons(\n model,\n newdata = NULL,\n variables = NULL,\n comparison = \"difference\",\n type = NULL,\n vcov = TRUE,\n by = FALSE,\n conf_level = 0.95,\n transform = NULL,\n cross = FALSE,\n wts = FALSE,\n hypothesis = NULL,\n equivalence = NULL,\n p_adjust = NULL,\n df = Inf,\n eps = NULL,\n numderiv = \"fdforward\",\n ...\n)\n\navg_comparisons(\n model,\n newdata = NULL,\n variables = NULL,\n type = NULL,\n vcov = TRUE,\n by = TRUE,\n conf_level = 0.95,\n comparison = \"difference\",\n transform = NULL,\n cross = FALSE,\n wts = FALSE,\n hypothesis = NULL,\n equivalence = NULL,\n p_adjust = NULL,\n df = Inf,\n eps = NULL,\n numderiv = \"fdforward\",\n ...\n)\n\n\n\n\n\nmodel\n\n\nModel object\n\n\n\n\nnewdata\n\n\nGrid of predictor values at which we evaluate the comparisons.\n\n\nWarning: Avoid modifying your dataset between fitting the model and calling a marginaleffects function. This can sometimes lead to unexpected results.\n\n\nNULL (default): Unit-level contrasts for each observed value in the dataset (empirical distribution). The dataset is retrieved using insight::get_data(), which tries to extract data from the environment. This may produce unexpected results if the original data frame has been altered since fitting the model.\n\n\ndata frame: Unit-level contrasts for each row of the newdata data frame.\n\n\nstring:\n\n\n\"mean\": Contrasts at the Mean. Contrasts when each predictor is held at its mean or mode.\n\n\n\"median\": Contrasts at the Median. Contrasts when each predictor is held at its median or mode.\n\n\n\"balanced\": Contrasts evaluated on a balanced grid with every combination of categories and numeric variables held at their means.\n\n\n\"tukey\": Contrasts at Tukey’s 5 numbers.\n\n\n\"grid\": Contrasts on a grid of representative numbers (Tukey’s 5 numbers and unique values of categorical predictors).\n\n\n\n\ndatagrid() call to specify a custom grid of regressors. For example:\n\n\nnewdata = datagrid(cyl = c(4, 6)): cyl variable equal to 4 and 6 and other regressors fixed at their means or modes.\n\n\nnewdata = datagrid(mpg = fivenum): mpg variable held at Tukey’s five numbers (using the fivenum function), and other regressors fixed at their means or modes.\n\n\nSee the Examples section and the datagrid documentation.\n\n\n\n\nsubset() call with a single argument to select a subset of the dataset used to fit the model, ex: newdata = subset(treatment == 1)\n\n\ndplyr::filter() call with a single argument to select a subset of the dataset used to fit the model, ex: newdata = filter(treatment == 1)\n\n\n\n\n\n\nvariables\n\n\nFocal variables\n\n\nNULL: compute comparisons for all the variables in the model object (can be slow).\n\n\nCharacter vector: subset of variables (usually faster).\n\n\nNamed list: names identify the subset of variables of interest, and values define the type of contrast to compute. 
Acceptable values depend on the variable type:\n\n\nFactor or character variables:\n\n\n\"reference\": Each factor level is compared to the factor reference (base) level\n\n\n\"all\": All combinations of observed levels\n\n\n\"sequential\": Each factor level is compared to the previous factor level\n\n\n\"pairwise\": Each factor level is compared to all other levels\n\n\n\"minmax\": The highest and lowest levels of a factor.\n\n\n\"revpairwise\", \"revreference\", \"revsequential\": inverse of the corresponding hypotheses.\n\n\nVector of length 2 with the two values to compare.\n\n\nData frame with the same number of rows as newdata, with two columns of \"lo\" and \"hi\" values to compare.\n\n\nFunction that accepts a vector and returns a data frame with two columns of \"lo\" and \"hi\" values to compare. See examples below.\n\n\n\n\nLogical variables:\n\n\nNULL: contrast between TRUE and FALSE\n\n\nData frame with the same number of rows as newdata, with two columns of \"lo\" and \"hi\" values to compare.\n\n\nFunction that accepts a vector and returns a data frame with two columns of \"lo\" and \"hi\" values to compare. See examples below.\n\n\n\n\nNumeric variables:\n\n\nNumeric of length 1: Forward contrast for a gap of x, computed between the observed value and the observed value plus x. Users can set a global option to get a \"center\" or \"backward\" contrast instead: options(marginaleffects_contrast_direction = \"center\")\n\n\nNumeric vector of length 2: Contrast between the largest and the smallest elements of the x vector.\n\n\nData frame with the same number of rows as newdata, with two columns of \"lo\" and \"hi\" values to compare.\n\n\nFunction that accepts a vector and returns a data frame with two columns of \"lo\" and \"hi\" values to compare. See examples below.\n\n\n\"iqr\": Contrast across the interquartile range of the regressor.\n\n\n\"sd\": Contrast across one standard deviation around the regressor mean.\n\n\n\"2sd\": Contrast across two standard deviations around the regressor mean.\n\n\n\"minmax\": Contrast between the maximum and the minimum values of the regressor.\n\n\n\n\nExamples:\n\n\nvariables = list(gear = \"pairwise\", hp = 10)\n\n\nvariables = list(gear = \"sequential\", hp = c(100, 120))\n\n\nvariables = list(hp = \(x) data.frame(low = x - 5, high = x + 10))\n\n\nSee the Examples section below, and the short sketch after the argument descriptions, for more.\n\n\n\n\n\n\n\n\n\n\ncomparison\n\n\nHow should pairs of predictions be compared? Difference, ratio, odds ratio, or user-defined functions.\n\n\nstring: shortcuts to common contrast functions.\n\n\nSupported shortcut strings: difference, differenceavg, differenceavgwts, dydx, eyex, eydx, dyex, dydxavg, eyexavg, eydxavg, dyexavg, dydxavgwts, eyexavgwts, eydxavgwts, dyexavgwts, ratio, ratioavg, ratioavgwts, lnratio, lnratioavg, lnratioavgwts, lnor, lnoravg, lnoravgwts, lift, liftavg, liftavgwts, expdydx, expdydxavg, expdydxavgwts\n\n\nSee the Comparisons section below for definitions of each transformation.\n\n\n\n\nfunction: accepts two equal-length numeric vectors of adjusted predictions (hi and lo) and returns a vector of contrasts of the same length, or a unique numeric value.\n\n\nSee the Transformations section below for examples of valid functions.\n\n\n\n\n\n\n\n\ntype\n\n\nstring indicates the type (scale) of the predictions used to compute contrasts or slopes. This can differ based on the model type, but will typically be a string such as: \"response\", \"link\", \"probs\", or \"zero\". 
When an unsupported string is entered, the model-specific list of acceptable values is returned in an error message. When type is NULL, the first entry in the error message is used by default.\n\n\n\n\nvcov\n\n\nType of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:\n\n\nFALSE: Do not compute standard errors. This can speed up computation considerably.\n\n\nTRUE: Unit-level standard errors using the default vcov(model) variance-covariance matrix.\n\n\nString which indicates the kind of uncertainty estimates to return.\n\n\nHeteroskedasticity-consistent: “HC”, “HC0”, “HC1”, “HC2”, “HC3”, “HC4”, “HC4m”, “HC5”. See ?sandwich::vcovHC\n\n\nHeteroskedasticity and autocorrelation consistent: “HAC”\n\n\nMixed-Models degrees of freedom: \"satterthwaite\", \"kenward-roger\"\n\n\nOther: “NeweyWest”, “KernHAC”, “OPG”. See the sandwich package documentation.\n\n\n\n\nOne-sided formula which indicates the name of cluster variables (e.g., ~unit_id). This formula is passed to the cluster argument of the sandwich::vcovCL function.\n\n\nSquare covariance matrix\n\n\nFunction which returns a covariance matrix (e.g., stats::vcov(model))\n\n\n\n\n\n\nby\n\n\nAggregate unit-level estimates (aka, marginalize, average over). Valid inputs:\n\n\nFALSE: return the original unit-level estimates.\n\n\nTRUE: aggregate estimates for each term.\n\n\nCharacter vector of column names in newdata or in the data frame produced by calling the function without the by argument.\n\n\nData frame with a by column of group labels, and merging columns shared by newdata or the data frame produced by calling the same function without the by argument.\n\n\nSee examples below.\n\n\nFor more complex aggregations, you can use the FUN argument of the hypotheses() function. See that function’s documentation and the Hypothesis Test vignettes on the marginaleffects website.\n\n\n\n\n\n\nconf_level\n\n\nnumeric value between 0 and 1. Confidence level to use to build a confidence interval.\n\n\n\n\ntransform\n\n\nstring or function. Transformation applied to unit-level estimates and confidence intervals just before the function returns results. Functions must accept a vector and return a vector of the same length. Supported string shortcuts: \"exp\", \"ln\"\n\n\n\n\ncross\n\n\n\n\nFALSE: Contrasts represent the change in adjusted predictions when one predictor changes and all other variables are held constant.\n\n\nTRUE: Contrasts represent the changes in adjusted predictions when all the predictors specified in the variables argument are manipulated simultaneously (a \"cross-contrast\").\n\n\n\n\n\n\nwts\n\n\nlogical, string or numeric: weights to use when computing average predictions, contrasts or slopes. These weights only affect the averaging in avg_*() or with the by argument, and not unit-level estimates. See ?weighted.mean\n\n\nstring: column name of the weights variable in newdata. When supplying a column name to wts, it is recommended to supply the original data (including the weights variable) explicitly to newdata.\n\n\nnumeric: vector of length equal to the number of rows in the original data or in newdata (if supplied).\n\n\nFALSE: Equal weights.\n\n\nTRUE: Extract weights from the fitted object with insight::find_weights() and use them when taking weighted averages of estimates. 
Warning: newdata=datagrid() returns a single average weight, which is equivalent to using wts=FALSE\n\n\n\n\n\n\nhypothesis\n\n\nspecify a hypothesis test or custom contrast using a numeric value, vector, or matrix; a string equation; a string; a formula; or a function.\n\n\nNumeric:\n\n\nSingle value: the null hypothesis used in the computation of Z and p (before applying transform).\n\n\nVector: Weights to compute a linear combination of (custom contrast between) estimates. Length equal to the number of rows generated by the same function call, but without the hypothesis argument.\n\n\nMatrix: Each column is a vector of weights, as described above, used to compute a distinct linear combination of (contrast between) estimates. The column names of the matrix are used as labels in the output.\n\n\n\n\nString equation to specify linear or non-linear hypothesis tests. If the term column uniquely identifies rows, terms can be used in the formula. Otherwise, use b1, b2, etc. to identify the position of each parameter. The b* wildcard can be used to test hypotheses on all estimates. If a named vector is used, the names are used as labels in the output. Examples:\n\n\nhp = drat\n\n\nhp + drat = 12\n\n\nb1 + b2 + b3 = 0\n\n\nb* / b1 = 1\n\n\n\n\nString:\n\n\n\"pairwise\": pairwise differences between estimates in each row.\n\n\n\"reference\": differences between the estimates in each row and the estimate in the first row.\n\n\n\"sequential\": difference between an estimate and the estimate in the next row.\n\n\n\"meandev\": difference between an estimate and the mean of all estimates.\n\n\n\"meanotherdev\": difference between an estimate and the mean of all other estimates, excluding the current one.\n\n\n\"revpairwise\", \"revreference\", \"revsequential\": inverse of the corresponding hypotheses, as described above.\n\n\n\n\nFormula:\n\n\ncomparison ~ pairs | group\n\n\nLeft-hand side determines the type of comparison to conduct: difference or ratio. If the left-hand side is empty, difference is chosen.\n\n\nRight-hand side determines the pairs of estimates to compare: reference, sequential, or meandev\n\n\nOptional: Users can supply grouping variables after a vertical bar to conduct comparisons within subsets.\n\n\nExamples:\n\n\n~ reference\n\n\nratio ~ pairwise\n\n\ndifference ~ pairwise | groupid\n\n\n\n\n\n\nFunction:\n\n\nAccepts an argument x: object produced by a marginaleffects function or a data frame with column rowid and estimate\n\n\nReturns a data frame with columns term and estimate (mandatory) and rowid (optional).\n\n\nThe function can also accept optional input arguments: newdata, by, draws.\n\n\nThis function approach will not work for Bayesian models or with bootstrapping. In those cases, it is easy to use posterior_draws() to extract and manipulate the draws directly.\n\n\n\n\nSee the Examples section below and the vignette: https://marginaleffects.com/vignettes/hypothesis.html\n\n\n\n\n\n\nequivalence\n\n\nNumeric vector of length 2: bounds used for the two-one-sided test (TOST) of equivalence, and for the non-inferiority and non-superiority tests. See Details section below.\n\n\n\n\np_adjust\n\n\nAdjust p-values for multiple comparisons: \"holm\", \"hochberg\", \"hommel\", \"bonferroni\", \"BH\", \"BY\", or \"fdr\". See stats::p.adjust\n\n\n\n\ndf\n\n\nDegrees of freedom used to compute p values and confidence intervals. A single numeric value between 1 and Inf. When df is Inf, the normal distribution is used. When df is finite, the t distribution is used. 
See insight::get_df for a convenient function to extract degrees of freedom. Ex: slopes(model, df = insight::get_df(model))\n\n\n\n\neps\n\n\nNULL or numeric value which determines the step size to use when calculating numerical derivatives: (f(x+eps)-f(x))/eps. When eps is NULL, the step size is 0.0001 multiplied by the difference between the maximum and minimum values of the variable with respect to which we are taking the derivative. Changing eps may be necessary to avoid numerical problems in certain models.\n\n\n\n\nnumderiv\n\n\nstring or list of strings indicating the method to use for the numeric differentiation used to compute delta method standard errors.\n\n\n\"fdforward\": finite difference method with forward differences (default)\n\n\n\"fdcenter\": finite difference method with central differences\n\n\n\"richardson\": Richardson extrapolation method\n\n\nExtra arguments can be specified by passing a list to the numderiv argument, with the name of the method first and named arguments following, ex: numderiv = list(\"fdcenter\", eps = 1e-5). When an unknown argument is used, marginaleffects prints the list of valid arguments for each method.\n\n\n\n\n\n\n…\n\n\nAdditional arguments are passed to the predict() method supplied by the modeling package. These arguments are particularly useful for mixed-effects or bayesian models (see the online vignettes on the marginaleffects website). Available arguments can vary from model to model, depending on the range of supported arguments by each modeling package. See the \"Model-Specific Arguments\" section of the ?slopes documentation for a non-exhaustive list of available arguments.\n\n\n\nA data.frame with one row per observation (per term/group) and several columns:\n\n\nrowid: row number of the newdata data frame\n\n\ntype: prediction type, as defined by the type argument\n\n\ngroup: (optional) value of the grouped outcome (e.g., categorical outcome models)\n\n\nterm: the variable whose marginal effect is computed\n\n\ndydx: slope of the outcome with respect to the term, for a given combination of predictor values\n\n\nstd.error: standard errors computed via the delta method.\n\n\np.value: p value associated with the estimate column. The null is determined by the hypothesis argument (0 by default), and p values are computed before applying the transform argument.\n\n\ns.value: Shannon information transforms of p values. How many consecutive \"heads\" tosses would provide the same amount of evidence (or \"surprise\") against the null hypothesis that the coin is fair? The purpose of S is to calibrate the analyst’s intuition about the strength of evidence encoded in p against a well-known physical phenomenon. See Greenland (2019) and Cole et al. (2020).\n\n\nconf.low: lower bound of the confidence interval (or equal-tailed interval for bayesian models)\n\n\nconf.high: upper bound of the confidence interval (or equal-tailed interval for bayesian models)\n\n\nSee ?print.marginaleffects for printing options.\n\n\n\navg_comparisons(): Average comparisons\n\n
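As a concrete illustration of the function-valued contrasts accepted by the variables argument above, here is a minimal sketch (the gap of 10 horsepower and the column names lo and hi are arbitrary choices):\nlibrary(\"marginaleffects\")\n\nmod <- lm(mpg ~ hp + wt, data = mtcars)\n\n# compare predictions made 10 hp below vs. 10 hp above each observed value\ngap <- \(x) data.frame(lo = x - 10, hi = x + 10)\navg_comparisons(mod, variables = list(hp = gap))\n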
Standard errors for all quantities estimated by marginaleffects can be obtained via the delta method. This requires differentiating a function with respect to the coefficients in the model using a finite difference approach. In some models, the delta method standard errors can be sensitive to various aspects of the numeric differentiation strategy, including the step size. By default, the step size is set to 1e-8, or to 1e-4 times the smallest absolute model coefficient, whichever is largest.\nmarginaleffects can delegate numeric differentiation to the numDeriv package, which allows more flexibility. To do this, users can pass arguments to the numDeriv::jacobian function through a global option. For example:\n\n\noptions(marginaleffects_numDeriv = list(method = \"simple\", method.args = list(eps = 1e-6)))\n\n\noptions(marginaleffects_numDeriv = list(method = \"Richardson\", method.args = list(eps = 1e-5)))\n\n\noptions(marginaleffects_numDeriv = NULL)\n\n\nSee the \"Standard Errors and Confidence Intervals\" vignette on the marginaleffects website for more details on the computation of standard errors:\nhttps://marginaleffects.com/vignettes/uncertainty.html\nNote that the inferences() function can be used to compute uncertainty estimates using bootstrap or simulation-based inference. See the vignette:\nhttps://marginaleffects.com/vignettes/bootstrap.html\n
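A short sketch of that bootstrap route (the choice of 500 replicates is arbitrary; method and R are arguments of inferences()):\nlibrary(\"marginaleffects\")\n\nmod <- lm(mpg ~ hp + wt, data = mtcars)\n\n# swap the delta method intervals for bootstrap intervals\navg_comparisons(mod, variables = \"hp\") |>\n  inferences(method = \"boot\", R = 500)\n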
Some model types allow model-specific arguments to modify the nature of marginal effects, predictions, marginal means, and contrasts. Please report other package-specific predict() arguments on Github so we can add them to the table below.\nhttps://github.com/vincentarelbundock/marginaleffects/issues\n\n\n\nPackage\n\n\nClass\n\n\nArgument\n\n\nDocumentation\n\n\n\n\nbrms\n\n\nbrmsfit\n\n\nndraws\n\n\nbrms::posterior_predict\n\n\n\n\n\n\n\n\nre_formula\n\n\nbrms::posterior_predict\n\n\n\n\nlme4\n\n\nmerMod\n\n\nre.form\n\n\nlme4::predict.merMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nlme4::predict.merMod\n\n\n\n\nglmmTMB\n\n\nglmmTMB\n\n\nre.form\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nzitype\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\nmgcv\n\n\nbam\n\n\nexclude\n\n\nmgcv::predict.bam\n\n\n\n\n\n\ngam\n\n\nexclude\n\n\nmgcv::predict.gam\n\n\n\n\nrobustlmm\n\n\nrlmerMod\n\n\nre.form\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\nMCMCglmm\n\n\nMCMCglmm\n\n\nndraws\n\n\n\n\n\n\nsampleSelection\n\n\nselection\n\n\npart\n\n\nsampleSelection::predict.selection\n\n\n\n\n\n\n\nThe following transformations can be applied by supplying one of the shortcut strings to the comparison argument. hi is a vector of adjusted predictions for the \"high\" side of the contrast. lo is a vector of adjusted predictions for the \"low\" side of the contrast. y is a vector of adjusted predictions for the original data. x is the predictor in the original data. eps is the step size to use to compute derivatives and elasticities.\n\n\n\nShortcut\n\n\nFunction\n\n\n\n\ndifference\n\n\n\(hi, lo) hi - lo\n\n\n\n\ndifferenceavg\n\n\n\(hi, lo) mean(hi - lo)\n\n\n\n\ndydx\n\n\n\(hi, lo, eps) (hi - lo)/eps\n\n\n\n\neyex\n\n\n\(hi, lo, eps, y, x) (hi - lo)/eps * (x/y)\n\n\n\n\neydx\n\n\n\(hi, lo, eps, y, x) ((hi - lo)/eps)/y\n\n\n\n\ndyex\n\n\n\(hi, lo, eps, x) ((hi - lo)/eps) * x\n\n\n\n\ndydxavg\n\n\n\(hi, lo, eps) mean((hi - lo)/eps)\n\n\n\n\neyexavg\n\n\n\(hi, lo, eps, y, x) mean((hi - lo)/eps * (x/y))\n\n\n\n\neydxavg\n\n\n\(hi, lo, eps, y, x) mean(((hi - lo)/eps)/y)\n\n\n\n\ndyexavg\n\n\n\(hi, lo, eps, x) mean(((hi - lo)/eps) * x)\n\n\n\n\nratio\n\n\n\(hi, lo) hi/lo\n\n\n\n\nratioavg\n\n\n\(hi, lo) mean(hi)/mean(lo)\n\n\n\n\nlnratio\n\n\n\(hi, lo) log(hi/lo)\n\n\n\n\nlnratioavg\n\n\n\(hi, lo) log(mean(hi)/mean(lo))\n\n\n\n\nlnor\n\n\n\(hi, lo) log((hi/(1 - hi))/(lo/(1 - lo)))\n\n\n\n\nlnoravg\n\n\n\(hi, lo) log((mean(hi)/(1 - mean(hi)))/(mean(lo)/(1 - mean(lo))))\n\n\n\n\nlift\n\n\n\(hi, lo) (hi - lo)/lo\n\n\n\n\nliftavg\n\n\n\(hi, lo) (mean(hi - lo))/mean(lo)\n\n\n\n\nexpdydx\n\n\n\(hi, lo, eps) ((exp(hi) - exp(lo))/exp(eps))/eps\n\n\n\n\nexpdydxavg\n\n\n\(hi, lo, eps) mean(((exp(hi) - exp(lo))/exp(eps))/eps)\n\n\n\n\n\n\n\n
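To make the table concrete, the two calls below request the same lift transformation, once with the shortcut string and once with the equivalent hand-rolled function (a sketch, assuming a fitted model object named mod):\n# shortcut string from the table above\navg_comparisons(mod, comparison = \"liftavg\")\n\n# the same quantity, spelled out as a function of hi and lo\navg_comparisons(mod, comparison = \(hi, lo) mean(hi - lo)/mean(lo))\n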
The type argument determines the scale of the predictions used to compute quantities of interest with functions from the marginaleffects package. Admissible values for type depend on the model object. When users specify an incorrect value for type, marginaleffects will raise an informative error with a list of valid type values for the specific model object. The first entry in the list in that error message is the default type.\nThe invlink(link) is a special type defined by marginaleffects. It is available for some (but not all) models, and only for the predictions() function. With this link type, we first compute predictions on the link scale, then we use the inverse link function to backtransform the predictions to the response scale. This is useful for models with non-linear link functions as it can ensure that confidence intervals stay within desirable bounds, ex: 0 to 1 for a logit model. Note that an average of estimates with type=\"invlink(link)\" will not always be equivalent to the average of estimates with type=\"response\". This type is the default when calling predictions(). It is available, but not the default, when calling avg_predictions() or predictions() with the by argument.\nSome of the most common type values are:\nresponse, link, E, Ep, average, class, conditional, count, cum.prob, cumhaz, cumprob, density, detection, disp, ev, expected, expvalue, fitted, hazard, invlink(link), latent, latent_N, linear, linear.predictor, linpred, location, lp, mean, numeric, p, ppd, pr, precision, prediction, prob, probability, probs, quantile, risk, rmst, scale, survival, unconditional, utility, variance, xb, zero, zlink, zprob\n
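A minimal sketch of the invlink(link) behavior described above; because averaging and back-transformation do not commute, the two calls below need not return the same estimate:

library(marginaleffects)
mod <- glm(am ~ hp, data = mtcars, family = binomial)
# average of response-scale predictions
avg_predictions(mod, type = "response")
# average on the link scale, then back-transform
avg_predictions(mod, type = "invlink(link)")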
Behind the scenes, the arguments of marginaleffects functions are evaluated in this order:\n\n\nnewdata\n\n\nvariables\n\n\ncomparison and slopes\n\n\nby\n\n\nvcov\n\n\nhypothesis\n\n\ntransform\n\n\nThe slopes() and comparisons() functions can use parallelism to speed up computation. Operations are parallelized for the computation of standard errors, at the model coefficient level. There is always considerable overhead when using parallel computation, mainly involved in passing the whole dataset to the different processes. Thus, parallel computation is most likely to be useful when the model includes many parameters and the dataset is relatively small.\nWarning: In many cases, parallel processing will not be useful at all.\nTo activate parallel computation, users must load the future.apply package, call the plan() function, and set a global option. For example:\n\nlibrary(future.apply)\nplan(\"multicore\", workers = 4)\noptions(marginaleffects_parallel = TRUE)\n\nslopes(model)\n\n\nTo disable parallelism in marginaleffects altogether, you can set a global option:\n\noptions(marginaleffects_parallel = FALSE)\n\n\n\nThe behavior of marginaleffects functions can be modified by setting global options.\nDisable some safety checks:\n\noptions(marginaleffects_safe = FALSE)\n\n\nOmit some columns from the printed output:\n\noptions(marginaleffects_print_omit = c(\"p.value\", \"s.value\"))\n\n\n\n\n\nGreenland S. 2019. \"Valid P-Values Behave Exactly as They Should: Some Misleading Criticisms of P-Values and Their Resolution With S-Values.\" The American Statistician. 73(S1): 106–114.\n\n\nCole, Stephen R, Jessie K Edwards, and Sander Greenland. 2020. \"Surprise!\" American Journal of Epidemiology 190 (2): 191–93. https://doi.org/10.1093/aje/kwaa136\n\n\n\nlibrary(\"marginaleffects\")\n\n\n\nlibrary(marginaleffects)\n\n# Linear model\ntmp <- mtcars\ntmp$am <- as.logical(tmp$am)\nmod <- lm(mpg ~ am + factor(cyl), tmp)\navg_comparisons(mod, variables = list(cyl = \"reference\"))\navg_comparisons(mod, variables = list(cyl = \"sequential\"))\navg_comparisons(mod, variables = list(cyl = \"pairwise\"))\n\n# GLM with different scale types\nmod <- glm(am ~ factor(gear), data = mtcars)\navg_comparisons(mod, type = \"response\")\navg_comparisons(mod, type = \"link\")\n\n# Contrasts at the mean\ncomparisons(mod, newdata = \"mean\")\n\n# Contrasts between marginal means (balanced grid)\ncomparisons(mod, newdata = \"balanced\")\n\n# Contrasts at user-specified values\ncomparisons(mod, newdata = datagrid(am = 0, gear = tmp$gear))\ncomparisons(mod, newdata = datagrid(am = unique, gear = max))\n\nm <- lm(mpg ~ hp + drat + factor(cyl) + factor(am), data = mtcars)\ncomparisons(m, variables = \"hp\", newdata = datagrid(FUN_factor = unique, FUN_numeric = median))\n\n# Numeric contrasts\nmod <- lm(mpg ~ hp, data = mtcars)\navg_comparisons(mod, variables = list(hp = 1))\navg_comparisons(mod, variables = list(hp = 5))\navg_comparisons(mod, variables = list(hp = c(90, 100)))\navg_comparisons(mod, variables = list(hp = \"iqr\"))\navg_comparisons(mod, variables = list(hp = \"sd\"))\navg_comparisons(mod, variables = list(hp = \"minmax\"))\n\n# using a function to specify a custom difference in one regressor\ndat <- mtcars\ndat$new_hp <- 49 * (dat$hp - min(dat$hp)) / (max(dat$hp) - min(dat$hp)) + 1\nmodlog <- lm(mpg ~ log(new_hp) + factor(cyl), data = dat)\nfdiff <- \\(x) data.frame(x, x + 10)\navg_comparisons(modlog, variables = list(new_hp = fdiff))\n\n# Adjusted Risk Ratio: see the contrasts vignette\nmod <- glm(vs ~ mpg, data = mtcars, family = binomial)\navg_comparisons(mod, comparison = \"lnratioavg\", transform = exp)\n\n# Adjusted Risk Ratio: Manual specification of the `comparison`\navg_comparisons(\n mod,\n comparison = function(hi, lo) log(mean(hi) / mean(lo)),\n transform = exp)\n# cross contrasts\nmod <- lm(mpg ~ factor(cyl) * factor(gear) + hp, data = mtcars)\navg_comparisons(mod, variables = c(\"cyl\", \"gear\"), cross = TRUE)\n\n# variable-specific contrasts\navg_comparisons(mod, variables = list(gear = \"sequential\", hp = 10))\n\n# hypothesis test: is the `wt` marginal effect at the mean equal to the `drat` marginal effect\nmod <- lm(mpg ~ wt + drat, data = mtcars)\n\ncomparisons(\n mod,\n newdata = \"mean\",\n hypothesis = \"wt = drat\")\n\n# same hypothesis test using row indices\ncomparisons(\n mod,\n newdata = \"mean\",\n hypothesis = \"b1 - b2 = 0\")\n\n# same hypothesis test using numeric vector of weights\ncomparisons(\n mod,\n newdata = \"mean\",\n hypothesis = c(1, -1))\n\n# two custom contrasts using a matrix of weights\nlc <- matrix(c(\n 1, -1,\n 2, 3),\n ncol = 2)\ncomparisons(\n mod,\n newdata = \"mean\",\n hypothesis = lc)\n\n# Effect of a 1 group-wise standard deviation change\n# First we calculate the SD in each group of `cyl`\n# Second, we use that SD as the treatment size in the `variables` argument\nlibrary(dplyr)\nmod <- lm(mpg ~ hp + factor(cyl), mtcars)\ntmp <- mtcars %>%\n group_by(cyl) %>%\n mutate(hp_sd = sd(hp))\navg_comparisons(mod, \n variables = list(hp = function(x) data.frame(x, x + tmp$hp_sd)),\n by = \"cyl\")\n\n# `by` argument\nmod <- lm(mpg ~ hp * am * vs, data = mtcars)\ncomparisons(mod, by = TRUE)\n\nmod <- lm(mpg ~ hp * am * vs, data = mtcars)\navg_comparisons(mod, variables = 
\"hp\", by = c(\"vs\", \"am\"))\n\nlibrary(nnet)\nmod <- multinom(factor(gear) ~ mpg + am * vs, data = mtcars, trace = FALSE)\nby <- data.frame(\n group = c(\"3\", \"4\", \"5\"),\n by = c(\"3,4\", \"3,4\", \"5\"))\ncomparisons(mod, type = \"probs\", by = by)", "crumbs": [ "Model to Meaning", "Functions", - "`plot_slopes`" + "`comparisons`" ] }, { - "objectID": "man/plot_slopes.html#plot-conditional-or-marginal-slopes", - "href": "man/plot_slopes.html#plot-conditional-or-marginal-slopes", + "objectID": "man/comparisons.html#comparisons-between-predictions-made-with-different-regressor-values", + "href": "man/comparisons.html#comparisons-between-predictions-made-with-different-regressor-values", "title": "", "section": "", - "text": "Plot slopes on the y-axis against values of one or more predictors (x-axis, colors/shapes, and facets).\nThe by argument is used to plot marginal slopes, that is, slopes made on the original data, but averaged by subgroups. This is analogous to using the by argument in the slopes() function.\nThe condition argument is used to plot conditional slopes, that is, slopes computed on a user-specified grid. This is analogous to using the newdata argument and datagrid() function in a slopes() call. All variables whose values are not specified explicitly are treated as usual by datagrid(), that is, they are held at their mean or mode (or rounded mean for integers). This includes grouping variables in mixed-effects models, so analysts who fit such models may want to specify the groups of interest using the condition argument, or supply model-specific arguments to compute population-level estimates. See details below. See the \"Plots\" vignette and website for tutorials and information on how to customize plots:\n\n\nhttps://marginaleffects.com/vignettes/plot.html\n\n\nhttps://marginaleffects.com\n\n\nplot_slopes(\n model,\n variables = NULL,\n condition = NULL,\n by = NULL,\n newdata = NULL,\n type = NULL,\n vcov = NULL,\n conf_level = 0.95,\n wts = FALSE,\n slope = \"dydx\",\n rug = FALSE,\n gray = FALSE,\n draw = TRUE,\n ...\n)\n\n\n\n\n\nmodel\n\n\nModel object\n\n\n\n\nvariables\n\n\nName of the variable whose marginal effect (slope) we want to plot on the y-axis.\n\n\n\n\ncondition\n\n\nConditional slopes\n\n\nCharacter vector (max length 4): Names of the predictors to display.\n\n\nNamed list (max length 4): List names correspond to predictors. List elements can be:\n\n\nNumeric vector\n\n\nFunction which returns a numeric vector or a set of unique categorical values\n\n\nShortcut strings for common reference values: \"minmax\", \"quartile\", \"threenum\"\n\n\n\n\n1: x-axis. 2: color/shape. 3: facet (wrap if no fourth variable, otherwise cols of grid). 4: facet (rows of grid).\n\n\nNumeric variables in positions 2 and 3 are summarized by Tukey’s five numbers ?stats::fivenum.\n\n\n\n\n\n\nby\n\n\nAggregate unit-level estimates (aka, marginalize, average over). Valid inputs:\n\n\nFALSE: return the original unit-level estimates.\n\n\nTRUE: aggregate estimates for each term.\n\n\nCharacter vector of column names in newdata or in the data frame produced by calling the function without the by argument.\n\n\nData frame with a by column of group labels, and merging columns shared by newdata or the data frame produced by calling the same function without the by argument.\n\n\nSee examples below.\n\n\nFor more complex aggregations, you can use the FUN argument of the hypotheses() function. 
See that function’s documentation and the Hypothesis Test vignettes on the marginaleffects website.\n\n\n\n\n\n\nnewdata\n\n\nWhen newdata is NULL, the grid is determined by the condition argument. When newdata is not NULL, the argument behaves in the same way as in the slopes() function.\n\n\n\n\ntype\n\n\nstring indicates the type (scale) of the predictions used to compute contrasts or slopes. This can differ based on the model type, but will typically be a string such as: \"response\", \"link\", \"probs\", or \"zero\". When an unsupported string is entered, the model-specific list of acceptable values is returned in an error message. When type is NULL, the first entry in the error message is used by default.\n\n\n\n\nvcov\n\n\nType of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:\n\n\nFALSE: Do not compute standard errors. This can speed up computation considerably.\n\n\nTRUE: Unit-level standard errors using the default vcov(model) variance-covariance matrix.\n\n\nString which indicates the kind of uncertainty estimates to return.\n\n\nHeteroskedasticity-consistent: \"HC\", \"HC0\", \"HC1\", \"HC2\", \"HC3\", \"HC4\", \"HC4m\", \"HC5\". See ?sandwich::vcovHC\n\n\nHeteroskedasticity and autocorrelation consistent: \"HAC\"\n\n\nMixed-Models degrees of freedom: \"satterthwaite\", \"kenward-roger\"\n\n\nOther: \"NeweyWest\", \"KernHAC\", \"OPG\". See the sandwich package documentation.\n\n\n\n\nOne-sided formula which indicates the name of cluster variables (e.g., ~unit_id). This formula is passed to the cluster argument of the sandwich::vcovCL function.\n\n\nSquare covariance matrix\n\n\nFunction which returns a covariance matrix (e.g., stats::vcov(model))\n\n\n\n\n\n\nconf_level\n\n\nnumeric value between 0 and 1. Confidence level to use to build a confidence interval.\n\n\n\n\nwts\n\n\nlogical, string or numeric: weights to use when computing average predictions, contrasts or slopes. These weights only affect the averaging in avg_*() or with the by argument, and not unit-level estimates. See ?weighted.mean\n\n\nstring: column name of the weights variable in newdata. When supplying a column name to wts, it is recommended to supply the original data (including the weights variable) explicitly to newdata.\n\n\nnumeric: vector of length equal to the number of rows in the original data or in newdata (if supplied).\n\n\nFALSE: Equal weights.\n\n\nTRUE: Extract weights from the fitted object with insight::find_weights() and use them when taking weighted averages of estimates. Warning: newdata=datagrid() returns a single average weight, which is equivalent to using wts=FALSE\n\n\n\n\n\n\nslope\n\n\nstring indicates the type of slope or (semi-)elasticity to compute:\n\n\n\"dydx\": dY/dX\n\n\n\"eyex\": dY/dX * X / Y\n\n\n\"eydx\": dY/dX / Y\n\n\n\"dyex\": dY/dX * X\n\n\nY is the predicted value of the outcome; X is the observed value of the predictor.\n\n\n\n\n\n\nrug\n\n\nTRUE displays tick marks on the axes to mark the distribution of raw data.\n\n\n\n\ngray\n\n\nTRUE for a grayscale plot; FALSE (default) for a color plot.\n\n\n\n\ndraw\n\n\nTRUE returns a ggplot2 plot. FALSE returns a data.frame of the underlying data.\n\n\n\n\n…\n\n\nAdditional arguments are passed to the predict() method supplied by the modeling package. These arguments are particularly useful for mixed-effects or Bayesian models (see the online vignettes on the marginaleffects website). Available arguments can vary from model to model, depending on the range of supported arguments by each modeling package. 
See the \"Model-Specific Arguments\" section of the ?slopes documentation for a non-exhaustive list of available arguments.\n\n\n\nA ggplot2 object\n\nSome model types allow model-specific arguments to modify the nature of marginal effects, predictions, marginal means, and contrasts. Please report other package-specific predict() arguments on Github so we can add them to the table below.\nhttps://github.com/vincentarelbundock/marginaleffects/issues\n\n\n\nPackage\n\n\nClass\n\n\nArgument\n\n\nDocumentation\n\n\n\n\nbrms\n\n\nbrmsfit\n\n\nndraws\n\n\nbrms::posterior_predict\n\n\n\n\n\n\n\n\nre_formula\n\n\nbrms::posterior_predict\n\n\n\n\nlme4\n\n\nmerMod\n\n\nre.form\n\n\nlme4::predict.merMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nlme4::predict.merMod\n\n\n\n\nglmmTMB\n\n\nglmmTMB\n\n\nre.form\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nzitype\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\nmgcv\n\n\nbam\n\n\nexclude\n\n\nmgcv::predict.bam\n\n\n\n\n\n\ngam\n\n\nexclude\n\n\nmgcv::predict.gam\n\n\n\n\nrobustlmm\n\n\nrlmerMod\n\n\nre.form\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\nMCMCglmm\n\n\nMCMCglmm\n\n\nndraws\n\n\n\n\n\n\nsampleSelection\n\n\nselection\n\n\npart\n\n\nsampleSelection::predict.selection\n\n\n\n\n\n\n\n\nlibrary(\"marginaleffects\")\n\nlibrary(marginaleffects)\nmod <- lm(mpg ~ hp * drat * factor(am), data = mtcars)\n\nplot_slopes(mod, variables = \"hp\", condition = \"drat\")\n\n\n\n\n\n\nplot_slopes(mod, variables = \"hp\", condition = c(\"drat\", \"am\"))\n\n\n\n\n\n\nplot_slopes(mod, variables = \"hp\", condition = list(\"am\", \"drat\" = 3:5))\n\n\n\n\n\n\nplot_slopes(mod, variables = \"am\", condition = list(\"hp\", \"drat\" = range))\n\n\n\n\n\n\nplot_slopes(mod, variables = \"am\", condition = list(\"hp\", \"drat\" = \"threenum\"))", + "text": "Predict the outcome variable at different regressor values (e.g., college graduates vs. others), and compare those predictions by computing a difference, ratio, or some other function. comparisons() can return many quantities of interest, such as contrasts, differences, risk ratios, changes in log odds, lift, slopes, elasticities, etc.\n\n\ncomparisons(): unit-level (conditional) estimates.\n\n\navg_comparisons(): average (marginal) estimates.\n\n\nvariables identifies the focal regressors whose \"effect\" we are interested in. comparison determines how predictions with different regressor values are compared (difference, ratio, odds, etc.). 
The newdata argument and the datagrid() function control where statistics are evaluated in the predictor space: \"at observed values\", \"at the mean\", \"at representative values\", etc.\nSee the comparisons vignette and package website for worked examples and case studies:\n\n\nhttps://marginaleffects.com/vignettes/comparisons.html\n\n\nhttps://marginaleffects.com/\n\n\ncomparisons(\n model,\n newdata = NULL,\n variables = NULL,\n comparison = \"difference\",\n type = NULL,\n vcov = TRUE,\n by = FALSE,\n conf_level = 0.95,\n transform = NULL,\n cross = FALSE,\n wts = FALSE,\n hypothesis = NULL,\n equivalence = NULL,\n p_adjust = NULL,\n df = Inf,\n eps = NULL,\n numderiv = \"fdforward\",\n ...\n)\n\navg_comparisons(\n model,\n newdata = NULL,\n variables = NULL,\n type = NULL,\n vcov = TRUE,\n by = TRUE,\n conf_level = 0.95,\n comparison = \"difference\",\n transform = NULL,\n cross = FALSE,\n wts = FALSE,\n hypothesis = NULL,\n equivalence = NULL,\n p_adjust = NULL,\n df = Inf,\n eps = NULL,\n numderiv = \"fdforward\",\n ...\n)\n\n\n\n\n\nmodel\n\n\nModel object\n\n\n\n\nnewdata\n\n\nGrid of predictor values at which we evaluate the comparisons.\n\n\nWarning: Avoid modifying your dataset between fitting the model and calling a marginaleffects function. This can sometimes lead to unexpected results.\n\n\nNULL (default): Unit-level contrasts for each observed value in the dataset (empirical distribution). The dataset is retrieved using insight::get_data(), which tries to extract data from the environment. This may produce unexpected results if the original data frame has been altered since fitting the model.\n\n\ndata frame: Unit-level contrasts for each row of the newdata data frame.\n\n\nstring:\n\n\n\"mean\": Contrasts at the Mean. Contrasts when each predictor is held at its mean or mode.\n\n\n\"median\": Contrasts at the Median. Contrasts when each predictor is held at its median or mode.\n\n\n\"balanced\": Contrasts evaluated on a balanced grid with every combination of categories and numeric variables held at their means.\n\n\n\"tukey\": Contrasts at Tukey’s 5 numbers.\n\n\n\"grid\": Contrasts on a grid of representative numbers (Tukey’s 5 numbers and unique values of categorical predictors).\n\n\n\n\ndatagrid() call to specify a custom grid of regressors. For example:\n\n\nnewdata = datagrid(cyl = c(4, 6)): cyl variable equal to 4 and 6 and other regressors fixed at their means or modes.\n\n\nnewdata = datagrid(mpg = fivenum): mpg variable held at Tukey’s five numbers (using the fivenum function), and other regressors fixed at their means or modes.\n\n\nSee the Examples section and the datagrid documentation.\n\n\n\n\nsubset() call with a single argument to select a subset of the dataset used to fit the model, ex: newdata = subset(treatment == 1)\n\n\ndplyr::filter() call with a single argument to select a subset of the dataset used to fit the model, ex: newdata = filter(treatment == 1)\n\n\n\n\n\n\nvariables\n\n\nFocal variables\n\n\nNULL: compute comparisons for all the variables in the model object (can be slow).\n\n\nCharacter vector: subset of variables (usually faster).\n\n\nNamed list: names identify the subset of variables of interest, and values define the type of contrast to compute. 
Acceptable values depend on the variable type:\n\n\nFactor or character variables:\n\n\n\"reference\": Each factor level is compared to the factor reference (base) level\n\n\n\"all\": All combinations of observed levels\n\n\n\"sequential\": Each factor level is compared to the previous factor level\n\n\n\"pairwise\": Each factor level is compared to all other levels\n\n\n\"minmax\": The highest and lowest levels of a factor.\n\n\n\"revpairwise\", \"revreference\", \"revsequential\": inverse of the corresponding hypotheses.\n\n\nVector of length 2 with the two values to compare.\n\n\nData frame with the same number of rows as newdata, with two columns of \"lo\" and \"hi\" values to compare.\n\n\nFunction that accepts a vector and returns a data frame with two columns of \"lo\" and \"hi\" values to compare. See examples below.\n\n\n\n\nLogical variables:\n\n\nNULL: contrast between TRUE and FALSE\n\n\nData frame with the same number of rows as newdata, with two columns of \"lo\" and \"hi\" values to compare.\n\n\nFunction that accepts a vector and returns a data frame with two columns of \"lo\" and \"hi\" values to compare. See examples below.\n\n\n\n\nNumeric variables:\n\n\nNumeric of length 1: Forward contrast for a gap of x, computed between the observed value and the observed value plus x. Users can set a global option to get a \"center\" or \"backward\" contrast instead: options(marginaleffects_contrast_direction=\"center\")\n\n\nNumeric vector of length 2: Contrast between the largest and the smallest elements of the x vector.\n\n\nData frame with the same number of rows as newdata, with two columns of \"lo\" and \"hi\" values to compare.\n\n\nFunction that accepts a vector and returns a data frame with two columns of \"lo\" and \"hi\" values to compare. See examples below.\n\n\n\"iqr\": Contrast across the interquartile range of the regressor.\n\n\n\"sd\": Contrast across one standard deviation around the regressor mean.\n\n\n\"2sd\": Contrast across two standard deviations around the regressor mean.\n\n\n\"minmax\": Contrast between the maximum and the minimum values of the regressor.\n\n\n\n\nExamples:\n\n\nvariables = list(gear = \"pairwise\", hp = 10)\n\n\nvariables = list(gear = \"sequential\", hp = c(100, 120))\n\n\nvariables = list(hp = \\(x) data.frame(low = x - 5, high = x + 10))\n\n\nSee the Examples section below for more, and the sketch that follows this list.\n\n\n\n\n\n\n\n\n\n\n
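A minimal sketch of the function interface listed above; the helper name fdiff and the one-SD gap are illustrative choices, not package defaults:

library(marginaleffects)
mod <- lm(mpg ~ hp + factor(cyl), data = mtcars)
# compare each observed hp value to that value plus one in-sample SD
fdiff <- \(x) data.frame(lo = x, hi = x + sd(x))
avg_comparisons(mod, variables = list(hp = fdiff))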
comparison\n\n\nHow should pairs of predictions be compared? Difference, ratio, odds ratio, or user-defined functions.\n\n\nstring: shortcuts to common contrast functions.\n\n\nSupported shortcut strings: difference, differenceavg, differenceavgwts, dydx, eyex, eydx, dyex, dydxavg, eyexavg, eydxavg, dyexavg, dydxavgwts, eyexavgwts, eydxavgwts, dyexavgwts, ratio, ratioavg, ratioavgwts, lnratio, lnratioavg, lnratioavgwts, lnor, lnoravg, lnoravgwts, lift, liftavg, liftavgwts, expdydx, expdydxavg, expdydxavgwts\n\n\nSee the Comparisons section below for definitions of each transformation.\n\n\n\n\nfunction: accepts two equal-length numeric vectors of adjusted predictions (hi and lo) and returns a vector of contrasts of the same length, or a unique numeric value.\n\n\nSee the Transformations section below for examples of valid functions.\n\n\n\n\n\n\n\n\ntype\n\n\nstring indicates the type (scale) of the predictions used to compute contrasts or slopes. This can differ based on the model type, but will typically be a string such as: \"response\", \"link\", \"probs\", or \"zero\". When an unsupported string is entered, the model-specific list of acceptable values is returned in an error message. When type is NULL, the first entry in the error message is used by default.\n\n\n\n\nvcov\n\n\nType of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:\n\n\nFALSE: Do not compute standard errors. This can speed up computation considerably.\n\n\nTRUE: Unit-level standard errors using the default vcov(model) variance-covariance matrix.\n\n\nString which indicates the kind of uncertainty estimates to return.\n\n\nHeteroskedasticity-consistent: \"HC\", \"HC0\", \"HC1\", \"HC2\", \"HC3\", \"HC4\", \"HC4m\", \"HC5\". See ?sandwich::vcovHC\n\n\nHeteroskedasticity and autocorrelation consistent: \"HAC\"\n\n\nMixed-Models degrees of freedom: \"satterthwaite\", \"kenward-roger\"\n\n\nOther: \"NeweyWest\", \"KernHAC\", \"OPG\". See the sandwich package documentation.\n\n\n\n\nOne-sided formula which indicates the name of cluster variables (e.g., ~unit_id). This formula is passed to the cluster argument of the sandwich::vcovCL function.\n\n\nSquare covariance matrix\n\n\nFunction which returns a covariance matrix (e.g., stats::vcov(model))\n\n\n\n\n\n\nby\n\n\nAggregate unit-level estimates (aka, marginalize, average over). Valid inputs:\n\n\nFALSE: return the original unit-level estimates.\n\n\nTRUE: aggregate estimates for each term.\n\n\nCharacter vector of column names in newdata or in the data frame produced by calling the function without the by argument.\n\n\nData frame with a by column of group labels, and merging columns shared by newdata or the data frame produced by calling the same function without the by argument.\n\n\nSee examples below.\n\n\nFor more complex aggregations, you can use the FUN argument of the hypotheses() function. See that function’s documentation and the Hypothesis Test vignettes on the marginaleffects website.\n\n\n\n\n\n\nconf_level\n\n\nnumeric value between 0 and 1. Confidence level to use to build a confidence interval.\n\n\n\n\ntransform\n\n\nstring or function. Transformation applied to unit-level estimates and confidence intervals just before the function returns results. Functions must accept a vector and return a vector of the same length. Supported string shortcuts: \"exp\", \"ln\"\n\n\n\n\ncross\n\n\n\n\nFALSE: Contrasts represent the change in adjusted predictions when one predictor changes and all other variables are held constant.\n\n\nTRUE: Contrasts represent the changes in adjusted predictions when all the predictors specified in the variables argument are manipulated simultaneously (a \"cross-contrast\").\n\n\n\n\n\n\nwts\n\n\nlogical, string or numeric: weights to use when computing average predictions, contrasts or slopes. These weights only affect the averaging in avg_*() or with the by argument, and not unit-level estimates. See ?weighted.mean\n\n\nstring: column name of the weights variable in newdata. When supplying a column name to wts, it is recommended to supply the original data (including the weights variable) explicitly to newdata.\n\n\nnumeric: vector of length equal to the number of rows in the original data or in newdata (if supplied).\n\n\nFALSE: Equal weights.\n\n\nTRUE: Extract weights from the fitted object with insight::find_weights() and use them when taking weighted averages of estimates. 
Warning: newdata=datagrid() returns a single average weight, which is equivalent to using wts=FALSE\n\n\n\n\n\n\nhypothesis\n\n\nspecify a hypothesis test or custom contrast using a numeric value, vector, or matrix; a string equation; a string; a formula; or a function.\n\n\nNumeric:\n\n\nSingle value: the null hypothesis used in the computation of Z and p (before applying transform).\n\n\nVector: Weights to compute a linear combination of (custom contrast between) estimates. Length equal to the number of rows generated by the same function call, but without the hypothesis argument.\n\n\nMatrix: Each column is a vector of weights, as described above, used to compute a distinct linear combination of (contrast between) estimates. The column names of the matrix are used as labels in the output.\n\n\n\n\nString equation to specify linear or non-linear hypothesis tests. If the term column uniquely identifies rows, terms can be used in the formula. Otherwise, use b1, b2, etc. to identify the position of each parameter. The b* wildcard can be used to test hypotheses on all estimates. If a named vector is used, the names are used as labels in the output. Examples:\n\n\nhp = drat\n\n\nhp + drat = 12\n\n\nb1 + b2 + b3 = 0\n\n\nb* / b1 = 1\n\n\n\n\nString:\n\n\n\"pairwise\": pairwise differences between estimates in each row.\n\n\n\"reference\": differences between the estimates in each row and the estimate in the first row.\n\n\n\"sequential\": difference between an estimate and the estimate in the next row.\n\n\n\"meandev\": difference between an estimate and the mean of all estimates.\n\n\n\"meanotherdev\": difference between an estimate and the mean of all other estimates, excluding the current one.\n\n\n\"revpairwise\", \"revreference\", \"revsequential\": inverse of the corresponding hypotheses, as described above.\n\n\n\n\nFormula:\n\n\ncomparison ~ pairs | group\n\n\nLeft-hand side determines the type of comparison to conduct: difference or ratio. If the left-hand side is empty, difference is chosen.\n\n\nRight-hand side determines the pairs of estimates to compare: reference, sequential, or meandev\n\n\nOptional: Users can supply grouping variables after a vertical bar to conduct comparisons within subsets.\n\n\nExamples:\n\n\n~ reference\n\n\nratio ~ pairwise\n\n\ndifference ~ pairwise | groupid\n\n\n\n\n\n\nFunction:\n\n\nAccepts an argument x: object produced by a marginaleffects function or a data frame with column rowid and estimate\n\n\nReturns a data frame with columns term and estimate (mandatory) and rowid (optional).\n\n\nThe function can also accept optional input arguments: newdata, by, draws.\n\n\nThis function approach will not work for Bayesian models or with bootstrapping. In those cases, it is easy to use posterior_draws() to extract and manipulate the draws directly.\n\n\n\n\nSee the Examples section below and the vignette: https://marginaleffects.com/vignettes/hypothesis.html\n\n\n\n\n\n\nequivalence\n\n\nNumeric vector of length 2: bounds used for the two-one-sided test (TOST) of equivalence, and for the non-inferiority and non-superiority tests. See Details section below.\n\n\n\n\np_adjust\n\n\nAdjust p-values for multiple comparisons: \"holm\", \"hochberg\", \"hommel\", \"bonferroni\", \"BH\", \"BY\", or \"fdr\". See stats::p.adjust\n\n\n\n\ndf\n\n\nDegrees of freedom used to compute p values and confidence intervals. A single numeric value between 1 and Inf. When df is Inf, the normal distribution is used. When df is finite, the t distribution is used. 
See insight::get_df for a convenient function to extract degrees of freedom. Ex: slopes(model, df = insight::get_df(model))\n\n\n\n\neps\n\n\nNULL or numeric value which determines the step size to use when calculating numerical derivatives: (f(x+eps)-f(x))/eps. When eps is NULL, the step size is 0.0001 multiplied by the difference between the maximum and minimum values of the variable with respect to which we are taking the derivative. Changing eps may be necessary to avoid numerical problems in certain models.\n\n\n\n\nnumderiv\n\n\nstring or list of strings indicating the method to use for the numeric differentiation used to compute delta method standard errors.\n\n\n\"fdforward\": finite difference method with forward differences (default)\n\n\n\"fdcenter\": finite difference method with central differences\n\n\n\"richardson\": Richardson extrapolation method\n\n\nExtra arguments can be specified by passing a list to the numderiv argument, with the name of the method first and named arguments following, ex: numderiv=list(\"fdcenter\", eps = 1e-5). When an unknown argument is used, marginaleffects prints the list of valid arguments for each method.\n\n\n\n\n\n\n…\n\n\nAdditional arguments are passed to the predict() method supplied by the modeling package. These arguments are particularly useful for mixed-effects or Bayesian models (see the online vignettes on the marginaleffects website). Available arguments can vary from model to model, depending on the range of supported arguments by each modeling package. See the \"Model-Specific Arguments\" section of the ?slopes documentation for a non-exhaustive list of available arguments.\n\n\n\nA data.frame with one row per observation (per term/group) and several columns:\n\n\nrowid: row number of the newdata data frame\n\n\ntype: prediction type, as defined by the type argument\n\n\ngroup: (optional) value of the grouped outcome (e.g., categorical outcome models)\n\n\nterm: the variable whose marginal effect is computed\n\n\ndydx: slope of the outcome with respect to the term, for a given combination of predictor values\n\n\nstd.error: standard errors computed via the delta method.\n\n\np.value: p value associated with the estimate column. The null is determined by the hypothesis argument (0 by default), and p values are computed before applying the transform argument.\n\n\ns.value: Shannon information transforms of p values. How many consecutive \"heads\" tosses would provide the same amount of evidence (or \"surprise\") against the null hypothesis that the coin is fair? The purpose of S is to calibrate the analyst’s intuition about the strength of evidence encoded in p against a well-known physical phenomenon. See Greenland (2019) and Cole et al. (2020).\n\n\nconf.low: lower bound of the confidence interval (or equal-tailed interval for Bayesian models)\n\n\nconf.high: upper bound of the confidence interval (or equal-tailed interval for Bayesian models)\n\n\nSee ?print.marginaleffects for printing options.\n\n\n\navg_comparisons(): Average comparisons\n\n\nStandard errors for all quantities estimated by marginaleffects can be obtained via the delta method. This requires differentiating a function with respect to the coefficients in the model using a finite difference approach. In some models, the delta method standard errors can be sensitive to various aspects of the numeric differentiation strategy, including the step size. By default, the step size is set to 1e-8, or to 1e-4 times the smallest absolute model coefficient, whichever is largest.\nmarginaleffects can delegate numeric differentiation to the numDeriv package, which allows more flexibility. To do this, users can pass arguments to the numDeriv::jacobian function through a global option. For example:\n\n\noptions(marginaleffects_numDeriv = list(method = \"simple\", method.args = list(eps = 1e-6)))\n\n\noptions(marginaleffects_numDeriv = list(method = \"Richardson\", method.args = list(eps = 1e-5)))\n\n\noptions(marginaleffects_numDeriv = NULL)\n\n\nSee the \"Standard Errors and Confidence Intervals\" vignette on the marginaleffects website for more details on the computation of standard errors:\nhttps://marginaleffects.com/vignettes/uncertainty.html\nNote that the inferences() function can be used to compute uncertainty estimates using a bootstrap or simulation-based inference. See the vignette:\nhttps://marginaleffects.com/vignettes/bootstrap.html\n\n
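A minimal sketch of the numDeriv delegation described above, assuming the numDeriv package is installed; point estimates are unaffected, but standard errors can shift:

library(marginaleffects)
mod <- glm(vs ~ hp + wt, data = mtcars, family = binomial)
avg_comparisons(mod)  # default finite-difference Jacobian
# delegate to numDeriv::jacobian with a fixed step size
options(marginaleffects_numDeriv = list(method = "simple", method.args = list(eps = 1e-6)))
avg_comparisons(mod)
options(marginaleffects_numDeriv = NULL)  # restore the default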
Some model types allow model-specific arguments to modify the nature of marginal effects, predictions, marginal means, and contrasts. Please report other package-specific predict() arguments on GitHub so we can add them to the table below.\nhttps://github.com/vincentarelbundock/marginaleffects/issues\n\n\n\nPackage\n\n\nClass\n\n\nArgument\n\n\nDocumentation\n\n\n\n\nbrms\n\n\nbrmsfit\n\n\nndraws\n\n\nbrms::posterior_predict\n\n\n\n\n\n\n\n\nre_formula\n\n\nbrms::posterior_predict\n\n\n\n\nlme4\n\n\nmerMod\n\n\nre.form\n\n\nlme4::predict.merMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nlme4::predict.merMod\n\n\n\n\nglmmTMB\n\n\nglmmTMB\n\n\nre.form\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nzitype\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\nmgcv\n\n\nbam\n\n\nexclude\n\n\nmgcv::predict.bam\n\n\n\n\n\n\ngam\n\n\nexclude\n\n\nmgcv::predict.gam\n\n\n\n\nrobustlmm\n\n\nrlmerMod\n\n\nre.form\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\nMCMCglmm\n\n\nMCMCglmm\n\n\nndraws\n\n\n\n\n\n\nsampleSelection\n\n\nselection\n\n\npart\n\n\nsampleSelection::predict.selection\n\n\n\n\n\n\n\nThe following transformations can be applied by supplying one of the shortcut strings to the comparison argument. hi is a vector of adjusted predictions for the \"high\" side of the contrast. lo is a vector of adjusted predictions for the \"low\" side of the contrast. y is a vector of adjusted predictions for the original data. x is the predictor in the original data. 
eps is the step size to use to compute derivatives and elasticities.\n\n\n\nShortcut\n\n\nFunction\n\n\n\n\ndifference\n\n\n\\(hi, lo) hi - lo\n\n\n\n\ndifferenceavg\n\n\n\\(hi, lo) mean(hi - lo)\n\n\n\n\ndydx\n\n\n\\(hi, lo, eps) (hi - lo)/eps\n\n\n\n\neyex\n\n\n\\(hi, lo, eps, y, x) (hi - lo)/eps * (x/y)\n\n\n\n\neydx\n\n\n\\(hi, lo, eps, y, x) ((hi - lo)/eps)/y\n\n\n\n\ndyex\n\n\n\\(hi, lo, eps, x) ((hi - lo)/eps) * x\n\n\n\n\ndydxavg\n\n\n\\(hi, lo, eps) mean((hi - lo)/eps)\n\n\n\n\neyexavg\n\n\n\\(hi, lo, eps, y, x) mean((hi - lo)/eps * (x/y))\n\n\n\n\neydxavg\n\n\n\\(hi, lo, eps, y, x) mean(((hi - lo)/eps)/y)\n\n\n\n\ndyexavg\n\n\n\\(hi, lo, eps, x) mean(((hi - lo)/eps) * x)\n\n\n\n\nratio\n\n\n\\(hi, lo) hi/lo\n\n\n\n\nratioavg\n\n\n\\(hi, lo) mean(hi)/mean(lo)\n\n\n\n\nlnratio\n\n\n\\(hi, lo) log(hi/lo)\n\n\n\n\nlnratioavg\n\n\n\\(hi, lo) log(mean(hi)/mean(lo))\n\n\n\n\nlnor\n\n\n\\(hi, lo) log((hi/(1 - hi))/(lo/(1 - lo)))\n\n\n\n\nlnoravg\n\n\n\\(hi, lo) log((mean(hi)/(1 - mean(hi)))/(mean(lo)/(1 - mean(lo))))\n\n\n\n\nlift\n\n\n\\(hi, lo) (hi - lo)/lo\n\n\n\n\nliftavg\n\n\n\\(hi, lo) (mean(hi - lo))/mean(lo)\n\n\n\n\nexpdydx\n\n\n\\(hi, lo, eps) ((exp(hi) - exp(lo))/exp(eps))/eps\n\n\n\n\nexpdydxavg\n\n\n\\(hi, lo, eps) mean(((exp(hi) - exp(lo))/exp(eps))/eps)\n\n\n\n\n\n\n\nBy default, credible intervals in Bayesian models are built as equal-tailed intervals. This can be changed to a highest density interval by setting a global option:\noptions(\"marginaleffects_posterior_interval\" = \"eti\")\noptions(\"marginaleffects_posterior_interval\" = \"hdi\")\nBy default, the center of the posterior distribution in Bayesian models is identified by the median. Users can use a different summary function by setting a global option:\noptions(\"marginaleffects_posterior_center\" = \"mean\")\noptions(\"marginaleffects_posterior_center\" = \"median\")\nWhen estimates are averaged using the by argument, the tidy() function, or the summary() function, the posterior distribution is marginalized twice over. First, we take the average across units but within each iteration of the MCMC chain, according to what the user requested in the by argument or the tidy()/summary() functions. Then, we identify the center of the resulting posterior using the function supplied to the \"marginaleffects_posterior_center\" option (the median by default).\n\n\\(\\theta\\) is an estimate, \\(\\sigma_\\theta\\) its estimated standard error, and \\([a, b]\\) are the bounds of the interval supplied to the equivalence argument.\nNon-inferiority:\n\n\n\\(H_0\\): \\(\\theta \\leq a\\)\n\n\n\\(H_1\\): \\(\\theta > a\\)\n\n\n\\(t=(\\theta - a)/\\sigma_\\theta\\)\n\n\np: Upper-tail probability\n\n\nNon-superiority:\n\n\n\\(H_0\\): \\(\\theta \\geq b\\)\n\n\n\\(H_1\\): \\(\\theta < b\\)\n\n\n\\(t=(\\theta - b)/\\sigma_\\theta\\)\n\n\np: Lower-tail probability\n\n\nEquivalence: Two One-Sided Tests (TOST)\n\n\np: Maximum of the non-inferiority and non-superiority p values.\n\n\nThanks to Russell V. Lenth for the excellent emmeans package and documentation which inspired this feature.\n\n
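A minimal sketch showing that the shortcut strings in the transformations table above are interchangeable with user-supplied functions:

library(marginaleffects)
mod <- glm(vs ~ mpg, data = mtcars, family = binomial)
# "ratioavg" is defined as mean(hi)/mean(lo)...
avg_comparisons(mod, comparison = "ratioavg")
# ...so this hand-rolled function returns the same estimate
avg_comparisons(mod, comparison = function(hi, lo) mean(hi) / mean(lo))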
The type argument determines the scale of the predictions used to compute quantities of interest with functions from the marginaleffects package. Admissible values for type depend on the model object. When users specify an incorrect value for type, marginaleffects will raise an informative error with a list of valid type values for the specific model object. The first entry in the list in that error message is the default type.\nThe invlink(link) is a special type defined by marginaleffects. It is available for some (but not all) models, and only for the predictions() function. With this link type, we first compute predictions on the link scale, then we use the inverse link function to backtransform the predictions to the response scale. This is useful for models with non-linear link functions as it can ensure that confidence intervals stay within desirable bounds, ex: 0 to 1 for a logit model. Note that an average of estimates with type=\"invlink(link)\" will not always be equivalent to the average of estimates with type=\"response\". This type is the default when calling predictions(). It is available, but not the default, when calling avg_predictions() or predictions() with the by argument.\nSome of the most common type values are:\nresponse, link, E, Ep, average, class, conditional, count, cum.prob, cumhaz, cumprob, density, detection, disp, ev, expected, expvalue, fitted, hazard, invlink(link), latent, latent_N, linear, linear.predictor, linpred, location, lp, mean, numeric, p, ppd, pr, precision, prediction, prob, probability, probs, quantile, risk, rmst, scale, survival, unconditional, utility, variance, xb, zero, zlink, zprob\n\nBehind the scenes, the arguments of marginaleffects functions are evaluated in this order:\n\n\nnewdata\n\n\nvariables\n\n\ncomparison and slopes\n\n\nby\n\n\nvcov\n\n\nhypothesis\n\n\ntransform\n\n\nThe slopes() and comparisons() functions can use parallelism to speed up computation. Operations are parallelized for the computation of standard errors, at the model coefficient level. There is always considerable overhead when using parallel computation, mainly involved in passing the whole dataset to the different processes. Thus, parallel computation is most likely to be useful when the model includes many parameters and the dataset is relatively small.\nWarning: In many cases, parallel processing will not be useful at all.\nTo activate parallel computation, users must load the future.apply package, call the plan() function, and set a global option. For example:\n\nlibrary(future.apply)\nplan(\"multicore\", workers = 4)\noptions(marginaleffects_parallel = TRUE)\n\nslopes(model)\n\n\nTo disable parallelism in marginaleffects altogether, you can set a global option:\n\noptions(marginaleffects_parallel = FALSE)\n\n\n\nThe behavior of marginaleffects functions can be modified by setting global options.\nDisable some safety checks:\n\noptions(marginaleffects_safe = FALSE)\n\n\nOmit some columns from the printed output:\n\noptions(marginaleffects_print_omit = c(\"p.value\", \"s.value\"))\n\n\n\n\n\nGreenland S. 2019. \"Valid P-Values Behave Exactly as They Should: Some Misleading Criticisms of P-Values and Their Resolution With S-Values.\" The American Statistician. 73(S1): 106–114.\n\n\nCole, Stephen R, Jessie K Edwards, and Sander Greenland. 2020. \"Surprise!\" American Journal of Epidemiology 190 (2): 191–93. 
https://doi.org/10.1093/aje/kwaa136\n\n\n\nlibrary(\"marginaleffects\")\n\n\n\nlibrary(marginaleffects)\n\n# Linear model\ntmp <- mtcars\ntmp$am <- as.logical(tmp$am)\nmod <- lm(mpg ~ am + factor(cyl), tmp)\navg_comparisons(mod, variables = list(cyl = \"reference\"))\navg_comparisons(mod, variables = list(cyl = \"sequential\"))\navg_comparisons(mod, variables = list(cyl = \"pairwise\"))\n\n# GLM with different scale types\nmod <- glm(am ~ factor(gear), data = mtcars)\navg_comparisons(mod, type = \"response\")\navg_comparisons(mod, type = \"link\")\n\n# Contrasts at the mean\ncomparisons(mod, newdata = \"mean\")\n\n# Contrasts between marginal means (balanced grid)\ncomparisons(mod, newdata = \"balanced\")\n\n# Contrasts at user-specified values\ncomparisons(mod, newdata = datagrid(am = 0, gear = tmp$gear))\ncomparisons(mod, newdata = datagrid(am = unique, gear = max))\n\nm <- lm(mpg ~ hp + drat + factor(cyl) + factor(am), data = mtcars)\ncomparisons(m, variables = \"hp\", newdata = datagrid(FUN_factor = unique, FUN_numeric = median))\n\n# Numeric contrasts\nmod <- lm(mpg ~ hp, data = mtcars)\navg_comparisons(mod, variables = list(hp = 1))\navg_comparisons(mod, variables = list(hp = 5))\navg_comparisons(mod, variables = list(hp = c(90, 100)))\navg_comparisons(mod, variables = list(hp = \"iqr\"))\navg_comparisons(mod, variables = list(hp = \"sd\"))\navg_comparisons(mod, variables = list(hp = \"minmax\"))\n\n# using a function to specify a custom difference in one regressor\ndat <- mtcars\ndat$new_hp <- 49 * (dat$hp - min(dat$hp)) / (max(dat$hp) - min(dat$hp)) + 1\nmodlog <- lm(mpg ~ log(new_hp) + factor(cyl), data = dat)\nfdiff <- \\(x) data.frame(x, x + 10)\navg_comparisons(modlog, variables = list(new_hp = fdiff))\n\n# Adjusted Risk Ratio: see the contrasts vignette\nmod <- glm(vs ~ mpg, data = mtcars, family = binomial)\navg_comparisons(mod, comparison = \"lnratioavg\", transform = exp)\n\n# Adjusted Risk Ratio: Manual specification of the `comparison`\navg_comparisons(\n mod,\n comparison = function(hi, lo) log(mean(hi) / mean(lo)),\n transform = exp)\n# cross contrasts\nmod <- lm(mpg ~ factor(cyl) * factor(gear) + hp, data = mtcars)\navg_comparisons(mod, variables = c(\"cyl\", \"gear\"), cross = TRUE)\n\n# variable-specific contrasts\navg_comparisons(mod, variables = list(gear = \"sequential\", hp = 10))\n\n# hypothesis test: is the `wt` marginal effect at the mean equal to the `drat` marginal effect\nmod <- lm(mpg ~ wt + drat, data = mtcars)\n\ncomparisons(\n mod,\n newdata = \"mean\",\n hypothesis = \"wt = drat\")\n\n# same hypothesis test using row indices\ncomparisons(\n mod,\n newdata = \"mean\",\n hypothesis = \"b1 - b2 = 0\")\n\n# same hypothesis test using numeric vector of weights\ncomparisons(\n mod,\n newdata = \"mean\",\n hypothesis = c(1, -1))\n\n# two custom contrasts using a matrix of weights\nlc <- matrix(c(\n 1, -1,\n 2, 3),\n ncol = 2)\ncomparisons(\n mod,\n newdata = \"mean\",\n hypothesis = lc)\n\n# Effect of a 1 group-wise standard deviation change\n# First we calculate the SD in each group of `cyl`\n# Second, we use that SD as the treatment size in the `variables` argument\nlibrary(dplyr)\nmod <- lm(mpg ~ hp + factor(cyl), mtcars)\ntmp <- mtcars %>%\n group_by(cyl) %>%\n mutate(hp_sd = sd(hp))\navg_comparisons(mod, \n variables = list(hp = function(x) data.frame(x, x + tmp$hp_sd)),\n by = \"cyl\")\n\n# `by` argument\nmod <- lm(mpg ~ hp * am * vs, data = mtcars)\ncomparisons(mod, by = TRUE)\n\nmod <- lm(mpg ~ hp * am * vs, data = mtcars)\navg_comparisons(mod, variables = 
\"hp\", by = c(\"vs\", \"am\"))\n\nlibrary(nnet)\nmod <- multinom(factor(gear) ~ mpg + am * vs, data = mtcars, trace = FALSE)\nby <- data.frame(\n group = c(\"3\", \"4\", \"5\"),\n by = c(\"3,4\", \"3,4\", \"5\"))\ncomparisons(mod, type = \"probs\", by = by)", "crumbs": [ "Model to Meaning", "Functions", - "`plot_slopes`" + "`comparisons`" ] }, { @@ -551,123 +558,123 @@ ] }, { - "objectID": "man/comparisons.html", - "href": "man/comparisons.html", + "objectID": "man/plot_slopes.html", + "href": "man/plot_slopes.html", "title": "", "section": "", - "text": "Predict the outcome variable at different regressor values (e.g., college graduates vs. others), and compare those predictions by computing a difference, ratio, or some other function. comparisons() can return many quantities of interest, such as contrasts, differences, risk ratios, changes in log odds, lift, slopes, elasticities, etc.\n\n\ncomparisons(): unit-level (conditional) estimates.\n\n\navg_comparisons(): average (marginal) estimates.\n\n\nvariables identifies the focal regressors whose \"effect\" we are interested in. comparison determines how predictions with different regressor values are compared (difference, ratio, odds, etc.). The newdata argument and the datagrid() function control where statistics are evaluated in the predictor space: \"at observed values\", \"at the mean\", \"at representative values\", etc.\nSee the comparisons vignette and package website for worked examples and case studies:\n\n\nhttps://marginaleffects.com/vignettes/comparisons.html\n\n\nhttps://marginaleffects.com/\n\n\ncomparisons(\n model,\n newdata = NULL,\n variables = NULL,\n comparison = \"difference\",\n type = NULL,\n vcov = TRUE,\n by = FALSE,\n conf_level = 0.95,\n transform = NULL,\n cross = FALSE,\n wts = FALSE,\n hypothesis = NULL,\n equivalence = NULL,\n p_adjust = NULL,\n df = Inf,\n eps = NULL,\n numderiv = \"fdforward\",\n ...\n)\n\navg_comparisons(\n model,\n newdata = NULL,\n variables = NULL,\n type = NULL,\n vcov = TRUE,\n by = TRUE,\n conf_level = 0.95,\n comparison = \"difference\",\n transform = NULL,\n cross = FALSE,\n wts = FALSE,\n hypothesis = NULL,\n equivalence = NULL,\n p_adjust = NULL,\n df = Inf,\n eps = NULL,\n numderiv = \"fdforward\",\n ...\n)\n\n\n\n\n\nmodel\n\n\nModel object\n\n\n\n\nnewdata\n\n\nGrid of predictor values at which we evaluate the comparisons.\n\n\nWarning: Avoid modifying your dataset between fitting the model and calling a marginaleffects function. This can sometimes lead to unexpected results.\n\n\nNULL (default): Unit-level contrasts for each observed value in the dataset (empirical distribution). The dataset is retrieved using insight::get_data(), which tries to extract data from the environment. This may produce unexpected results if the original data frame has been altered since fitting the model.\n\n\ndata frame: Unit-level contrasts for each row of the newdata data frame.\n\n\nstring:\n\n\n\"mean\": Contrasts at the Mean. Contrasts when each predictor is held at its mean or mode.\n\n\n\"median\": Contrasts at the Median. Contrasts when each predictor is held at its median or mode.\n\n\n\"balanced\": Contrasts evaluated on a balanced grid with every combination of categories and numeric variables held at their means.\n\n\n\"tukey\": Contrasts at Tukey’s 5 numbers.\n\n\n\"grid\": Contrasts on a grid of representative numbers (Tukey’s 5 numbers and unique values of categorical predictors).\n\n\n\n\ndatagrid() call to specify a custom grid of regressors. 
For example:\n\n\nnewdata = datagrid(cyl = c(4, 6)): cyl variable equal to 4 and 6 and other regressors fixed at their means or modes.\n\n\nnewdata = datagrid(mpg = fivenum): mpg variable held at Tukey’s five numbers (using the fivenum function), and other regressors fixed at their means or modes.\n\n\nSee the Examples section and the datagrid documentation.\n\n\n\n\nsubset() call with a single argument to select a subset of the dataset used to fit the model, ex: newdata = subset(treatment == 1)\n\n\ndplyr::filter() call with a single argument to select a subset of the dataset used to fit the model, ex: newdata = filter(treatment == 1)\n\n\n\n\n\n\nvariables\n\n\nFocal variables\n\n\nNULL: compute comparisons for all the variables in the model object (can be slow).\n\n\nCharacter vector: subset of variables (usually faster).\n\n\nNamed list: names identify the subset of variables of interest, and values define the type of contrast to compute. Acceptable values depend on the variable type:\n\n\nFactor or character variables:\n\n\n\"reference\": Each factor level is compared to the factor reference (base) level\n\n\n\"all\": All combinations of observed levels\n\n\n\"sequential\": Each factor level is compared to the previous factor level\n\n\n\"pairwise\": Each factor level is compared to all other levels\n\n\n\"minmax\": The highest and lowest levels of a factor.\n\n\n\"revpairwise\", \"revreference\", \"revsequential\": inverse of the corresponding hypotheses.\n\n\nVector of length 2 with the two values to compare.\n\n\nData frame with the same number of rows as newdata, with two columns of \"lo\" and \"hi\" values to compare.\n\n\nFunction that accepts a vector and returns a data frame with two columns of \"lo\" and \"hi\" values to compare. See examples below.\n\n\n\n\nLogical variables:\n\n\nNULL: contrast between TRUE and FALSE\n\n\nData frame with the same number of rows as newdata, with two columns of \"lo\" and \"hi\" values to compare.\n\n\nFunction that accepts a vector and returns a data frame with two columns of \"lo\" and \"hi\" values to compare. See examples below.\n\n\n\n\nNumeric variables:\n\n\nNumeric of length 1: Forward contrast for a gap of x, computed between the observed value and the observed value plus x. Users can set a global option to get a \"center\" or \"backward\" contrast instead: options(marginaleffects_contrast_direction=\"center\")\n\n\nNumeric vector of length 2: Contrast between the largest and the smallest elements of the x vector.\n\n\nData frame with the same number of rows as newdata, with two columns of \"lo\" and \"hi\" values to compare.\n\n\nFunction that accepts a vector and returns a data frame with two columns of \"lo\" and \"hi\" values to compare. See examples below.\n\n\n\"iqr\": Contrast across the interquartile range of the regressor.\n\n\n\"sd\": Contrast across one standard deviation around the regressor mean.\n\n\n\"2sd\": Contrast across two standard deviations around the regressor mean.\n\n\n\"minmax\": Contrast between the maximum and the minimum values of the regressor.\n\n\n\n\nExamples:\n\n\nvariables = list(gear = \"pairwise\", hp = 10)\n\n\nvariables = list(gear = \"sequential\", hp = c(100, 120))\n\n\nvariables = list(hp = \\(x) data.frame(low = x - 5, high = x + 10))\n\n\nSee the Examples section below for more.\n\n\n\n\n\n\n\n\n\n\ncomparison\n\n\nHow should pairs of predictions be compared? 
Difference, ratio, odds ratio, or user-defined functions.\n\n\nstring: shortcuts to common contrast functions.\n\n\nSupported shortcut strings: difference, differenceavg, differenceavgwts, dydx, eyex, eydx, dyex, dydxavg, eyexavg, eydxavg, dyexavg, dydxavgwts, eyexavgwts, eydxavgwts, dyexavgwts, ratio, ratioavg, ratioavgwts, lnratio, lnratioavg, lnratioavgwts, lnor, lnoravg, lnoravgwts, lift, liftavg, liftavgwts, expdydx, expdydxavg, expdydxavgwts\n\n\nSee the Comparisons section below for definitions of each transformation.\n\n\n\n\nfunction: accepts two equal-length numeric vectors of adjusted predictions (hi and lo) and returns a vector of contrasts of the same length, or a unique numeric value.\n\n\nSee the Transformations section below for examples of valid functions.\n\n\n\n\n\n\n\n\ntype\n\n\nstring indicates the type (scale) of the predictions used to compute contrasts or slopes. This can differ based on the model type, but will typically be a string such as: \"response\", \"link\", \"probs\", or \"zero\". When an unsupported string is entered, the model-specific list of acceptable values is returned in an error message. When type is NULL, the first entry in the error message is used by default.\n\n\n\n\nvcov\n\n\nType of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:\n\n\nFALSE: Do not compute standard errors. This can speed up computation considerably.\n\n\nTRUE: Unit-level standard errors using the default vcov(model) variance-covariance matrix.\n\n\nString which indicates the kind of uncertainty estimates to return.\n\n\nHeteroskedasticity-consistent: \"HC\", \"HC0\", \"HC1\", \"HC2\", \"HC3\", \"HC4\", \"HC4m\", \"HC5\". See ?sandwich::vcovHC\n\n\nHeteroskedasticity and autocorrelation consistent: \"HAC\"\n\n\nMixed-Models degrees of freedom: \"satterthwaite\", \"kenward-roger\"\n\n\nOther: \"NeweyWest\", \"KernHAC\", \"OPG\". See the sandwich package documentation.\n\n\n\n\nOne-sided formula which indicates the name of cluster variables (e.g., ~unit_id). This formula is passed to the cluster argument of the sandwich::vcovCL function.\n\n\nSquare covariance matrix\n\n\nFunction which returns a covariance matrix (e.g., stats::vcov(model))\n\n\n\n\n\n\nby\n\n\nAggregate unit-level estimates (aka, marginalize, average over). Valid inputs:\n\n\nFALSE: return the original unit-level estimates.\n\n\nTRUE: aggregate estimates for each term.\n\n\nCharacter vector of column names in newdata or in the data frame produced by calling the function without the by argument.\n\n\nData frame with a by column of group labels, and merging columns shared by newdata or the data frame produced by calling the same function without the by argument.\n\n\nSee examples below.\n\n\nFor more complex aggregations, you can use the FUN argument of the hypotheses() function. See that function’s documentation and the Hypothesis Test vignettes on the marginaleffects website.\n\n\n\n\n\n\nconf_level\n\n\nnumeric value between 0 and 1. Confidence level to use to build a confidence interval.\n\n\n\n\ntransform\n\n\nstring or function. Transformation applied to unit-level estimates and confidence intervals just before the function returns results. Functions must accept a vector and return a vector of the same length. 
Supported string shortcuts: \"exp\", \"ln\"\n\n\n\n\ncross\n\n\n\n\nFALSE: Contrasts represent the change in adjusted predictions when one predictor changes and all other variables are held constant.\n\n\nTRUE: Contrasts represent the changes in adjusted predictions when all the predictors specified in the variables argument are manipulated simultaneously (a \"cross-contrast\").\n\n\n\n\n\n\nwts\n\n\nlogical, string or numeric: weights to use when computing average predictions, contrasts or slopes. These weights only affect the averaging in avg_*() or with the by argument, and not unit-level estimates. See ?weighted.mean\n\n\nstring: column name of the weights variable in newdata. When supplying a column name to wts, it is recommended to supply the original data (including the weights variable) explicitly to newdata.\n\n\nnumeric: vector of length equal to the number of rows in the original data or in newdata (if supplied).\n\n\nFALSE: Equal weights.\n\n\nTRUE: Extract weights from the fitted object with insight::find_weights() and use them when taking weighted averages of estimates. Warning: newdata=datagrid() returns a single average weight, which is equivalent to using wts=FALSE\n\n\n\n\n\n\nhypothesis\n\n\nspecify a hypothesis test or custom contrast using a numeric value, vector, or matrix; a string equation; a string; a formula; or a function.\n\n\nNumeric:\n\n\nSingle value: the null hypothesis used in the computation of Z and p (before applying transform).\n\n\nVector: Weights to compute a linear combination of (custom contrast between) estimates. Length equal to the number of rows generated by the same function call, but without the hypothesis argument.\n\n\nMatrix: Each column is a vector of weights, as described above, used to compute a distinct linear combination of (contrast between) estimates. The column names of the matrix are used as labels in the output.\n\n\n\n\nString equation to specify linear or non-linear hypothesis tests. If the term column uniquely identifies rows, terms can be used in the formula. Otherwise, use b1, b2, etc. to identify the position of each parameter. The b* wildcard can be used to test hypotheses on all estimates. If a named vector is used, the names are used as labels in the output. Examples:\n\n\nhp = drat\n\n\nhp + drat = 12\n\n\nb1 + b2 + b3 = 0\n\n\nb* / b1 = 1\n\n\n\n\nString:\n\n\n\"pairwise\": pairwise differences between estimates in each row.\n\n\n\"reference\": differences between the estimates in each row and the estimate in the first row.\n\n\n\"sequential\": difference between an estimate and the estimate in the next row.\n\n\n\"meandev\": difference between an estimate and the mean of all estimates.\n\n\n\"meanotherdev\": difference between an estimate and the mean of all other estimates, excluding the current one.\n\n\n\"revpairwise\", \"revreference\", \"revsequential\": inverse of the corresponding hypotheses, as described above.\n\n\n\n\nFormula:\n\n\ncomparison ~ pairs | group\n\n\nLeft-hand side determines the type of comparison to conduct: difference or ratio. 
If the left-hand side is empty, difference is chosen.\n\n\nRight-hand side determines the pairs of estimates to compare: reference, sequential, or meandev\n\n\nOptional: Users can supply grouping variables after a vertical bar to conduct comparisons within subsets.\n\n\nExamples:\n\n\n~ reference\n\n\nratio ~ pairwise\n\n\ndifference ~ pairwise | groupid\n\n\n\n\n\n\nFunction:\n\n\nAccepts an argument x: object produced by a marginaleffects function or a data frame with columns rowid and estimate\n\n\nReturns a data frame with columns term and estimate (mandatory) and rowid (optional).\n\n\nThe function can also accept optional input arguments: newdata, by, draws.\n\n\nThis function approach will not work for Bayesian models or with bootstrapping. In those cases, it is easy to use posterior_draws() to extract and manipulate the draws directly.\n\n\n\n\nSee the Examples section below and the vignette: https://marginaleffects.com/vignettes/hypothesis.html\n\n\n\n\n\n\nequivalence\n\n\nNumeric vector of length 2: bounds used for the two-one-sided test (TOST) of equivalence, and for the non-inferiority and non-superiority tests. See Details section below.\n\n\n\n\np_adjust\n\n\nAdjust p-values for multiple comparisons: \"holm\", \"hochberg\", \"hommel\", \"bonferroni\", \"BH\", \"BY\", or \"fdr\". See stats::p.adjust\n\n\n\n\ndf\n\n\nDegrees of freedom used to compute p values and confidence intervals. A single numeric value between 1 and Inf. When df is Inf, the normal distribution is used. When df is finite, the t distribution is used. See insight::get_df for a convenient function to extract degrees of freedom. Ex: slopes(model, df = insight::get_df(model))\n\n\n\n\neps\n\n\nNULL or numeric value which determines the step size to use when calculating numerical derivatives: (f(x+eps)-f(x))/eps. When eps is NULL, the step size is 0.0001 multiplied by the difference between the maximum and minimum values of the variable with respect to which we are taking the derivative. Changing eps may be necessary to avoid numerical problems in certain models.\n\n\n\n\nnumderiv\n\n\nstring or list of strings indicating the method to use for the numeric differentiation used to compute delta method standard errors.\n\n\n\"fdforward\": finite difference method with forward differences (default)\n\n\n\"fdcenter\": finite difference method with central differences\n\n\n\"richardson\": Richardson extrapolation method\n\n\nExtra arguments can be specified by passing a list to the numDeriv argument, with the name of the method first and named arguments following, ex: numderiv=list(“fdcenter”, eps = 1e-5). When an unknown argument is used, marginaleffects prints the list of valid arguments for each method.\n\n\n\n\n\n\n…\n\n\nAdditional arguments are passed to the predict() method supplied by the modeling package. These arguments are particularly useful for mixed-effects or bayesian models (see the online vignettes on the marginaleffects website). Available arguments can vary from model to model, depending on the range of supported arguments by each modeling package. 
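For instance, with a mixed-effects model fitted by lme4, the re.form argument of lme4::predict.merMod can be forwarded through the dots to request population-level predictions. A minimal sketch (it assumes the lme4 package and its built-in sleepstudy dataset; this example is not part of the original documentation):\n\nlibrary(lme4)\nlibrary(marginaleffects)\nmod <- lmer(Reaction ~ Days + (Days | Subject), data = sleepstudy)\n# re.form = NA ignores the random effects when generating predictions,\n# so the average comparison below is a population-level estimate\navg_comparisons(mod, variables = \"Days\", re.form = NA)\n\n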
See the \"Model-Specific Arguments\" section of the ?slopes documentation for a non-exhaustive list of available arguments.\n\n\n\nA data.frame with one row per observation (per term/group) and several columns:\n\n\nrowid: row number of the newdata data frame\n\n\ntype: prediction type, as defined by the type argument\n\n\ngroup: (optional) value of the grouped outcome (e.g., categorical outcome models)\n\n\nterm: the variable whose marginal effect is computed\n\n\ndydx: slope of the outcome with respect to the term, for a given combination of predictor values\n\n\nstd.error: standard errors computed by via the delta method.\n\n\np.value: p value associated to the estimate column. The null is determined by the hypothesis argument (0 by default), and p values are computed before applying the transform argument.\n\n\ns.value: Shannon information transforms of p values. How many consecutive \"heads\" tosses would provide the same amount of evidence (or \"surprise\") against the null hypothesis that the coin is fair? The purpose of S is to calibrate the analyst’s intuition about the strength of evidence encoded in p against a well-known physical phenomenon. See Greenland (2019) and Cole et al. (2020).\n\n\nconf.low: lower bound of the confidence interval (or equal-tailed interval for bayesian models)\n\n\nconf.high: upper bound of the confidence interval (or equal-tailed interval for bayesian models)\n\n\nSee ?print.marginaleffects for printing options.\n\n\n\navg_comparisons(): Average comparisons\n\n\nStandard errors for all quantities estimated by marginaleffects can be obtained via the delta method. This requires differentiating a function with respect to the coefficients in the model using a finite difference approach. In some models, the delta method standard errors can be sensitive to various aspects of the numeric differentiation strategy, including the step size. By default, the step size is set to 1e-8, or to 1e-4 times the smallest absolute model coefficient, whichever is largest.\nmarginaleffects can delegate numeric differentiation to the numDeriv package, which allows more flexibility. To do this, users can pass arguments to the numDeriv::jacobian function through a global option. For example:\n\n\noptions(marginaleffects_numDeriv = list(method = “simple”, method.args = list(eps = 1e-6)))\n\n\noptions(marginaleffects_numDeriv = list(method = “Richardson”, method.args = list(eps = 1e-5)))\n\n\noptions(marginaleffects_numDeriv = NULL)\n\n\nSee the \"Standard Errors and Confidence Intervals\" vignette on the marginaleffects website for more details on the computation of standard errors:\nhttps://marginaleffects.com/vignettes/uncertainty.html\nNote that the inferences() function can be used to compute uncertainty estimates using a bootstrap or simulation-based inference. See the vignette:\nhttps://marginaleffects.com/vignettes/bootstrap.html\n\nSome model types allow model-specific arguments to modify the nature of marginal effects, predictions, marginal means, and contrasts. 
Please report other package-specific predict() arguments on Github so we can add them to the table below.\nhttps://github.com/vincentarelbundock/marginaleffects/issues\n\n\n\nPackage\n\n\nClass\n\n\nArgument\n\n\nDocumentation\n\n\n\n\nbrms\n\n\nbrmsfit\n\n\nndraws\n\n\nbrms::posterior_predict\n\n\n\n\n\n\n\n\nre_formula\n\n\nbrms::posterior_predict\n\n\n\n\nlme4\n\n\nmerMod\n\n\nre.form\n\n\nlme4::predict.merMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nlme4::predict.merMod\n\n\n\n\nglmmTMB\n\n\nglmmTMB\n\n\nre.form\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nzitype\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\nmgcv\n\n\nbam\n\n\nexclude\n\n\nmgcv::predict.bam\n\n\n\n\n\n\ngam\n\n\nexclude\n\n\nmgcv::predict.gam\n\n\n\n\nrobustlmm\n\n\nrlmerMod\n\n\nre.form\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\nMCMCglmm\n\n\nMCMCglmm\n\n\nndraws\n\n\n\n\n\n\nsampleSelection\n\n\nselection\n\n\npart\n\n\nsampleSelection::predict.selection\n\n\n\n\n\n\n\nThe following transformations can be applied by supplying one of the shortcut strings to the comparison argument. hi is a vector of adjusted predictions for the \"high\" side of the contrast. lo is a vector of adjusted predictions for the \"low\" side of the contrast. y is a vector of adjusted predictions for the original data. x is the predictor in the original data. eps is the step size to use to compute derivatives and elasticities.\n\n\n\nShortcut\n\n\nFunction\n\n\n\n\ndifference\n\n\n(hi, lo) hi - lo\n\n\n\n\ndifferenceavg\n\n\n(hi, lo) mean(hi - lo)\n\n\n\n\ndydx\n\n\n(hi, lo, eps) (hi - lo)/eps\n\n\n\n\neyex\n\n\n(hi, lo, eps, y, x) (hi - lo)/eps * (x/y)\n\n\n\n\neydx\n\n\n(hi, lo, eps, y, x) ((hi - lo)/eps)/y\n\n\n\n\ndyex\n\n\n(hi, lo, eps, x) ((hi - lo)/eps) * x\n\n\n\n\ndydxavg\n\n\n(hi, lo, eps) mean((hi - lo)/eps)\n\n\n\n\neyexavg\n\n\n(hi, lo, eps, y, x) mean((hi - lo)/eps * (x/y))\n\n\n\n\neydxavg\n\n\n(hi, lo, eps, y, x) mean(((hi - lo)/eps)/y)\n\n\n\n\ndyexavg\n\n\n(hi, lo, eps, x) mean(((hi - lo)/eps) * x)\n\n\n\n\nratio\n\n\n(hi, lo) hi/lo\n\n\n\n\nratioavg\n\n\n(hi, lo) mean(hi)/mean(lo)\n\n\n\n\nlnratio\n\n\n(hi, lo) log(hi/lo)\n\n\n\n\nlnratioavg\n\n\n(hi, lo) log(mean(hi)/mean(lo))\n\n\n\n\nlnor\n\n\n(hi, lo) log((hi/(1 - hi))/(lo/(1 - lo)))\n\n\n\n\nlnoravg\n\n\n(hi, lo) log((mean(hi)/(1 - mean(hi)))/(mean(lo)/(1 - mean(lo))))\n\n\n\n\nlift\n\n\n(hi, lo) (hi - lo)/lo\n\n\n\n\nliftavg\n\n\n(hi, lo) (mean(hi - lo))/mean(lo)\n\n\n\n\nexpdydx\n\n\n(hi, lo, eps) ((exp(hi) - exp(lo))/exp(eps))/eps\n\n\n\n\nexpdydxavg\n\n\n(hi, lo, eps) mean(((exp(hi) - exp(lo))/exp(eps))/eps)\n\n\n\n\n\n\n\nBy default, credible intervals in bayesian models are built as equal-tailed intervals. This can be changed to a highest density interval by setting a global option:\noptions(“marginaleffects_posterior_interval” = “eti”)\noptions(“marginaleffects_posterior_interval” = “hdi”)\nBy default, the center of the posterior distribution in bayesian models is identified by the median. Users can use a different summary function by setting a global option:\noptions(“marginaleffects_posterior_center” = “mean”)\noptions(“marginaleffects_posterior_center” = “median”)\nWhen estimates are averaged using the by argument, the tidy() function, or the summary() function, the posterior distribution is marginalized twice over. 
First, we take the average across units but within each iteration of the MCMC chain, according to what the user requested in the by argument or the tidy()/summary() functions. Then, we identify the center of the resulting posterior using the function supplied to the “marginaleffects_posterior_center” option (the median by default).\n\n\\(\\theta\\) is an estimate, \\(\\sigma_\\theta\\) its estimated standard error, and \\([a, b]\\) are the bounds of the interval supplied to the equivalence argument.\nNon-inferiority:\n\n\n\\(H_0\\): \\(\\theta \\leq a\\)\n\n\n\\(H_1\\): \\(\\theta > a\\)\n\n\n\\(t=(\\theta - a)/\\sigma_\\theta\\)\n\n\np: Upper-tail probability\n\n\nNon-superiority:\n\n\n\\(H_0\\): \\(\\theta \\geq b\\)\n\n\n\\(H_1\\): \\(\\theta < b\\)\n\n\n\\(t=(\\theta - b)/\\sigma_\\theta\\)\n\n\np: Lower-tail probability\n\n\nEquivalence: Two One-Sided Tests (TOST)\n\n\np: Maximum of the non-inferiority and non-superiority p values.\n\n\nThanks to Russell V. Lenth for the excellent emmeans package and documentation which inspired this feature.\n\nThe type argument determines the scale of the predictions used to compute quantities of interest with functions from the marginaleffects package. Admissible values for type depend on the model object. When users specify an incorrect value for type, marginaleffects will raise an informative error with a list of valid type values for the specific model object. The first entry in that list is the default type.\ninvlink(link) is a special type defined by marginaleffects. It is available for some (but not all) models, and only for the predictions() function. With this link type, we first compute predictions on the link scale, then we use the inverse link function to backtransform the predictions to the response scale. This is useful for models with non-linear link functions because it can ensure that confidence intervals stay within desirable bounds, e.g., 0 to 1 for a logit model. Note that an average of estimates with type=“invlink(link)” will not always be equivalent to the average of estimates with type=“response”. This type is the default when calling predictions(). It is available, but not the default, when calling avg_predictions() or predictions() with the by argument.\nSome of the most common type values are:\nresponse, link, E, Ep, average, class, conditional, count, cum.prob, cumhaz, cumprob, density, detection, disp, ev, expected, expvalue, fitted, hazard, invlink(link), latent, latent_N, linear, linear.predictor, linpred, location, lp, mean, numeric, p, ppd, pr, precision, prediction, prob, probability, probs, quantile, risk, rmst, scale, survival, unconditional, utility, variance, xb, zero, zlink, zprob\n\nBehind the scenes, the arguments of marginaleffects functions are evaluated in this order:\n\n\nnewdata\n\n\nvariables\n\n\ncomparison and slopes\n\n\nby\n\n\nvcov\n\n\nhypothesis\n\n\ntransform\n\n\nThe slopes() and comparisons() functions can use parallelism to speed up computation. Operations are parallelized for the computation of standard errors, at the model coefficient level. There is always considerable overhead when using parallel computation, mainly involved in passing the whole dataset to the different processes. 
Thus, parallel computation is most likely to be useful when the model includes many parameters and the dataset is relatively small.\nWarning: In many cases, parallel processing will not be useful at all.\nTo activate parallel computation, users must load the future.apply package, call the plan() function, and set a global option. For example:\n\nlibrary(future.apply)\nplan(\"multicore\", workers = 4)\noptions(marginaleffects_parallel = TRUE)\n\nslopes(model)\n\n\nTo disable parallelism in marginaleffects altogether, you can set a global option:\n\noptions(marginaleffects_parallel = FALSE)\n\n\n\nThe behavior of marginaleffects functions can be modified by setting global options.\nDisable some safety checks:\n\noptions(marginaleffects_safe = FALSE)\n\n\nOmit some columns from the printed output:\n\noptions(marginaleffects_print_omit = c(\"p.value\", \"s.value\"))\n\n\n\n\n\nGreenland S. 2019. \"Valid P-Values Behave Exactly as They Should: Some Misleading Criticisms of P-Values and Their Resolution With S-Values.\" The American Statistician. 73(S1): 106–114.\n\n\nCole, Stephen R, Jessie K Edwards, and Sander Greenland. 2020. \"Surprise!\" American Journal of Epidemiology 190 (2): 191–93. https://doi.org/10.1093/aje/kwaa136\n\n\n\nlibrary(\"marginaleffects\")\n\n\n\nlibrary(marginaleffects)\n\n# Linear model\ntmp <- mtcars\ntmp$am <- as.logical(tmp$am)\nmod <- lm(mpg ~ am + factor(cyl), tmp)\navg_comparisons(mod, variables = list(cyl = \"reference\"))\navg_comparisons(mod, variables = list(cyl = \"sequential\"))\navg_comparisons(mod, variables = list(cyl = \"pairwise\"))\n\n# GLM with different scale types\nmod <- glm(am ~ factor(gear), data = mtcars)\navg_comparisons(mod, type = \"response\")\navg_comparisons(mod, type = \"link\")\n\n# Contrasts at the mean\ncomparisons(mod, newdata = \"mean\")\n\n# Contrasts between marginal means\ncomparisons(mod, newdata = \"marginalmeans\")\n\n# Contrasts at user-specified values\ncomparisons(mod, newdata = datagrid(am = 0, gear = tmp$gear))\ncomparisons(mod, newdata = datagrid(am = unique, gear = max))\n\nm <- lm(mpg ~ hp + drat + factor(cyl) + factor(am), data = mtcars)\ncomparisons(m, variables = \"hp\", newdata = datagrid(FUN_factor = unique, FUN_numeric = median))\n\n# Numeric contrasts\nmod <- lm(mpg ~ hp, data = mtcars)\navg_comparisons(mod, variables = list(hp = 1))\navg_comparisons(mod, variables = list(hp = 5))\navg_comparisons(mod, variables = list(hp = c(90, 100)))\navg_comparisons(mod, variables = list(hp = \"iqr\"))\navg_comparisons(mod, variables = list(hp = \"sd\"))\navg_comparisons(mod, variables = list(hp = \"minmax\"))\n\n# using a function to specify a custom difference in one regressor\ndat <- mtcars\ndat$new_hp <- 49 * (dat$hp - min(dat$hp)) / (max(dat$hp) - min(dat$hp)) + 1\nmodlog <- lm(mpg ~ log(new_hp) + factor(cyl), data = dat)\nfdiff <- \(x) data.frame(x, x + 10)\navg_comparisons(modlog, variables = list(new_hp = fdiff))\n\n# Adjusted Risk Ratio: see the contrasts vignette\nmod <- glm(vs ~ mpg, data = mtcars, family = binomial)\navg_comparisons(mod, comparison = \"lnratioavg\", transform = exp)\n\n# Adjusted Risk Ratio: Manual specification of the `comparison`\navg_comparisons(\n  mod,\n  comparison = function(hi, lo) log(mean(hi) / mean(lo)),\n  transform = exp)\n# cross contrasts\nmod <- lm(mpg ~ factor(cyl) * factor(gear) + hp, data = mtcars)\navg_comparisons(mod, variables = c(\"cyl\", \"gear\"), cross = TRUE)\n\n# variable-specific contrasts\navg_comparisons(mod, variables = list(gear = \"sequential\", hp = 10))\n\n# 
hypothesis test: is the `hp` marginal effect at the mean equal to the `drat` marginal effect\nmod <- lm(mpg ~ wt + drat, data = mtcars)\n\ncomparisons(\n mod,\n newdata = \"mean\",\n hypothesis = \"wt = drat\")\n\n# same hypothesis test using row indices\ncomparisons(\n mod,\n newdata = \"mean\",\n hypothesis = \"b1 - b2 = 0\")\n\n# same hypothesis test using numeric vector of weights\ncomparisons(\n mod,\n newdata = \"mean\",\n hypothesis = c(1, -1))\n\n# two custom contrasts using a matrix of weights\nlc <- matrix(c(\n 1, -1,\n 2, 3),\n ncol = 2)\ncomparisons(\n mod,\n newdata = \"mean\",\n hypothesis = lc)\n\n# Effect of a 1 group-wise standard deviation change\n# First we calculate the SD in each group of `cyl`\n# Second, we use that SD as the treatment size in the `variables` argument\nlibrary(dplyr)\nmod <- lm(mpg ~ hp + factor(cyl), mtcars)\ntmp <- mtcars %>%\n group_by(cyl) %>%\n mutate(hp_sd = sd(hp))\navg_comparisons(mod, \n variables = list(hp = function(x) data.frame(x, x + tmp$hp_sd)),\n by = \"cyl\")\n\n# `by` argument\nmod <- lm(mpg ~ hp * am * vs, data = mtcars)\ncomparisons(mod, by = TRUE)\n\nmod <- lm(mpg ~ hp * am * vs, data = mtcars)\navg_comparisons(mod, variables = \"hp\", by = c(\"vs\", \"am\"))\n\nlibrary(nnet)\nmod <- multinom(factor(gear) ~ mpg + am * vs, data = mtcars, trace = FALSE)\nby <- data.frame(\n group = c(\"3\", \"4\", \"5\"),\n by = c(\"3,4\", \"3,4\", \"5\"))\ncomparisons(mod, type = \"probs\", by = by)", + "text": "Plot slopes on the y-axis against values of one or more predictors (x-axis, colors/shapes, and facets).\nThe by argument is used to plot marginal slopes, that is, slopes made on the original data, but averaged by subgroups. This is analogous to using the by argument in the slopes() function.\nThe condition argument is used to plot conditional slopes, that is, slopes computed on a user-specified grid. This is analogous to using the newdata argument and datagrid() function in a slopes() call. All variables whose values are not specified explicitly are treated as usual by datagrid(), that is, they are held at their mean or mode (or rounded mean for integers). This includes grouping variables in mixed-effects models, so analysts who fit such models may want to specify the groups of interest using the condition argument, or supply model-specific arguments to compute population-level estimates. See details below. See the \"Plots\" vignette and website for tutorials and information on how to customize plots:\n\n\nhttps://marginaleffects.com/vignettes/plot.html\n\n\nhttps://marginaleffects.com\n\n\nplot_slopes(\n model,\n variables = NULL,\n condition = NULL,\n by = NULL,\n newdata = NULL,\n type = NULL,\n vcov = NULL,\n conf_level = 0.95,\n wts = FALSE,\n slope = \"dydx\",\n rug = FALSE,\n gray = FALSE,\n draw = TRUE,\n ...\n)\n\n\n\n\n\nmodel\n\n\nModel object\n\n\n\n\nvariables\n\n\nName of the variable whose marginal effect (slope) we want to plot on the y-axis.\n\n\n\n\ncondition\n\n\nConditional slopes\n\n\nCharacter vector (max length 4): Names of the predictors to display.\n\n\nNamed list (max length 4): List names correspond to predictors. List elements can be:\n\n\nNumeric vector\n\n\nFunction which returns a numeric vector or a set of unique categorical values\n\n\nShortcut strings for common reference values: \"minmax\", \"quartile\", \"threenum\"\n\n\n\n\n1: x-axis. 2: color/shape. 3: facet (wrap if no fourth variable, otherwise cols of grid). 
4: facet (rows of grid).\n\n\nNumeric variables in positions 2 and 3 are summarized by Tukey’s five numbers ?stats::fivenum.\n\n\n\n\n\n\nby\n\n\nAggregate unit-level estimates (aka, marginalize, average over). Valid inputs:\n\n\nFALSE: return the original unit-level estimates.\n\n\nTRUE: aggregate estimates for each term.\n\n\nCharacter vector of column names in newdata or in the data frame produced by calling the function without the by argument.\n\n\nData frame with a by column of group labels, and merging columns shared by newdata or the data frame produced by calling the same function without the by argument.\n\n\nSee examples below.\n\n\nFor more complex aggregations, you can use the FUN argument of the hypotheses() function. See that function’s documentation and the Hypothesis Test vignettes on the marginaleffects website.\n\n\n\n\n\n\nnewdata\n\n\nWhen newdata is NULL, the grid is determined by the condition argument. When newdata is not NULL, the argument behaves in the same way as in the slopes() function.\n\n\n\n\ntype\n\n\nstring indicates the type (scale) of the predictions used to compute contrasts or slopes. This can differ based on the model type, but will typically be a string such as: \"response\", \"link\", \"probs\", or \"zero\". When an unsupported string is entered, the model-specific list of acceptable values is returned in an error message. When type is NULL, the first entry in the error message is used by default.\n\n\n\n\nvcov\n\n\nType of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:\n\n\nFALSE: Do not compute standard errors. This can speed up computation considerably.\n\n\nTRUE: Unit-level standard errors using the default vcov(model) variance-covariance matrix.\n\n\nString which indicates the kind of uncertainty estimates to return.\n\n\nHeteroskedasticity-consistent: “HC”, “HC0”, “HC1”, “HC2”, “HC3”, “HC4”, “HC4m”, “HC5”. See ?sandwich::vcovHC\n\n\nHeteroskedasticity and autocorrelation consistent: “HAC”\n\n\nMixed-Models degrees of freedom: \"satterthwaite\", \"kenward-roger\"\n\n\nOther: “NeweyWest”, “KernHAC”, “OPG”. See the sandwich package documentation.\n\n\n\n\nOne-sided formula which indicates the name of cluster variables (e.g., ~unit_id). This formula is passed to the cluster argument of the sandwich::vcovCL function.\n\n\nSquare covariance matrix\n\n\nFunction which returns a covariance matrix (e.g., stats::vcov(model))\n\n\n\n\n\n\nconf_level\n\n\nnumeric value between 0 and 1. Confidence level to use to build a confidence interval.\n\n\n\n\nwts\n\n\nlogical, string or numeric: weights to use when computing average predictions, contrasts or slopes. These weights only affect the averaging in avg_*() or with the by argument, and not unit-level estimates. See ?weighted.mean\n\n\nstring: column name of the weights variable in newdata. When supplying a column name to wts, it is recommended to supply the original data (including the weights variable) explicitly to newdata.\n\n\nnumeric: vector of length equal to the number of rows in the original data or in newdata (if supplied).\n\n\nFALSE: Equal weights.\n\n\nTRUE: Extract weights from the fitted object with insight::find_weights() and use them when taking weighted averages of estimates. 
Warning: newdata=datagrid() returns a single average weight, which is equivalent to using wts=FALSE\n\n\n\n\n\n\nslope\n\n\nstring indicates the type of slope or (semi-)elasticity to compute:\n\n\n\"dydx\": dY/dX\n\n\n\"eyex\": dY/dX * Y / X\n\n\n\"eydx\": dY/dX * Y\n\n\n\"dyex\": dY/dX / X\n\n\nY is the predicted value of the outcome; X is the observed value of the predictor.\n\n\n\n\n\n\nrug\n\n\nTRUE displays tick marks on the axes to mark the distribution of raw data.\n\n\n\n\ngray\n\n\nLogical. TRUE: grayscale plot. FALSE (default): color plot.\n\n\n\n\ndraw\n\n\nTRUE returns a ggplot2 plot. FALSE returns a data.frame of the underlying data.\n\n\n\n\n…\n\n\nAdditional arguments are passed to the predict() method supplied by the modeling package. These arguments are particularly useful for mixed-effects or bayesian models (see the online vignettes on the marginaleffects website). Available arguments can vary from model to model, depending on the range of supported arguments by each modeling package. See the \"Model-Specific Arguments\" section of the ?slopes documentation for a non-exhaustive list of available arguments.\n\n\n\nA ggplot2 object\n\nSome model types allow model-specific arguments to modify the nature of marginal effects, predictions, marginal means, and contrasts. Please report other package-specific predict() arguments on Github so we can add them to the table below.\nhttps://github.com/vincentarelbundock/marginaleffects/issues\n\n\n\nPackage\n\n\nClass\n\n\nArgument\n\n\nDocumentation\n\n\n\n\nbrms\n\n\nbrmsfit\n\n\nndraws\n\n\nbrms::posterior_predict\n\n\n\n\n\n\n\n\nre_formula\n\n\nbrms::posterior_predict\n\n\n\n\nlme4\n\n\nmerMod\n\n\nre.form\n\n\nlme4::predict.merMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nlme4::predict.merMod\n\n\n\n\nglmmTMB\n\n\nglmmTMB\n\n\nre.form\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nzitype\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\nmgcv\n\n\nbam\n\n\nexclude\n\n\nmgcv::predict.bam\n\n\n\n\n\n\ngam\n\n\nexclude\n\n\nmgcv::predict.gam\n\n\n\n\nrobustlmm\n\n\nrlmerMod\n\n\nre.form\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\nMCMCglmm\n\n\nMCMCglmm\n\n\nndraws\n\n\n\n\n\n\nsampleSelection\n\n\nselection\n\n\npart\n\n\nsampleSelection::predict.selection\n\n\n\n\n\n\n\n\nlibrary(\"marginaleffects\")\n\nlibrary(marginaleffects)\nmod <- lm(mpg ~ hp * drat * factor(am), data = mtcars)\n\nplot_slopes(mod, variables = \"hp\", condition = \"drat\")\n\n\n\n\n\n\nplot_slopes(mod, variables = \"hp\", condition = c(\"drat\", \"am\"))\n\n\n\n\n\n\nplot_slopes(mod, variables = \"hp\", condition = list(\"am\", \"drat\" = 3:5))\n\n\n\n\n\n\nplot_slopes(mod, variables = \"am\", condition = list(\"hp\", \"drat\" = range))\n\n\n\n\n\n\nplot_slopes(mod, variables = \"am\", condition = list(\"hp\", \"drat\" = \"threenum\"))",
    "crumbs": [
      "Model to Meaning",
      "Functions",
      "`plot_slopes`"
    ]
  },
  {
    "objectID": "man/comparisons.html#comparisons-between-predictions-made-with-different-regressor-values",
    "href": "man/comparisons.html#comparisons-between-predictions-made-with-different-regressor-values",
    "title": "",
    "section": "",
    "text": "Predict the outcome variable at different regressor values (e.g., college graduates vs. 
others), and compare those predictions by computing a difference, ratio, or some other function. comparisons() can return many quantities of interest, such as contrasts, differences, risk ratios, changes in log odds, lift, slopes, elasticities, etc.\n\n\ncomparisons(): unit-level (conditional) estimates.\n\n\navg_comparisons(): average (marginal) estimates.\n\n\nvariables identifies the focal regressors whose \"effect\" we are interested in. comparison determines how predictions with different regressor values are compared (difference, ratio, odds, etc.). The newdata argument and the datagrid() function control where statistics are evaluated in the predictor space: \"at observed values\", \"at the mean\", \"at representative values\", etc.\nSee the comparisons vignette and package website for worked examples and case studies:\n\n\nhttps://marginaleffects.com/vignettes/comparisons.html\n\n\nhttps://marginaleffects.com/\n\n\ncomparisons(\n model,\n newdata = NULL,\n variables = NULL,\n comparison = \"difference\",\n type = NULL,\n vcov = TRUE,\n by = FALSE,\n conf_level = 0.95,\n transform = NULL,\n cross = FALSE,\n wts = FALSE,\n hypothesis = NULL,\n equivalence = NULL,\n p_adjust = NULL,\n df = Inf,\n eps = NULL,\n numderiv = \"fdforward\",\n ...\n)\n\navg_comparisons(\n model,\n newdata = NULL,\n variables = NULL,\n type = NULL,\n vcov = TRUE,\n by = TRUE,\n conf_level = 0.95,\n comparison = \"difference\",\n transform = NULL,\n cross = FALSE,\n wts = FALSE,\n hypothesis = NULL,\n equivalence = NULL,\n p_adjust = NULL,\n df = Inf,\n eps = NULL,\n numderiv = \"fdforward\",\n ...\n)\n\n\n\n\n\nmodel\n\n\nModel object\n\n\n\n\nnewdata\n\n\nGrid of predictor values at which we evaluate the comparisons.\n\n\nWarning: Avoid modifying your dataset between fitting the model and calling a marginaleffects function. This can sometimes lead to unexpected results.\n\n\nNULL (default): Unit-level contrasts for each observed value in the dataset (empirical distribution). The dataset is retrieved using insight::get_data(), which tries to extract data from the environment. This may produce unexpected results if the original data frame has been altered since fitting the model.\n\n\ndata frame: Unit-level contrasts for each row of the newdata data frame.\n\n\nstring:\n\n\n\"mean\": Contrasts at the Mean. Contrasts when each predictor is held at its mean or mode.\n\n\n\"median\": Contrasts at the Median. Contrasts when each predictor is held at its median or mode.\n\n\n\"balanced\": Contrasts evaluated on a balanced grid with every combination of categories and numeric variables held at their means.\n\n\n\"tukey\": Contrasts at Tukey’s 5 numbers.\n\n\n\"grid\": Contrasts on a grid of representative numbers (Tukey’s 5 numbers and unique values of categorical predictors).\n\n\n\n\ndatagrid() call to specify a custom grid of regressors. 
For example:\n\n\nnewdata = datagrid(cyl = c(4, 6)): cyl variable equal to 4 and 6 and other regressors fixed at their means or modes.\n\n\nnewdata = datagrid(mpg = fivenum): mpg variable held at Tukey’s five numbers (using the fivenum function), and other regressors fixed at their means or modes.\n\n\nSee the Examples section and the datagrid documentation.\n\n\n\n\nsubset() call with a single argument to select a subset of the dataset used to fit the model, ex: newdata = subset(treatment == 1)\n\n\ndplyr::filter() call with a single argument to select a subset of the dataset used to fit the model, ex: newdata = filter(treatment == 1)\n\n\n\n\n\n\nvariables\n\n\nFocal variables\n\n\nNULL: compute comparisons for all the variables in the model object (can be slow).\n\n\nCharacter vector: subset of variables (usually faster).\n\n\nNamed list: names identify the subset of variables of interest, and values define the type of contrast to compute. Acceptable values depend on the variable type:\n\n\nFactor or character variables:\n\n\n\"reference\": Each factor level is compared to the factor reference (base) level\n\n\n\"all\": All combinations of observed levels\n\n\n\"sequential\": Each factor level is compared to the previous factor level\n\n\n\"pairwise\": Each factor level is compared to all other levels\n\n\n\"minmax\": The highest and lowest levels of a factor.\n\n\n\"revpairwise\", \"revreference\", \"revsequential\": inverse of the corresponding hypotheses.\n\n\nVector of length 2 with the two values to compare.\n\n\nData frame with the same number of rows as newdata, with two columns of \"lo\" and \"hi\" values to compare.\n\n\nFunction that accepts a vector and returns a data frame with two columns of \"lo\" and \"hi\" values to compare. See examples below.\n\n\n\n\nLogical variables:\n\n\nNULL: contrast between TRUE and FALSE\n\n\nData frame with the same number of rows as newdata, with two columns of \"lo\" and \"hi\" values to compare.\n\n\nFunction that accepts a vector and returns a data frame with two columns of \"lo\" and \"hi\" values to compare. See examples below.\n\n\n\n\nNumeric variables:\n\n\nNumeric of length 1: Forward contrast for a gap of x, computed between the observed value and the observed value plus x. Users can set a global option to get a \"center\" or \"backward\" contrast instead: options(marginaleffects_contrast_direction=“center”)\n\n\nNumeric vector of length 2: Contrast between the largest and the smallest elements of the x vector.\n\n\nData frame with the same number of rows as newdata, with two columns of \"lo\" and \"hi\" values to compare.\n\n\nFunction that accepts a vector and returns a data frame with two columns of \"lo\" and \"hi\" values to compare. See examples below.\n\n\n\"iqr\": Contrast across the interquartile range of the regressor.\n\n\n\"sd\": Contrast across one standard deviation around the regressor mean.\n\n\n\"2sd\": Contrast across two standard deviations around the regressor mean.\n\n\n\"minmax\": Contrast between the maximum and the minimum values of the regressor.\n\n\n\n\nExamples:\n\n\nvariables = list(gear = “pairwise”, hp = 10)\n\n\nvariables = list(gear = “sequential”, hp = c(100, 120))\n\n\nvariables = list(hp = \(x) data.frame(low = x - 5, high = x + 10))\n\n\nSee the Examples section below for more.\n\n\n\n\n\n\n\n\n\n\ncomparison\n\n\nHow should pairs of predictions be compared? 
Difference, ratio, odds ratio, or user-defined functions.\n\n\nstring: shortcuts to common contrast functions.\n\n\nSupported shortcut strings: difference, differenceavg, differenceavgwts, dydx, eyex, eydx, dyex, dydxavg, eyexavg, eydxavg, dyexavg, dydxavgwts, eyexavgwts, eydxavgwts, dyexavgwts, ratio, ratioavg, ratioavgwts, lnratio, lnratioavg, lnratioavgwts, lnor, lnoravg, lnoravgwts, lift, liftavg, liftavgwts, expdydx, expdydxavg, expdydxavgwts\n\n\nSee the Comparisons section below for definitions of each transformation.\n\n\n\n\nfunction: accepts two equal-length numeric vectors of adjusted predictions (hi and lo) and returns a vector of contrasts of the same length, or a unique numeric value.\n\n\nSee the Transformations section below for examples of valid functions.\n\n\n\n\n\n\n\n\ntype\n\n\nstring indicates the type (scale) of the predictions used to compute contrasts or slopes. This can differ based on the model type, but will typically be a string such as: \"response\", \"link\", \"probs\", or \"zero\". When an unsupported string is entered, the model-specific list of acceptable values is returned in an error message. When type is NULL, the first entry in the error message is used by default.\n\n\n\n\nvcov\n\n\nType of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:\n\n\nFALSE: Do not compute standard errors. This can speed up computation considerably.\n\n\nTRUE: Unit-level standard errors using the default vcov(model) variance-covariance matrix.\n\n\nString which indicates the kind of uncertainty estimates to return.\n\n\nHeteroskedasticity-consistent: “HC”, “HC0”, “HC1”, “HC2”, “HC3”, “HC4”, “HC4m”, “HC5”. See ?sandwich::vcovHC\n\n\nHeteroskedasticity and autocorrelation consistent: “HAC”\n\n\nMixed-Models degrees of freedom: \"satterthwaite\", \"kenward-roger\"\n\n\nOther: “NeweyWest”, “KernHAC”, “OPG”. See the sandwich package documentation.\n\n\n\n\nOne-sided formula which indicates the name of cluster variables (e.g., ~unit_id). This formula is passed to the cluster argument of the sandwich::vcovCL function.\n\n\nSquare covariance matrix\n\n\nFunction which returns a covariance matrix (e.g., stats::vcov(model))\n\n\n\n\n\n\nby\n\n\nAggregate unit-level estimates (aka, marginalize, average over). Valid inputs:\n\n\nFALSE: return the original unit-level estimates.\n\n\nTRUE: aggregate estimates for each term.\n\n\nCharacter vector of column names in newdata or in the data frame produced by calling the function without the by argument.\n\n\nData frame with a by column of group labels, and merging columns shared by newdata or the data frame produced by calling the same function without the by argument.\n\n\nSee examples below.\n\n\nFor more complex aggregations, you can use the FUN argument of the hypotheses() function. See that function’s documentation and the Hypothesis Test vignettes on the marginaleffects website.\n\n\n\n\n\n\nconf_level\n\n\nnumeric value between 0 and 1. Confidence level to use to build a confidence interval.\n\n\n\n\ntransform\n\n\nstring or function. Transformation applied to unit-level estimates and confidence intervals just before the function returns results. Functions must accept a vector and return a vector of the same length. 
Supported string shortcuts: \"exp\", \"ln\"\n\n\n\n\ncross\n\n\n\n\nFALSE: Contrasts represent the change in adjusted predictions when one predictor changes and all other variables are held constant.\n\n\nTRUE: Contrasts represent the changes in adjusted predictions when all the predictors specified in the variables argument are manipulated simultaneously (a \"cross-contrast\").\n\n\n\n\n\n\nwts\n\n\nlogical, string or numeric: weights to use when computing average predictions, contrasts or slopes. These weights only affect the averaging in avg_*() or with the by argument, and not unit-level estimates. See ?weighted.mean\n\n\nstring: column name of the weights variable in newdata. When supplying a column name to wts, it is recommended to supply the original data (including the weights variable) explicitly to newdata.\n\n\nnumeric: vector of length equal to the number of rows in the original data or in newdata (if supplied).\n\n\nFALSE: Equal weights.\n\n\nTRUE: Extract weights from the fitted object with insight::find_weights() and use them when taking weighted averages of estimates. Warning: newdata=datagrid() returns a single average weight, which is equivalent to using wts=FALSE\n\n\n\n\n\n\nhypothesis\n\n\nspecify a hypothesis test or custom contrast using a numeric value, vector, or matrix; a string equation; a string; a formula; or a function.\n\n\nNumeric:\n\n\nSingle value: the null hypothesis used in the computation of Z and p (before applying transform).\n\n\nVector: Weights to compute a linear combination of (custom contrast between) estimates. Length equal to the number of rows generated by the same function call, but without the hypothesis argument.\n\n\nMatrix: Each column is a vector of weights, as described above, used to compute a distinct linear combination of (contrast between) estimates. The column names of the matrix are used as labels in the output.\n\n\n\n\nString equation to specify linear or non-linear hypothesis tests. If the term column uniquely identifies rows, terms can be used in the formula. Otherwise, use b1, b2, etc. to identify the position of each parameter. The b* wildcard can be used to test hypotheses on all estimates. If a named vector is used, the names are used as labels in the output. Examples:\n\n\nhp = drat\n\n\nhp + drat = 12\n\n\nb1 + b2 + b3 = 0\n\n\nb* / b1 = 1\n\n\n\n\nString:\n\n\n\"pairwise\": pairwise differences between estimates in each row.\n\n\n\"reference\": differences between the estimates in each row and the estimate in the first row.\n\n\n\"sequential\": difference between an estimate and the estimate in the next row.\n\n\n\"meandev\": difference between an estimate and the mean of all estimates.\n\n\n\"meanotherdev\": difference between an estimate and the mean of all other estimates, excluding the current one.\n\n\n\"revpairwise\", \"revreference\", \"revsequential\": inverse of the corresponding hypotheses, as described above.\n\n\n\n\nFormula:\n\n\ncomparison ~ pairs | group\n\n\nLeft-hand side determines the type of comparison to conduct: difference or ratio. 
If the left-hand side is empty, difference is chosen.\n\n\nRight-hand side determines the pairs of estimates to compare: reference, sequential, or meandev\n\n\nOptional: Users can supply grouping variables after a vertical bar to conduct comparisons within subsets.\n\n\nExamples:\n\n\n~ reference\n\n\nratio ~ pairwise\n\n\ndifference ~ pairwise | groupid\n\n\n\n\n\n\nFunction:\n\n\nAccepts an argument x: object produced by a marginaleffects function or a data frame with columns rowid and estimate\n\n\nReturns a data frame with columns term and estimate (mandatory) and rowid (optional).\n\n\nThe function can also accept optional input arguments: newdata, by, draws.\n\n\nThis function approach will not work for Bayesian models or with bootstrapping. In those cases, it is easy to use posterior_draws() to extract and manipulate the draws directly.\n\n\n\n\nSee the Examples section below and the vignette: https://marginaleffects.com/vignettes/hypothesis.html\n\n\n\n\n\n\nequivalence\n\n\nNumeric vector of length 2: bounds used for the two-one-sided test (TOST) of equivalence, and for the non-inferiority and non-superiority tests. See Details section below.\n\n\n\n\np_adjust\n\n\nAdjust p-values for multiple comparisons: \"holm\", \"hochberg\", \"hommel\", \"bonferroni\", \"BH\", \"BY\", or \"fdr\". See stats::p.adjust\n\n\n\n\ndf\n\n\nDegrees of freedom used to compute p values and confidence intervals. A single numeric value between 1 and Inf. When df is Inf, the normal distribution is used. When df is finite, the t distribution is used. See insight::get_df for a convenient function to extract degrees of freedom. Ex: slopes(model, df = insight::get_df(model))\n\n\n\n\neps\n\n\nNULL or numeric value which determines the step size to use when calculating numerical derivatives: (f(x+eps)-f(x))/eps. When eps is NULL, the step size is 0.0001 multiplied by the difference between the maximum and minimum values of the variable with respect to which we are taking the derivative. Changing eps may be necessary to avoid numerical problems in certain models.\n\n\n\n\nnumderiv\n\n\nstring or list of strings indicating the method to use for the numeric differentiation used to compute delta method standard errors.\n\n\n\"fdforward\": finite difference method with forward differences (default)\n\n\n\"fdcenter\": finite difference method with central differences\n\n\n\"richardson\": Richardson extrapolation method\n\n\nExtra arguments can be specified by passing a list to the numDeriv argument, with the name of the method first and named arguments following, ex: numderiv=list(“fdcenter”, eps = 1e-5). When an unknown argument is used, marginaleffects prints the list of valid arguments for each method.\n\n\n\n\n\n\n…\n\n\nAdditional arguments are passed to the predict() method supplied by the modeling package. These arguments are particularly useful for mixed-effects or bayesian models (see the online vignettes on the marginaleffects website). Available arguments can vary from model to model, depending on the range of supported arguments by each modeling package. 
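As another illustration, the exclude argument of mgcv::predict.gam can be forwarded to zero out specific smooth terms in the underlying predictions. A purely illustrative sketch (the model formula and the excluded term are assumptions, not taken from the original documentation):\n\nlibrary(mgcv)\nlibrary(marginaleffects)\nmod <- gam(mpg ~ s(hp) + am, data = mtcars)\n# drop the contribution of the s(hp) smooth before averaging the contrasts\navg_comparisons(mod, variables = \"am\", exclude = \"s(hp)\")\n\n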
See the \"Model-Specific Arguments\" section of the ?slopes documentation for a non-exhaustive list of available arguments.\n\n\n\nA data.frame with one row per observation (per term/group) and several columns:\n\n\nrowid: row number of the newdata data frame\n\n\ntype: prediction type, as defined by the type argument\n\n\ngroup: (optional) value of the grouped outcome (e.g., categorical outcome models)\n\n\nterm: the variable whose marginal effect is computed\n\n\ndydx: slope of the outcome with respect to the term, for a given combination of predictor values\n\n\nstd.error: standard errors computed by via the delta method.\n\n\np.value: p value associated to the estimate column. The null is determined by the hypothesis argument (0 by default), and p values are computed before applying the transform argument.\n\n\ns.value: Shannon information transforms of p values. How many consecutive \"heads\" tosses would provide the same amount of evidence (or \"surprise\") against the null hypothesis that the coin is fair? The purpose of S is to calibrate the analyst’s intuition about the strength of evidence encoded in p against a well-known physical phenomenon. See Greenland (2019) and Cole et al. (2020).\n\n\nconf.low: lower bound of the confidence interval (or equal-tailed interval for bayesian models)\n\n\nconf.high: upper bound of the confidence interval (or equal-tailed interval for bayesian models)\n\n\nSee ?print.marginaleffects for printing options.\n\n\n\navg_comparisons(): Average comparisons\n\n\nStandard errors for all quantities estimated by marginaleffects can be obtained via the delta method. This requires differentiating a function with respect to the coefficients in the model using a finite difference approach. In some models, the delta method standard errors can be sensitive to various aspects of the numeric differentiation strategy, including the step size. By default, the step size is set to 1e-8, or to 1e-4 times the smallest absolute model coefficient, whichever is largest.\nmarginaleffects can delegate numeric differentiation to the numDeriv package, which allows more flexibility. To do this, users can pass arguments to the numDeriv::jacobian function through a global option. For example:\n\n\noptions(marginaleffects_numDeriv = list(method = “simple”, method.args = list(eps = 1e-6)))\n\n\noptions(marginaleffects_numDeriv = list(method = “Richardson”, method.args = list(eps = 1e-5)))\n\n\noptions(marginaleffects_numDeriv = NULL)\n\n\nSee the \"Standard Errors and Confidence Intervals\" vignette on the marginaleffects website for more details on the computation of standard errors:\nhttps://marginaleffects.com/vignettes/uncertainty.html\nNote that the inferences() function can be used to compute uncertainty estimates using a bootstrap or simulation-based inference. See the vignette:\nhttps://marginaleffects.com/vignettes/bootstrap.html\n\nSome model types allow model-specific arguments to modify the nature of marginal effects, predictions, marginal means, and contrasts. 
Please report other package-specific predict() arguments on Github so we can add them to the table below.\nhttps://github.com/vincentarelbundock/marginaleffects/issues\n\n\n\nPackage\n\n\nClass\n\n\nArgument\n\n\nDocumentation\n\n\n\n\nbrms\n\n\nbrmsfit\n\n\nndraws\n\n\nbrms::posterior_predict\n\n\n\n\n\n\n\n\nre_formula\n\n\nbrms::posterior_predict\n\n\n\n\nlme4\n\n\nmerMod\n\n\nre.form\n\n\nlme4::predict.merMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nlme4::predict.merMod\n\n\n\n\nglmmTMB\n\n\nglmmTMB\n\n\nre.form\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nzitype\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\nmgcv\n\n\nbam\n\n\nexclude\n\n\nmgcv::predict.bam\n\n\n\n\n\n\ngam\n\n\nexclude\n\n\nmgcv::predict.gam\n\n\n\n\nrobustlmm\n\n\nrlmerMod\n\n\nre.form\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\nMCMCglmm\n\n\nMCMCglmm\n\n\nndraws\n\n\n\n\n\n\nsampleSelection\n\n\nselection\n\n\npart\n\n\nsampleSelection::predict.selection\n\n\n\n\n\n\n\nThe following transformations can be applied by supplying one of the shortcut strings to the comparison argument. hi is a vector of adjusted predictions for the \"high\" side of the contrast. lo is a vector of adjusted predictions for the \"low\" side of the contrast. y is a vector of adjusted predictions for the original data. x is the predictor in the original data. eps is the step size to use to compute derivatives and elasticities.\n\n\n\nShortcut\n\n\nFunction\n\n\n\n\ndifference\n\n\n(hi, lo) hi - lo\n\n\n\n\ndifferenceavg\n\n\n(hi, lo) mean(hi - lo)\n\n\n\n\ndydx\n\n\n(hi, lo, eps) (hi - lo)/eps\n\n\n\n\neyex\n\n\n(hi, lo, eps, y, x) (hi - lo)/eps * (x/y)\n\n\n\n\neydx\n\n\n(hi, lo, eps, y, x) ((hi - lo)/eps)/y\n\n\n\n\ndyex\n\n\n(hi, lo, eps, x) ((hi - lo)/eps) * x\n\n\n\n\ndydxavg\n\n\n(hi, lo, eps) mean((hi - lo)/eps)\n\n\n\n\neyexavg\n\n\n(hi, lo, eps, y, x) mean((hi - lo)/eps * (x/y))\n\n\n\n\neydxavg\n\n\n(hi, lo, eps, y, x) mean(((hi - lo)/eps)/y)\n\n\n\n\ndyexavg\n\n\n(hi, lo, eps, x) mean(((hi - lo)/eps) * x)\n\n\n\n\nratio\n\n\n(hi, lo) hi/lo\n\n\n\n\nratioavg\n\n\n(hi, lo) mean(hi)/mean(lo)\n\n\n\n\nlnratio\n\n\n(hi, lo) log(hi/lo)\n\n\n\n\nlnratioavg\n\n\n(hi, lo) log(mean(hi)/mean(lo))\n\n\n\n\nlnor\n\n\n(hi, lo) log((hi/(1 - hi))/(lo/(1 - lo)))\n\n\n\n\nlnoravg\n\n\n(hi, lo) log((mean(hi)/(1 - mean(hi)))/(mean(lo)/(1 - mean(lo))))\n\n\n\n\nlift\n\n\n(hi, lo) (hi - lo)/lo\n\n\n\n\nliftavg\n\n\n(hi, lo) (mean(hi - lo))/mean(lo)\n\n\n\n\nexpdydx\n\n\n(hi, lo, eps) ((exp(hi) - exp(lo))/exp(eps))/eps\n\n\n\n\nexpdydxavg\n\n\n(hi, lo, eps) mean(((exp(hi) - exp(lo))/exp(eps))/eps)\n\n\n\n\n\n\n\nBy default, credible intervals in bayesian models are built as equal-tailed intervals. This can be changed to a highest density interval by setting a global option:\noptions(“marginaleffects_posterior_interval” = “eti”)\noptions(“marginaleffects_posterior_interval” = “hdi”)\nBy default, the center of the posterior distribution in bayesian models is identified by the median. Users can use a different summary function by setting a global option:\noptions(“marginaleffects_posterior_center” = “mean”)\noptions(“marginaleffects_posterior_center” = “median”)\nWhen estimates are averaged using the by argument, the tidy() function, or the summary() function, the posterior distribution is marginalized twice over. 
First, we take the average across units but within each iteration of the MCMC chain, according to what the user requested in the by argument or the tidy()/summary() functions. Then, we identify the center of the resulting posterior using the function supplied to the “marginaleffects_posterior_center” option (the median by default).\n\n\\(\\theta\\) is an estimate, \\(\\sigma_\\theta\\) its estimated standard error, and \\([a, b]\\) are the bounds of the interval supplied to the equivalence argument.\nNon-inferiority:\n\n\n\\(H_0\\): \\(\\theta \\leq a\\)\n\n\n\\(H_1\\): \\(\\theta > a\\)\n\n\n\\(t=(\\theta - a)/\\sigma_\\theta\\)\n\n\np: Upper-tail probability\n\n\nNon-superiority:\n\n\n\\(H_0\\): \\(\\theta \\geq b\\)\n\n\n\\(H_1\\): \\(\\theta < b\\)\n\n\n\\(t=(\\theta - b)/\\sigma_\\theta\\)\n\n\np: Lower-tail probability\n\n\nEquivalence: Two One-Sided Tests (TOST)\n\n\np: Maximum of the non-inferiority and non-superiority p values.\n\n\nThanks to Russell V. Lenth for the excellent emmeans package and documentation which inspired this feature.\n\nThe type argument determines the scale of the predictions used to compute quantities of interest with functions from the marginaleffects package. Admissible values for type depend on the model object. When users specify an incorrect value for type, marginaleffects will raise an informative error with a list of valid type values for the specific model object. The first entry in that list is the default type.\ninvlink(link) is a special type defined by marginaleffects. It is available for some (but not all) models, and only for the predictions() function. With this link type, we first compute predictions on the link scale, then we use the inverse link function to backtransform the predictions to the response scale. This is useful for models with non-linear link functions because it can ensure that confidence intervals stay within desirable bounds, e.g., 0 to 1 for a logit model. Note that an average of estimates with type=“invlink(link)” will not always be equivalent to the average of estimates with type=“response”. This type is the default when calling predictions(). It is available, but not the default, when calling avg_predictions() or predictions() with the by argument.\nSome of the most common type values are:\nresponse, link, E, Ep, average, class, conditional, count, cum.prob, cumhaz, cumprob, density, detection, disp, ev, expected, expvalue, fitted, hazard, invlink(link), latent, latent_N, linear, linear.predictor, linpred, location, lp, mean, numeric, p, ppd, pr, precision, prediction, prob, probability, probs, quantile, risk, rmst, scale, survival, unconditional, utility, variance, xb, zero, zlink, zprob\n\nBehind the scenes, the arguments of marginaleffects functions are evaluated in this order:\n\n\nnewdata\n\n\nvariables\n\n\ncomparison and slopes\n\n\nby\n\n\nvcov\n\n\nhypothesis\n\n\ntransform\n\n\nThe slopes() and comparisons() functions can use parallelism to speed up computation. Operations are parallelized for the computation of standard errors, at the model coefficient level. There is always considerable overhead when using parallel computation, mainly involved in passing the whole dataset to the different processes. 
Thus, parallel computation is most likely to be useful when the model includes many parameters and the dataset is relatively small.\nWarning: In many cases, parallel processing will not be useful at all.\nTo activate parallel computation, users must load the future.apply package, call the plan() function, and set a global option. For example:\n\nlibrary(future.apply)\nplan(\"multicore\", workers = 4)\noptions(marginaleffects_parallel = TRUE)\n\nslopes(model)\n\n\nTo disable parallelism in marginaleffects altogether, you can set a global option:\n\noptions(marginaleffects_parallel = FALSE)\n\n\n\nThe behavior of marginaleffects functions can be modified by setting global options.\nDisable some safety checks:\n\noptions(marginaleffects_safe = FALSE)\n\n\nOmit some columns from the printed output:\n\noptions(marginaleffects_print_omit = c(\"p.value\", \"s.value\"))\n\n\n\n\n\nGreenland S. 2019. \"Valid P-Values Behave Exactly as They Should: Some Misleading Criticisms of P-Values and Their Resolution With S-Values.\" The American Statistician. 73(S1): 106–114.\n\n\nCole, Stephen R, Jessie K Edwards, and Sander Greenland. 2020. \"Surprise!\" American Journal of Epidemiology 190 (2): 191–93. https://doi.org/10.1093/aje/kwaa136\n\n\n\nlibrary(\"marginaleffects\")\n\n\n\nlibrary(marginaleffects)\n\n# Linear model\ntmp <- mtcars\ntmp$am <- as.logical(tmp$am)\nmod <- lm(mpg ~ am + factor(cyl), tmp)\navg_comparisons(mod, variables = list(cyl = \"reference\"))\navg_comparisons(mod, variables = list(cyl = \"sequential\"))\navg_comparisons(mod, variables = list(cyl = \"pairwise\"))\n\n# GLM with different scale types\nmod <- glm(am ~ factor(gear), data = mtcars)\navg_comparisons(mod, type = \"response\")\navg_comparisons(mod, type = \"link\")\n\n# Contrasts at the mean\ncomparisons(mod, newdata = \"mean\")\n\n# Contrasts between marginal means\ncomparisons(mod, newdata = \"marginalmeans\")\n\n# Contrasts at user-specified values\ncomparisons(mod, newdata = datagrid(am = 0, gear = tmp$gear))\ncomparisons(mod, newdata = datagrid(am = unique, gear = max))\n\nm <- lm(mpg ~ hp + drat + factor(cyl) + factor(am), data = mtcars)\ncomparisons(m, variables = \"hp\", newdata = datagrid(FUN_factor = unique, FUN_numeric = median))\n\n# Numeric contrasts\nmod <- lm(mpg ~ hp, data = mtcars)\navg_comparisons(mod, variables = list(hp = 1))\navg_comparisons(mod, variables = list(hp = 5))\navg_comparisons(mod, variables = list(hp = c(90, 100)))\navg_comparisons(mod, variables = list(hp = \"iqr\"))\navg_comparisons(mod, variables = list(hp = \"sd\"))\navg_comparisons(mod, variables = list(hp = \"minmax\"))\n\n# using a function to specify a custom difference in one regressor\ndat <- mtcars\ndat$new_hp <- 49 * (dat$hp - min(dat$hp)) / (max(dat$hp) - min(dat$hp)) + 1\nmodlog <- lm(mpg ~ log(new_hp) + factor(cyl), data = dat)\nfdiff <- \(x) data.frame(x, x + 10)\navg_comparisons(modlog, variables = list(new_hp = fdiff))\n\n# Adjusted Risk Ratio: see the contrasts vignette\nmod <- glm(vs ~ mpg, data = mtcars, family = binomial)\navg_comparisons(mod, comparison = \"lnratioavg\", transform = exp)\n\n# Adjusted Risk Ratio: Manual specification of the `comparison`\navg_comparisons(\n  mod,\n  comparison = function(hi, lo) log(mean(hi) / mean(lo)),\n  transform = exp)\n# cross contrasts\nmod <- lm(mpg ~ factor(cyl) * factor(gear) + hp, data = mtcars)\navg_comparisons(mod, variables = c(\"cyl\", \"gear\"), cross = TRUE)\n\n# variable-specific contrasts\navg_comparisons(mod, variables = list(gear = \"sequential\", hp = 10))\n\n# 
hypothesis test: is the `hp` marginal effect at the mean equal to the `drat` marginal effect\nmod <- lm(mpg ~ wt + drat, data = mtcars)\n\ncomparisons(\n mod,\n newdata = \"mean\",\n hypothesis = \"wt = drat\")\n\n# same hypothesis test using row indices\ncomparisons(\n mod,\n newdata = \"mean\",\n hypothesis = \"b1 - b2 = 0\")\n\n# same hypothesis test using numeric vector of weights\ncomparisons(\n mod,\n newdata = \"mean\",\n hypothesis = c(1, -1))\n\n# two custom contrasts using a matrix of weights\nlc <- matrix(c(\n 1, -1,\n 2, 3),\n ncol = 2)\ncomparisons(\n mod,\n newdata = \"mean\",\n hypothesis = lc)\n\n# Effect of a 1 group-wise standard deviation change\n# First we calculate the SD in each group of `cyl`\n# Second, we use that SD as the treatment size in the `variables` argument\nlibrary(dplyr)\nmod <- lm(mpg ~ hp + factor(cyl), mtcars)\ntmp <- mtcars %>%\n group_by(cyl) %>%\n mutate(hp_sd = sd(hp))\navg_comparisons(mod, \n variables = list(hp = function(x) data.frame(x, x + tmp$hp_sd)),\n by = \"cyl\")\n\n# `by` argument\nmod <- lm(mpg ~ hp * am * vs, data = mtcars)\ncomparisons(mod, by = TRUE)\n\nmod <- lm(mpg ~ hp * am * vs, data = mtcars)\navg_comparisons(mod, variables = \"hp\", by = c(\"vs\", \"am\"))\n\nlibrary(nnet)\nmod <- multinom(factor(gear) ~ mpg + am * vs, data = mtcars, trace = FALSE)\nby <- data.frame(\n group = c(\"3\", \"4\", \"5\"),\n by = c(\"3,4\", \"3,4\", \"5\"))\ncomparisons(mod, type = \"probs\", by = by)", + "text": "Plot slopes on the y-axis against values of one or more predictors (x-axis, colors/shapes, and facets).\nThe by argument is used to plot marginal slopes, that is, slopes made on the original data, but averaged by subgroups. This is analogous to using the by argument in the slopes() function.\nThe condition argument is used to plot conditional slopes, that is, slopes computed on a user-specified grid. This is analogous to using the newdata argument and datagrid() function in a slopes() call. All variables whose values are not specified explicitly are treated as usual by datagrid(), that is, they are held at their mean or mode (or rounded mean for integers). This includes grouping variables in mixed-effects models, so analysts who fit such models may want to specify the groups of interest using the condition argument, or supply model-specific arguments to compute population-level estimates. See details below. See the \"Plots\" vignette and website for tutorials and information on how to customize plots:\n\n\nhttps://marginaleffects.com/vignettes/plot.html\n\n\nhttps://marginaleffects.com\n\n\nplot_slopes(\n model,\n variables = NULL,\n condition = NULL,\n by = NULL,\n newdata = NULL,\n type = NULL,\n vcov = NULL,\n conf_level = 0.95,\n wts = FALSE,\n slope = \"dydx\",\n rug = FALSE,\n gray = FALSE,\n draw = TRUE,\n ...\n)\n\n\n\n\n\nmodel\n\n\nModel object\n\n\n\n\nvariables\n\n\nName of the variable whose marginal effect (slope) we want to plot on the y-axis.\n\n\n\n\ncondition\n\n\nConditional slopes\n\n\nCharacter vector (max length 4): Names of the predictors to display.\n\n\nNamed list (max length 4): List names correspond to predictors. List elements can be:\n\n\nNumeric vector\n\n\nFunction which returns a numeric vector or a set of unique categorical values\n\n\nShortcut strings for common reference values: \"minmax\", \"quartile\", \"threenum\"\n\n\n\n\n1: x-axis. 2: color/shape. 3: facet (wrap if no fourth variable, otherwise cols of grid). 
4: facet (rows of grid).\n\n\nNumeric variables in positions 2 and 3 are summarized by Tukey’s five numbers ?stats::fivenum.\n\n\n\n\n\n\nby\n\n\nAggregate unit-level estimates (aka, marginalize, average over). Valid inputs:\n\n\nFALSE: return the original unit-level estimates.\n\n\nTRUE: aggregate estimates for each term.\n\n\nCharacter vector of column names in newdata or in the data frame produced by calling the function without the by argument.\n\n\nData frame with a by column of group labels, and merging columns shared by newdata or the data frame produced by calling the same function without the by argument.\n\n\nSee examples below.\n\n\nFor more complex aggregations, you can use the FUN argument of the hypotheses() function. See that function’s documentation and the Hypothesis Test vignettes on the marginaleffects website.\n\n\n\n\n\n\nnewdata\n\n\nWhen newdata is NULL, the grid is determined by the condition argument. When newdata is not NULL, the argument behaves in the same way as in the slopes() function.\n\n\n\n\ntype\n\n\nstring indicates the type (scale) of the predictions used to compute contrasts or slopes. This can differ based on the model type, but will typically be a string such as: \"response\", \"link\", \"probs\", or \"zero\". When an unsupported string is entered, the model-specific list of acceptable values is returned in an error message. When type is NULL, the first entry in the error message is used by default.\n\n\n\n\nvcov\n\n\nType of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:\n\n\nFALSE: Do not compute standard errors. This can speed up computation considerably.\n\n\nTRUE: Unit-level standard errors using the default vcov(model) variance-covariance matrix.\n\n\nString which indicates the kind of uncertainty estimates to return.\n\n\nHeteroskedasticity-consistent: “HC”, “HC0”, “HC1”, “HC2”, “HC3”, “HC4”, “HC4m”, “HC5”. See ?sandwich::vcovHC\n\n\nHeteroskedasticity and autocorrelation consistent: “HAC”\n\n\nMixed-Models degrees of freedom: \"satterthwaite\", \"kenward-roger\"\n\n\nOther: “NeweyWest”, “KernHAC”, “OPG”. See the sandwich package documentation.\n\n\n\n\nOne-sided formula which indicates the name of cluster variables (e.g., ~unit_id). This formula is passed to the cluster argument of the sandwich::vcovCL function.\n\n\nSquare covariance matrix\n\n\nFunction which returns a covariance matrix (e.g., stats::vcov(model))\n\n\n\n\n\n\nconf_level\n\n\nnumeric value between 0 and 1. Confidence level to use to build a confidence interval.\n\n\n\n\nwts\n\n\nlogical, string or numeric: weights to use when computing average predictions, contrasts or slopes. These weights only affect the averaging in avg_*() or with the by argument, and not unit-level estimates. See ?weighted.mean\n\n\nstring: column name of the weights variable in newdata. When supplying a column name to wts, it is recommended to supply the original data (including the weights variable) explicitly to newdata.\n\n\nnumeric: vector of length equal to the number of rows in the original data or in newdata (if supplied).\n\n\nFALSE: Equal weights.\n\n\nTRUE: Extract weights from the fitted object with insight::find_weights() and use them when taking weighted averages of estimates. 
Warning: newdata=datagrid() returns a single average weight, which is equivalent to using wts=FALSE\n\n\n\n\n\nslope\n\n\nstring indicates the type of slope or (semi-)elasticity to compute:\n\n\n\"dydx\": dY/dX\n\n\n\"eyex\": dY/dX * X / Y\n\n\n\"eydx\": dY/dX / Y\n\n\n\"dyex\": dY/dX * X\n\n\nY is the predicted value of the outcome; X is the observed value of the predictor.\n\n\n\n\n\n\nrug\n\n\nTRUE displays tick marks on the axes to mark the distribution of raw data.\n\n\n\n\ngray\n\n\nFALSE grayscale or color plot\n\n\n\n\ndraw\n\n\nTRUE returns a ggplot2 plot. FALSE returns a data.frame of the underlying data.\n\n\n\n\n…\n\n\nAdditional arguments are passed to the predict() method supplied by the modeling package. These arguments are particularly useful for mixed-effects or bayesian models (see the online vignettes on the marginaleffects website). Available arguments can vary from model to model, depending on the range of supported arguments by each modeling package. See the \"Model-Specific Arguments\" section of the ?slopes documentation for a non-exhaustive list of available arguments.\n\n\n\nA ggplot2 object\n\nSome model types allow model-specific arguments to modify the nature of marginal effects, predictions, marginal means, and contrasts. Please report other package-specific predict() arguments on Github so we can add them to the table below.\nhttps://github.com/vincentarelbundock/marginaleffects/issues\n\n\n\nPackage\n\n\nClass\n\n\nArgument\n\n\nDocumentation\n\n\n\n\nbrms\n\n\nbrmsfit\n\n\nndraws\n\n\nbrms::posterior_predict\n\n\n\n\n\n\n\n\nre_formula\n\n\nbrms::posterior_predict\n\n\n\n\nlme4\n\n\nmerMod\n\n\nre.form\n\n\nlme4::predict.merMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nlme4::predict.merMod\n\n\n\n\nglmmTMB\n\n\nglmmTMB\n\n\nre.form\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nzitype\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\nmgcv\n\n\nbam\n\n\nexclude\n\n\nmgcv::predict.bam\n\n\n\n\n\n\ngam\n\n\nexclude\n\n\nmgcv::predict.gam\n\n\n\n\nrobustlmm\n\n\nrlmerMod\n\n\nre.form\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\nMCMCglmm\n\n\nMCMCglmm\n\n\nndraws\n\n\n\n\n\n\nsampleSelection\n\n\nselection\n\n\npart\n\n\nsampleSelection::predict.selection\n\n\n\n\n\n\n\n\nlibrary(\"marginaleffects\")\n\nmod <- lm(mpg ~ hp * drat * factor(am), data = mtcars)\n\nplot_slopes(mod, variables = \"hp\", condition = \"drat\")\n\n\n\n\n\n\nplot_slopes(mod, variables = \"hp\", condition = c(\"drat\", \"am\"))\n\n\n\n\n\n\nplot_slopes(mod, variables = \"hp\", condition = list(\"am\", \"drat\" = 3:5))\n\n\n\n\n\n\nplot_slopes(mod, variables = \"am\", condition = list(\"hp\", \"drat\" = range))\n\n\n\n\n\n\nplot_slopes(mod, variables = \"am\", condition = list(\"hp\", \"drat\" = \"threenum\"))",
    "crumbs": [
      "Model to Meaning",
      "Functions",
      "`plot_slopes`"
    ]
  },
  {
    "objectID": "man/plot_comparisons.html",
    "href": "man/plot_comparisons.html",
    "title": "",
    "section": "",
    "text": "Plot comparisons on the y-axis against values of one or more predictors (x-axis, colors/shapes, and facets).\nThe by argument is used to plot marginal comparisons, that is, comparisons made on the original data, but averaged by subgroups. 
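For example, a minimal sketch of a marginal-comparisons plot averaged by subgroups (the model is illustrative):\n\nlibrary(marginaleffects)\nmod <- lm(mpg ~ hp * factor(am), data = mtcars)\nplot_comparisons(mod, variables = \"hp\", by = \"am\")\n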
This is analogous to using the by argument in the comparisons() function.\nThe condition argument is used to plot conditional comparisons, that is, comparisons made on a user-specified grid. This is analogous to using the newdata argument and datagrid() function in a comparisons() call. All variables whose values are not specified explicitly are treated as usual by datagrid(), that is, they are held at their mean or mode (or rounded mean for integers). This includes grouping variables in mixed-effects models, so analysts who fit such models may want to specify the groups of interest using the condition argument, or supply model-specific arguments to compute population-level estimates. See details below.\nSee the \"Plots\" vignette and website for tutorials and information on how to customize plots:\n\n\nhttps://marginaleffects.com/vignettes/plot.html\n\n\nhttps://marginaleffects.com\n\n\nplot_comparisons(\n model,\n variables = NULL,\n condition = NULL,\n by = NULL,\n newdata = NULL,\n type = NULL,\n vcov = NULL,\n conf_level = 0.95,\n wts = FALSE,\n comparison = \"difference\",\n transform = NULL,\n rug = FALSE,\n gray = FALSE,\n draw = TRUE,\n ...\n)\n\n\n\n\n\nmodel\n\n\nModel object\n\n\n\n\nvariables\n\n\nName of the variable whose contrast we want to plot on the y-axis.\n\n\n\n\ncondition\n\n\nConditional slopes\n\n\nCharacter vector (max length 4): Names of the predictors to display.\n\n\nNamed list (max length 4): List names correspond to predictors. List elements can be:\n\n\nNumeric vector\n\n\nFunction which returns a numeric vector or a set of unique categorical values\n\n\nShortcut strings for common reference values: \"minmax\", \"quartile\", \"threenum\"\n\n\n\n\n1: x-axis. 2: color/shape. 3: facet (wrap if no fourth variable, otherwise cols of grid). 4: facet (rows of grid).\n\n\nNumeric variables in positions 2 and 3 are summarized by Tukey’s five numbers ?stats::fivenum.\n\n\n\n\n\n\nby\n\n\nAggregate unit-level estimates (aka, marginalize, average over). Valid inputs:\n\n\nFALSE: return the original unit-level estimates.\n\n\nTRUE: aggregate estimates for each term.\n\n\nCharacter vector of column names in newdata or in the data frame produced by calling the function without the by argument.\n\n\nData frame with a by column of group labels, and merging columns shared by newdata or the data frame produced by calling the same function without the by argument.\n\n\nSee examples below.\n\n\nFor more complex aggregations, you can use the FUN argument of the hypotheses() function. See that function’s documentation and the Hypothesis Test vignettes on the marginaleffects website.\n\n\n\n\n\n\nnewdata\n\n\nWhen newdata is NULL, the grid is determined by the condition argument. When newdata is not NULL, the argument behaves in the same way as in the comparisons() function.\n\n\n\n\ntype\n\n\nstring indicates the type (scale) of the predictions used to compute contrasts or slopes. This can differ based on the model type, but will typically be a string such as: \"response\", \"link\", \"probs\", or \"zero\". When an unsupported string is entered, the model-specific list of acceptable values is returned in an error message. When type is NULL, the first entry in the error message is used by default.\n\n\n\n\nvcov\n\n\nType of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:\n\n\nFALSE: Do not compute standard errors. 
This can speed up computation considerably.\n\n\nTRUE: Unit-level standard errors using the default vcov(model) variance-covariance matrix.\n\n\nString which indicates the kind of uncertainty estimates to return.\n\n\nHeteroskedasticity-consistent: “HC”, “HC0”, “HC1”, “HC2”, “HC3”, “HC4”, “HC4m”, “HC5”. See ?sandwich::vcovHC\n\n\nHeteroskedasticity and autocorrelation consistent: “HAC”\n\n\nMixed-Models degrees of freedom: \"satterthwaite\", \"kenward-roger\"\n\n\nOther: “NeweyWest”, “KernHAC”, “OPG”. See the sandwich package documentation.\n\n\n\n\nOne-sided formula which indicates the name of cluster variables (e.g., ~unit_id). This formula is passed to the cluster argument of the sandwich::vcovCL function.\n\n\nSquare covariance matrix\n\n\nFunction which returns a covariance matrix (e.g., stats::vcov(model))\n\n\n\n\n\n\nconf_level\n\n\nnumeric value between 0 and 1. Confidence level to use to build a confidence interval.\n\n\n\n\nwts\n\n\nlogical, string or numeric: weights to use when computing average predictions, contrasts or slopes. These weights only affect the averaging in avg_*() or with the by argument, and not unit-level estimates. See ?weighted.mean\n\n\nstring: column name of the weights variable in newdata. When supplying a column name to wts, it is recommended to supply the original data (including the weights variable) explicitly to newdata.\n\n\nnumeric: vector of length equal to the number of rows in the original data or in newdata (if supplied).\n\n\nFALSE: Equal weights.\n\n\nTRUE: Extract weights from the fitted object with insight::find_weights() and use them when taking weighted averages of estimates. Warning: newdata=datagrid() returns a single average weight, which is equivalent to using wts=FALSE\n\n\n\n\n\n\ncomparison\n\n\nHow should pairs of predictions be compared? Difference, ratio, odds ratio, or user-defined functions.\n\n\nstring: shortcuts to common contrast functions.\n\n\nSupported shortcuts strings: difference, differenceavg, differenceavgwts, dydx, eyex, eydx, dyex, dydxavg, eyexavg, eydxavg, dyexavg, dydxavgwts, eyexavgwts, eydxavgwts, dyexavgwts, ratio, ratioavg, ratioavgwts, lnratio, lnratioavg, lnratioavgwts, lnor, lnoravg, lnoravgwts, lift, liftavg, liftavgwts, expdydx, expdydxavg, expdydxavgwts\n\n\nSee the Comparisons section below for definitions of each transformation.\n\n\n\n\nfunction: accept two equal-length numeric vectors of adjusted predictions (hi and lo) and returns a vector of contrasts of the same length, or a unique numeric value.\n\n\nSee the Transformations section below for examples of valid functions.\n\n\n\n\n\n\n\n\ntransform\n\n\nstring or function. Transformation applied to unit-level estimates and confidence intervals just before the function returns results. Functions must accept a vector and return a vector of the same length. Support string shortcuts: \"exp\", \"ln\"\n\n\n\n\nrug\n\n\nTRUE displays tick marks on the axes to mark the distribution of raw data.\n\n\n\n\ngray\n\n\nFALSE grayscale or color plot\n\n\n\n\ndraw\n\n\nTRUE returns a ggplot2 plot. FALSE returns a data.frame of the underlying data.\n\n\n\n\n…\n\n\nAdditional arguments are passed to the predict() method supplied by the modeling package.These arguments are particularly useful for mixed-effects or bayesian models (see the online vignettes on the marginaleffects website). Available arguments can vary from model to model, depending on the range of supported arguments by each modeling package. 
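For instance, with a mixed-effects model one might pass lme4’s re.form argument through these dots to request population-level estimates (a sketch, assuming the lme4 package and its sleepstudy data):\n\nlibrary(lme4)\nlibrary(marginaleffects)\nmod <- lmer(Reaction ~ Days + (Days | Subject), data = sleepstudy)\n# re.form = NA is forwarded to lme4::predict.merMod\nplot_comparisons(mod, variables = \"Days\", condition = \"Days\", re.form = NA)\n\n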
See the \"Model-Specific Arguments\" section of the ?slopes documentation for a non-exhaustive list of available arguments.\n\n\n\nA ggplot2 object\n\nSome model types allow model-specific arguments to modify the nature of marginal effects, predictions, marginal means, and contrasts. Please report other package-specific predict() arguments on Github so we can add them to the table below.\nhttps://github.com/vincentarelbundock/marginaleffects/issues\n\n\n\nPackage\n\n\nClass\n\n\nArgument\n\n\nDocumentation\n\n\n\n\nbrms\n\n\nbrmsfit\n\n\nndraws\n\n\nbrms::posterior_predict\n\n\n\n\n\n\n\n\nre_formula\n\n\nbrms::posterior_predict\n\n\n\n\nlme4\n\n\nmerMod\n\n\nre.form\n\n\nlme4::predict.merMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nlme4::predict.merMod\n\n\n\n\nglmmTMB\n\n\nglmmTMB\n\n\nre.form\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nzitype\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\nmgcv\n\n\nbam\n\n\nexclude\n\n\nmgcv::predict.bam\n\n\n\n\n\n\ngam\n\n\nexclude\n\n\nmgcv::predict.gam\n\n\n\n\nrobustlmm\n\n\nrlmerMod\n\n\nre.form\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\nMCMCglmm\n\n\nMCMCglmm\n\n\nndraws\n\n\n\n\n\n\nsampleSelection\n\n\nselection\n\n\npart\n\n\nsampleSelection::predict.selection\n\n\n\n\n\n\n\n\nlibrary(\"marginaleffects\")\n\nmod <- lm(mpg ~ hp * drat * factor(am), data = mtcars)\n\nplot_comparisons(mod, variables = \"hp\", condition = \"drat\")\n\n\n\n\n\n\nplot_comparisons(mod, variables = \"hp\", condition = c(\"drat\", \"am\"))\n\n\n\n\n\n\nplot_comparisons(mod, variables = \"hp\", condition = list(\"am\", \"drat\" = 3:5))\n\n\n\n\n\n\nplot_comparisons(mod, variables = \"am\", condition = list(\"hp\", \"drat\" = range))\n\n\n\n\n\n\nplot_comparisons(mod, variables = \"am\", condition = list(\"hp\", \"drat\" = \"threenum\"))", + "text": "Plot predictions on the y-axis against values of one or more predictors (x-axis, colors/shapes, and facets).\nThe by argument is used to plot marginal predictions, that is, predictions made on the original data, but averaged by subgroups. This is analogous to using the by argument in the predictions() function.\nThe condition argument is used to plot conditional predictions, that is, predictions made on a user-specified grid. This is analogous to using the newdata argument and datagrid() function in a predictions() call. All variables whose values are not specified explicitly are treated as usual by datagrid(), that is, they are held at their mean or mode (or rounded mean for integers). This includes grouping variables in mixed-effects models, so analysts who fit such models may want to specify the groups of interest using the condition argument, or supply model-specific arguments to compute population-level estimates. See details below.\nSee the \"Plots\" vignette and website for tutorials and information on how to customize plots:\n\n\nhttps://marginaleffects.com/vignettes/plot.html\n\n\nhttps://marginaleffects.com\n\n\nplot_predictions(\n model,\n condition = NULL,\n by = NULL,\n newdata = NULL,\n type = NULL,\n vcov = NULL,\n conf_level = 0.95,\n wts = FALSE,\n transform = NULL,\n points = 0,\n rug = FALSE,\n gray = FALSE,\n draw = TRUE,\n ...\n)\n\n\n\n\n\nmodel\n\n\nModel object\n\n\n\n\ncondition\n\n\nConditional predictions\n\n\nCharacter vector (max length 4): Names of the predictors to display.\n\n\nNamed list (max length 4): List names correspond to predictors. 
List elements can be:\n\n\nNumeric vector\n\n\nFunction which returns a numeric vector or a set of unique categorical values\n\n\nShortcut strings for common reference values: \"minmax\", \"quartile\", \"threenum\"\n\n\n\n\n1: x-axis. 2: color/shape. 3: facet (wrap if no fourth variable, otherwise cols of grid). 4: facet (rows of grid).\n\n\nNumeric variables in positions 2 and 3 are summarized by Tukey’s five numbers ?stats::fivenum\n\n\n\n\n\n\nby\n\n\nMarginal predictions\n\n\nCharacter vector (max length 3): Names of the categorical predictors to marginalize across.\n\n\n1: x-axis. 2: color. 3: facets.\n\n\n\n\n\n\nnewdata\n\n\nWhen newdata is NULL, the grid is determined by the condition argument. When newdata is not NULL, the argument behaves in the same way as in the predictions() function.\n\n\n\n\ntype\n\n\nstring indicates the type (scale) of the predictions used to compute contrasts or slopes. This can differ based on the model type, but will typically be a string such as: \"response\", \"link\", \"probs\", or \"zero\". When an unsupported string is entered, the model-specific list of acceptable values is returned in an error message. When type is NULL, the first entry in the error message is used by default.\n\n\n\n\nvcov\n\n\nType of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:\n\n\nFALSE: Do not compute standard errors. This can speed up computation considerably.\n\n\nTRUE: Unit-level standard errors using the default vcov(model) variance-covariance matrix.\n\n\nString which indicates the kind of uncertainty estimates to return.\n\n\nHeteroskedasticity-consistent: “HC”, “HC0”, “HC1”, “HC2”, “HC3”, “HC4”, “HC4m”, “HC5”. See ?sandwich::vcovHC\n\n\nHeteroskedasticity and autocorrelation consistent: “HAC”\n\n\nMixed-Models degrees of freedom: \"satterthwaite\", \"kenward-roger\"\n\n\nOther: “NeweyWest”, “KernHAC”, “OPG”. See the sandwich package documentation.\n\n\n\n\nOne-sided formula which indicates the name of cluster variables (e.g., ~unit_id). This formula is passed to the cluster argument of the sandwich::vcovCL function.\n\n\nSquare covariance matrix\n\n\nFunction which returns a covariance matrix (e.g., stats::vcov(model))\n\n\n\n\n\n\nconf_level\n\n\nnumeric value between 0 and 1. Confidence level to use to build a confidence interval.\n\n\n\n\nwts\n\n\nlogical, string or numeric: weights to use when computing average predictions, contrasts or slopes. These weights only affect the averaging in avg_*() or with the by argument, and not unit-level estimates. See ?weighted.mean\n\n\nstring: column name of the weights variable in newdata. When supplying a column name to wts, it is recommended to supply the original data (including the weights variable) explicitly to newdata.\n\n\nnumeric: vector of length equal to the number of rows in the original data or in newdata (if supplied).\n\n\nFALSE: Equal weights.\n\n\nTRUE: Extract weights from the fitted object with insight::find_weights() and use them when taking weighted averages of estimates. Warning: newdata=datagrid() returns a single average weight, which is equivalent to using wts=FALSE\n\n\n\n\n\n\ntransform\n\n\nA function applied to unit-level adjusted predictions and confidence intervals just before the function returns results. For bayesian models, this function is applied to individual draws from the posterior distribution, before computing summaries.\n\n\n\n\npoints\n\n\nNumber between 0 and 1 which controls the transparency of raw data points. 
0 (default) does not display any points. Warning: The points displayed are raw data, so the resulting plot is not a \"partial residual plot.\"\n\n\n\n\nrug\n\n\nTRUE displays tick marks on the axes to mark the distribution of raw data.\n\n\n\n\ngray\n\n\nFALSE grayscale or color plot\n\n\n\n\ndraw\n\n\nTRUE returns a ggplot2 plot. FALSE returns a data.frame of the underlying data.\n\n\n\n\n…\n\n\nAdditional arguments are passed to the predict() method supplied by the modeling package.These arguments are particularly useful for mixed-effects or bayesian models (see the online vignettes on the marginaleffects website). Available arguments can vary from model to model, depending on the range of supported arguments by each modeling package. See the \"Model-Specific Arguments\" section of the ?slopes documentation for a non-exhaustive list of available arguments.\n\n\n\nA ggplot2 object or data frame (if draw=FALSE)\n\nSome model types allow model-specific arguments to modify the nature of marginal effects, predictions, marginal means, and contrasts. Please report other package-specific predict() arguments on Github so we can add them to the table below.\nhttps://github.com/vincentarelbundock/marginaleffects/issues\n\n\n\nPackage\n\n\nClass\n\n\nArgument\n\n\nDocumentation\n\n\n\n\nbrms\n\n\nbrmsfit\n\n\nndraws\n\n\nbrms::posterior_predict\n\n\n\n\n\n\n\n\nre_formula\n\n\nbrms::posterior_predict\n\n\n\n\nlme4\n\n\nmerMod\n\n\nre.form\n\n\nlme4::predict.merMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nlme4::predict.merMod\n\n\n\n\nglmmTMB\n\n\nglmmTMB\n\n\nre.form\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nzitype\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\nmgcv\n\n\nbam\n\n\nexclude\n\n\nmgcv::predict.bam\n\n\n\n\n\n\ngam\n\n\nexclude\n\n\nmgcv::predict.gam\n\n\n\n\nrobustlmm\n\n\nrlmerMod\n\n\nre.form\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\nMCMCglmm\n\n\nMCMCglmm\n\n\nndraws\n\n\n\n\n\n\nsampleSelection\n\n\nselection\n\n\npart\n\n\nsampleSelection::predict.selection\n\n\n\n\n\n\n\nThe type argument determines the scale of the predictions used to compute quantities of interest with functions from the marginaleffects package. Admissible values for type depend on the model object. When users specify an incorrect value for type, marginaleffects will raise an informative error with a list of valid type values for the specific model object. The first entry in the list in that error message is the default type.\nThe invlink(link) is a special type defined by marginaleffects. It is available for some (but not all) models, and only for the predictions() function. With this link type, we first compute predictions on the link scale, then we use the inverse link function to backtransform the predictions to the response scale. This is useful for models with non-linear link functions as it can ensure that confidence intervals stay within desirable bounds, ex: 0 to 1 for a logit model. Note that an average of estimates with type=“invlink(link)” will not always be equivalent to the average of estimates with type=“response”. This type is default when calling predictions(). 
It is available—but not default—when calling avg_predictions() or predictions() with the by argument.\nSome of the most common type values are:\nresponse, link, E, Ep, average, class, conditional, count, cum.prob, cumhaz, cumprob, density, detection, disp, ev, expected, expvalue, fitted, hazard, invlink(link), latent, latent_N, linear, linear.predictor, linpred, location, lp, mean, numeric, p, ppd, pr, precision, prediction, prob, probability, probs, quantile, risk, rmst, scale, survival, unconditional, utility, variance, xb, zero, zlink, zprob\n\n\nlibrary(\"marginaleffects\")\n\nmod <- lm(mpg ~ hp + wt, data = mtcars)\nplot_predictions(mod, condition = \"wt\")\n\n\n\n\n\n\nmod <- lm(mpg ~ hp * wt * am, data = mtcars)\nplot_predictions(mod, condition = c(\"hp\", \"wt\"))\n\n\n\n\n\n\nplot_predictions(mod, condition = list(\"hp\", wt = \"threenum\"))\n\n\n\n\n\n\nplot_predictions(mod, condition = list(\"hp\", wt = range))", "crumbs": [ "Model to Meaning", "Functions", - "`plot_comparisons`" + "`plot_predictions`" ] }, { - "objectID": "man/plot_comparisons.html#plot-conditional-or-marginal-comparisons", - "href": "man/plot_comparisons.html#plot-conditional-or-marginal-comparisons", + "objectID": "man/plot_predictions.html#plot-conditional-or-marginal-predictions", + "href": "man/plot_predictions.html#plot-conditional-or-marginal-predictions", "title": "", "section": "", - "text": "Plot comparisons on the y-axis against values of one or more predictors (x-axis, colors/shapes, and facets).\nThe by argument is used to plot marginal comparisons, that is, comparisons made on the original data, but averaged by subgroups. This is analogous to using the by argument in the comparisons() function.\nThe condition argument is used to plot conditional comparisons, that is, comparisons made on a user-specified grid. This is analogous to using the newdata argument and datagrid() function in a comparisons() call. All variables whose values are not specified explicitly are treated as usual by datagrid(), that is, they are held at their mean or mode (or rounded mean for integers). This includes grouping variables in mixed-effects models, so analysts who fit such models may want to specify the groups of interest using the condition argument, or supply model-specific arguments to compute population-level estimates. See details below.\nSee the \"Plots\" vignette and website for tutorials and information on how to customize plots:\n\n\nhttps://marginaleffects.com/vignettes/plot.html\n\n\nhttps://marginaleffects.com\n\n\nplot_comparisons(\n model,\n variables = NULL,\n condition = NULL,\n by = NULL,\n newdata = NULL,\n type = NULL,\n vcov = NULL,\n conf_level = 0.95,\n wts = FALSE,\n comparison = \"difference\",\n transform = NULL,\n rug = FALSE,\n gray = FALSE,\n draw = TRUE,\n ...\n)\n\n\n\n\n\nmodel\n\n\nModel object\n\n\n\n\nvariables\n\n\nName of the variable whose contrast we want to plot on the y-axis.\n\n\n\n\ncondition\n\n\nConditional slopes\n\n\nCharacter vector (max length 4): Names of the predictors to display.\n\n\nNamed list (max length 4): List names correspond to predictors. List elements can be:\n\n\nNumeric vector\n\n\nFunction which returns a numeric vector or a set of unique categorical values\n\n\nShortcut strings for common reference values: \"minmax\", \"quartile\", \"threenum\"\n\n\n\n\n1: x-axis. 2: color/shape. 3: facet (wrap if no fourth variable, otherwise cols of grid). 
4: facet (rows of grid).\n\n\nNumeric variables in positions 2 and 3 are summarized by Tukey’s five numbers ?stats::fivenum.\n\n\n\n\n\n\nby\n\n\nAggregate unit-level estimates (aka, marginalize, average over). Valid inputs:\n\n\nFALSE: return the original unit-level estimates.\n\n\nTRUE: aggregate estimates for each term.\n\n\nCharacter vector of column names in newdata or in the data frame produced by calling the function without the by argument.\n\n\nData frame with a by column of group labels, and merging columns shared by newdata or the data frame produced by calling the same function without the by argument.\n\n\nSee examples below.\n\n\nFor more complex aggregations, you can use the FUN argument of the hypotheses() function. See that function’s documentation and the Hypothesis Test vignettes on the marginaleffects website.\n\n\n\n\n\n\nnewdata\n\n\nWhen newdata is NULL, the grid is determined by the condition argument. When newdata is not NULL, the argument behaves in the same way as in the comparisons() function.\n\n\n\n\ntype\n\n\nstring indicates the type (scale) of the predictions used to compute contrasts or slopes. This can differ based on the model type, but will typically be a string such as: \"response\", \"link\", \"probs\", or \"zero\". When an unsupported string is entered, the model-specific list of acceptable values is returned in an error message. When type is NULL, the first entry in the error message is used by default.\n\n\n\n\nvcov\n\n\nType of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:\n\n\nFALSE: Do not compute standard errors. This can speed up computation considerably.\n\n\nTRUE: Unit-level standard errors using the default vcov(model) variance-covariance matrix.\n\n\nString which indicates the kind of uncertainty estimates to return.\n\n\nHeteroskedasticity-consistent: “HC”, “HC0”, “HC1”, “HC2”, “HC3”, “HC4”, “HC4m”, “HC5”. See ?sandwich::vcovHC\n\n\nHeteroskedasticity and autocorrelation consistent: “HAC”\n\n\nMixed-Models degrees of freedom: \"satterthwaite\", \"kenward-roger\"\n\n\nOther: “NeweyWest”, “KernHAC”, “OPG”. See the sandwich package documentation.\n\n\n\n\nOne-sided formula which indicates the name of cluster variables (e.g., ~unit_id). This formula is passed to the cluster argument of the sandwich::vcovCL function.\n\n\nSquare covariance matrix\n\n\nFunction which returns a covariance matrix (e.g., stats::vcov(model))\n\n\n\n\n\n\nconf_level\n\n\nnumeric value between 0 and 1. Confidence level to use to build a confidence interval.\n\n\n\n\nwts\n\n\nlogical, string or numeric: weights to use when computing average predictions, contrasts or slopes. These weights only affect the averaging in avg_*() or with the by argument, and not unit-level estimates. See ?weighted.mean\n\n\nstring: column name of the weights variable in newdata. When supplying a column name to wts, it is recommended to supply the original data (including the weights variable) explicitly to newdata.\n\n\nnumeric: vector of length equal to the number of rows in the original data or in newdata (if supplied).\n\n\nFALSE: Equal weights.\n\n\nTRUE: Extract weights from the fitted object with insight::find_weights() and use them when taking weighted averages of estimates. Warning: newdata=datagrid() returns a single average weight, which is equivalent to using wts=FALSE\n\n\n\n\n\n\ncomparison\n\n\nHow should pairs of predictions be compared? 
Difference, ratio, odds ratio, or user-defined functions.\n\n\nstring: shortcuts to common contrast functions.\n\n\nSupported shortcuts strings: difference, differenceavg, differenceavgwts, dydx, eyex, eydx, dyex, dydxavg, eyexavg, eydxavg, dyexavg, dydxavgwts, eyexavgwts, eydxavgwts, dyexavgwts, ratio, ratioavg, ratioavgwts, lnratio, lnratioavg, lnratioavgwts, lnor, lnoravg, lnoravgwts, lift, liftavg, liftavgwts, expdydx, expdydxavg, expdydxavgwts\n\n\nSee the Comparisons section below for definitions of each transformation.\n\n\n\n\nfunction: accept two equal-length numeric vectors of adjusted predictions (hi and lo) and returns a vector of contrasts of the same length, or a unique numeric value.\n\n\nSee the Transformations section below for examples of valid functions.\n\n\n\n\n\n\n\n\ntransform\n\n\nstring or function. Transformation applied to unit-level estimates and confidence intervals just before the function returns results. Functions must accept a vector and return a vector of the same length. Support string shortcuts: \"exp\", \"ln\"\n\n\n\n\nrug\n\n\nTRUE displays tick marks on the axes to mark the distribution of raw data.\n\n\n\n\ngray\n\n\nFALSE grayscale or color plot\n\n\n\n\ndraw\n\n\nTRUE returns a ggplot2 plot. FALSE returns a data.frame of the underlying data.\n\n\n\n\n…\n\n\nAdditional arguments are passed to the predict() method supplied by the modeling package.These arguments are particularly useful for mixed-effects or bayesian models (see the online vignettes on the marginaleffects website). Available arguments can vary from model to model, depending on the range of supported arguments by each modeling package. See the \"Model-Specific Arguments\" section of the ?slopes documentation for a non-exhaustive list of available arguments.\n\n\n\nA ggplot2 object\n\nSome model types allow model-specific arguments to modify the nature of marginal effects, predictions, marginal means, and contrasts. 
Please report other package-specific predict() arguments on Github so we can add them to the table below.\nhttps://github.com/vincentarelbundock/marginaleffects/issues\n\n\n\nPackage\n\n\nClass\n\n\nArgument\n\n\nDocumentation\n\n\n\n\nbrms\n\n\nbrmsfit\n\n\nndraws\n\n\nbrms::posterior_predict\n\n\n\n\n\n\n\n\nre_formula\n\n\nbrms::posterior_predict\n\n\n\n\nlme4\n\n\nmerMod\n\n\nre.form\n\n\nlme4::predict.merMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nlme4::predict.merMod\n\n\n\n\nglmmTMB\n\n\nglmmTMB\n\n\nre.form\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nzitype\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\nmgcv\n\n\nbam\n\n\nexclude\n\n\nmgcv::predict.bam\n\n\n\n\n\n\ngam\n\n\nexclude\n\n\nmgcv::predict.gam\n\n\n\n\nrobustlmm\n\n\nrlmerMod\n\n\nre.form\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\nMCMCglmm\n\n\nMCMCglmm\n\n\nndraws\n\n\n\n\n\n\nsampleSelection\n\n\nselection\n\n\npart\n\n\nsampleSelection::predict.selection\n\n\n\n\n\n\n\n\nlibrary(\"marginaleffects\")\n\nmod <- lm(mpg ~ hp * drat * factor(am), data = mtcars)\n\nplot_comparisons(mod, variables = \"hp\", condition = \"drat\")\n\n\n\n\n\n\nplot_comparisons(mod, variables = \"hp\", condition = c(\"drat\", \"am\"))\n\n\n\n\n\n\nplot_comparisons(mod, variables = \"hp\", condition = list(\"am\", \"drat\" = 3:5))\n\n\n\n\n\n\nplot_comparisons(mod, variables = \"am\", condition = list(\"hp\", \"drat\" = range))\n\n\n\n\n\n\nplot_comparisons(mod, variables = \"am\", condition = list(\"hp\", \"drat\" = \"threenum\"))",
    "crumbs": [
      "Model to Meaning",
      "Functions",
      "`plot_comparisons`"
    ]
  },
  {
    "objectID": "man/posterior_draws.html",
    "href": "man/posterior_draws.html",
    "title": "",
    "section": "",
    "text": "Extract Posterior Draws or Bootstrap Resamples from marginaleffects Objects\n\n\n\nposterior_draws(x, shape = \"long\")\n\n\n\n\n\n\n\nx\n\n\nAn object produced by a marginaleffects package function, such as predictions(), avg_slopes(), hypotheses(), etc.\n\n\n\n\nshape\n\n\nstring indicating the shape of the output format:\n\n\n\"long\": long format data frame\n\n\n\"DxP\": Matrix with draws as rows and parameters as columns\n\n\n\"PxD\": Matrix with parameters as rows and draws as columns\n\n\n\"rvar\": Random variable datatype (see posterior package documentation).\n\n\n\n\n\n\n\n\nA data.frame with drawid and draw columns.",
    "crumbs": [
      "Model to Meaning",
      "Functions",
      "`posterior_draws`"
    ]
  },
  {
    "objectID": "man/plot_predictions.html#plot-conditional-or-marginal-predictions",
    "href": "man/plot_predictions.html#plot-conditional-or-marginal-predictions",
    "title": "",
    "section": "",
    "text": "Plot predictions on the y-axis against values of one or more predictors (x-axis, colors/shapes, and facets).\nThe by argument is used to plot marginal predictions, that is, predictions made on the original data, but averaged by subgroups. This is analogous to using the by argument in the predictions() function.\nThe condition argument is used to plot conditional predictions, that is, predictions made on a user-specified grid. This is analogous to using the newdata argument and datagrid() function in a predictions() call. All variables whose values are not specified explicitly are treated as usual by datagrid(), that is, they are held at their mean or mode (or rounded mean for integers). This includes grouping variables in mixed-effects models, so analysts who fit such models may want to specify the groups of interest using the condition argument, or supply model-specific arguments to compute population-level estimates. See details below.\nSee the \"Plots\" vignette and website for tutorials and information on how to customize plots:\n\n\nhttps://marginaleffects.com/vignettes/plot.html\n\n\nhttps://marginaleffects.com\n\n\nplot_predictions(\n model,\n condition = NULL,\n by = NULL,\n newdata = NULL,\n type = NULL,\n vcov = NULL,\n conf_level = 0.95,\n wts = FALSE,\n transform = NULL,\n points = 0,\n rug = FALSE,\n gray = FALSE,\n draw = TRUE,\n ...\n)\n\n\n\n\n\nmodel\n\n\nModel object\n\n\n\n\ncondition\n\n\nConditional predictions\n\n\nCharacter vector (max length 4): Names of the predictors to display.\n\n\nNamed list (max length 4): List names correspond to predictors. List elements can be:\n\n\nNumeric vector\n\n\nFunction which returns a numeric vector or a set of unique categorical values\n\n\nShortcut strings for common reference values: \"minmax\", \"quartile\", \"threenum\"\n\n\n\n\n1: x-axis. 2: color/shape. 3: facet (wrap if no fourth variable, otherwise cols of grid). 
4: facet (rows of grid).\n\n\nNumeric variables in positions 2 and 3 are summarized by Tukey’s five numbers ?stats::fivenum\n\n\n\n\n\n\nby\n\n\nMarginal predictions\n\n\nCharacter vector (max length 3): Names of the categorical predictors to marginalize across.\n\n\n1: x-axis. 2: color. 3: facets.\n\n\n\n\n\n\nnewdata\n\n\nWhen newdata is NULL, the grid is determined by the condition argument. When newdata is not NULL, the argument behaves in the same way as in the predictions() function.\n\n\n\n\ntype\n\n\nstring indicates the type (scale) of the predictions used to compute contrasts or slopes. This can differ based on the model type, but will typically be a string such as: \"response\", \"link\", \"probs\", or \"zero\". When an unsupported string is entered, the model-specific list of acceptable values is returned in an error message. When type is NULL, the first entry in the error message is used by default.\n\n\n\n\nvcov\n\n\nType of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:\n\n\nFALSE: Do not compute standard errors. This can speed up computation considerably.\n\n\nTRUE: Unit-level standard errors using the default vcov(model) variance-covariance matrix.\n\n\nString which indicates the kind of uncertainty estimates to return.\n\n\nHeteroskedasticity-consistent: “HC”, “HC0”, “HC1”, “HC2”, “HC3”, “HC4”, “HC4m”, “HC5”. See ?sandwich::vcovHC\n\n\nHeteroskedasticity and autocorrelation consistent: “HAC”\n\n\nMixed-Models degrees of freedom: \"satterthwaite\", \"kenward-roger\"\n\n\nOther: “NeweyWest”, “KernHAC”, “OPG”. See the sandwich package documentation.\n\n\n\n\nOne-sided formula which indicates the name of cluster variables (e.g., ~unit_id). This formula is passed to the cluster argument of the sandwich::vcovCL function.\n\n\nSquare covariance matrix\n\n\nFunction which returns a covariance matrix (e.g., stats::vcov(model))\n\n\n\n\n\n\nconf_level\n\n\nnumeric value between 0 and 1. Confidence level to use to build a confidence interval.\n\n\n\n\nwts\n\n\nlogical, string or numeric: weights to use when computing average predictions, contrasts or slopes. These weights only affect the averaging in avg_*() or with the by argument, and not unit-level estimates. See ?weighted.mean\n\n\nstring: column name of the weights variable in newdata. When supplying a column name to wts, it is recommended to supply the original data (including the weights variable) explicitly to newdata.\n\n\nnumeric: vector of length equal to the number of rows in the original data or in newdata (if supplied).\n\n\nFALSE: Equal weights.\n\n\nTRUE: Extract weights from the fitted object with insight::find_weights() and use them when taking weighted averages of estimates. Warning: newdata=datagrid() returns a single average weight, which is equivalent to using wts=FALSE\n\n\n\n\n\n\ntransform\n\n\nA function applied to unit-level adjusted predictions and confidence intervals just before the function returns results. For bayesian models, this function is applied to individual draws from the posterior distribution, before computing summaries.\n\n\n\n\npoints\n\n\nNumber between 0 and 1 which controls the transparency of raw data points. 0 (default) does not display any points. Warning: The points displayed are raw data, so the resulting plot is not a \"partial residual plot.\"\n\n\n\n\nrug\n\n\nTRUE displays tick marks on the axes to mark the distribution of raw data.\n\n\n\n\ngray\n\n\nFALSE grayscale or color plot\n\n\n\n\ndraw\n\n\nTRUE returns a ggplot2 plot. 
FALSE returns a data.frame of the underlying data.\n\n\n\n\n…\n\n\nAdditional arguments are passed to the predict() method supplied by the modeling package.These arguments are particularly useful for mixed-effects or bayesian models (see the online vignettes on the marginaleffects website). Available arguments can vary from model to model, depending on the range of supported arguments by each modeling package. See the \"Model-Specific Arguments\" section of the ?slopes documentation for a non-exhaustive list of available arguments.\n\n\n\nA ggplot2 object or data frame (if draw=FALSE)\n\nSome model types allow model-specific arguments to modify the nature of marginal effects, predictions, marginal means, and contrasts. Please report other package-specific predict() arguments on Github so we can add them to the table below.\nhttps://github.com/vincentarelbundock/marginaleffects/issues\n\n\n\nPackage\n\n\nClass\n\n\nArgument\n\n\nDocumentation\n\n\n\n\nbrms\n\n\nbrmsfit\n\n\nndraws\n\n\nbrms::posterior_predict\n\n\n\n\n\n\n\n\nre_formula\n\n\nbrms::posterior_predict\n\n\n\n\nlme4\n\n\nmerMod\n\n\nre.form\n\n\nlme4::predict.merMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nlme4::predict.merMod\n\n\n\n\nglmmTMB\n\n\nglmmTMB\n\n\nre.form\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nzitype\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\nmgcv\n\n\nbam\n\n\nexclude\n\n\nmgcv::predict.bam\n\n\n\n\n\n\ngam\n\n\nexclude\n\n\nmgcv::predict.gam\n\n\n\n\nrobustlmm\n\n\nrlmerMod\n\n\nre.form\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\nMCMCglmm\n\n\nMCMCglmm\n\n\nndraws\n\n\n\n\n\n\nsampleSelection\n\n\nselection\n\n\npart\n\n\nsampleSelection::predict.selection\n\n\n\n\n\n\n\nThe type argument determines the scale of the predictions used to compute quantities of interest with functions from the marginaleffects package. Admissible values for type depend on the model object. When users specify an incorrect value for type, marginaleffects will raise an informative error with a list of valid type values for the specific model object. The first entry in the list in that error message is the default type.\nThe invlink(link) is a special type defined by marginaleffects. It is available for some (but not all) models, and only for the predictions() function. With this link type, we first compute predictions on the link scale, then we use the inverse link function to backtransform the predictions to the response scale. This is useful for models with non-linear link functions as it can ensure that confidence intervals stay within desirable bounds, ex: 0 to 1 for a logit model. Note that an average of estimates with type=“invlink(link)” will not always be equivalent to the average of estimates with type=“response”. This type is default when calling predictions(). 
It is available—but not default—when calling avg_predictions() or predictions() with the by argument.\nSome of the most common type values are:\nresponse, link, E, Ep, average, class, conditional, count, cum.prob, cumhaz, cumprob, density, detection, disp, ev, expected, expvalue, fitted, hazard, invlink(link), latent, latent_N, linear, linear.predictor, linpred, location, lp, mean, numeric, p, ppd, pr, precision, prediction, prob, probability, probs, quantile, risk, rmst, scale, survival, unconditional, utility, variance, xb, zero, zlink, zprob\n\n\nlibrary(\"marginaleffects\")\n\nmod <- lm(mpg ~ hp + wt, data = mtcars)\nplot_predictions(mod, condition = \"wt\")\n\n\n\n\n\n\nmod <- lm(mpg ~ hp * wt * am, data = mtcars)\nplot_predictions(mod, condition = c(\"hp\", \"wt\"))\n\n\n\n\n\n\nplot_predictions(mod, condition = list(\"hp\", wt = \"threenum\"))\n\n\n\n\n\n\nplot_predictions(mod, condition = list(\"hp\", wt = range))", "crumbs": [ "Model to Meaning", "Functions", - "`plot_comparisons`" + "`plot_predictions`" ] }, { - "objectID": "man/posterior_draws.html", - "href": "man/posterior_draws.html", + "objectID": "man/slopes.html", + "href": "man/slopes.html", "title": "", "section": "", - "text": "Extract Posterior Draws or Bootstrap Resamples from marginaleffects Objects\n\n\n\nposterior_draws(x, shape = \"long\")\n\n\n\n\n\n\n\nx\n\n\nAn object produced by a marginaleffects package function, such as predictions(), avg_slopes(), hypotheses(), etc.\n\n\n\n\nshape\n\n\nstring indicating the shape of the output format:\n\n\n\"long\": long format data frame\n\n\n\"DxP\": Matrix with draws as rows and parameters as columns\n\n\n\"PxD\": Matrix with draws as rows and parameters as columns\n\n\n\"rvar\": Random variable datatype (see posterior package documentation).\n\n\n\n\n\n\n\n\nA data.frame with drawid and draw columns.", + "text": "Partial derivative of the regression equation with respect to a regressor of interest.\n\n\nslopes(): unit-level (conditional) estimates.\n\n\navg_slopes(): average (marginal) estimates.\n\n\nThe newdata argument and the datagrid() function can be used to control where statistics are evaluated in the predictor space: \"at observed values\", \"at the mean\", \"at representative values\", etc.\nSee the slopes vignette and package website for worked examples and case studies:\n\n\nhttps://marginaleffects.com/vignettes/slopes.html\n\n\nhttps://marginaleffects.com/\n\n\nslopes(\n model,\n newdata = NULL,\n variables = NULL,\n type = NULL,\n by = FALSE,\n vcov = TRUE,\n conf_level = 0.95,\n slope = \"dydx\",\n wts = FALSE,\n hypothesis = NULL,\n equivalence = NULL,\n p_adjust = NULL,\n df = Inf,\n eps = NULL,\n numderiv = \"fdforward\",\n ...\n)\n\navg_slopes(\n model,\n newdata = NULL,\n variables = NULL,\n type = NULL,\n by = TRUE,\n vcov = TRUE,\n conf_level = 0.95,\n slope = \"dydx\",\n wts = FALSE,\n hypothesis = NULL,\n equivalence = NULL,\n p_adjust = NULL,\n df = Inf,\n eps = NULL,\n numderiv = \"fdforward\",\n ...\n)\n\n\n\n\n\nmodel\n\n\nModel object\n\n\n\n\nnewdata\n\n\nGrid of predictor values at which we evaluate the slopes.\n\n\nWarning: Please avoid modifying your dataset between fitting the model and calling a marginaleffects function. This can sometimes lead to unexpected results.\n\n\nNULL (default): Unit-level slopes for each observed value in the dataset (empirical distribution). The dataset is retrieved using insight::get_data(), which tries to extract data from the environment. 
This may produce unexpected results if the original data frame has been altered since fitting the model.\n\n\ndatagrid() call to specify a custom grid of regressors. For example:\n\n\nnewdata = datagrid(cyl = c(4, 6)): cyl variable equal to 4 and 6 and other regressors fixed at their means or modes.\n\n\nSee the Examples section and the datagrid() documentation.\n\n\n\n\nsubset() call with a single argument to select a subset of the dataset used to fit the model, ex: newdata = subset(treatment == 1)\n\n\ndplyr::filter() call with a single argument to select a subset of the dataset used to fit the model, ex: newdata = filter(treatment == 1)\n\n\nstring:\n\n\n\"mean\": Slopes evaluated when each predictor is held at its mean or mode.\n\n\n\"median\": Slopes evaluated when each predictor is held at its median or mode.\n\n\n\"balanced\": Slopes evaluated on a balanced grid with every combination of categories and numeric variables held at their means.\n\n\n\"tukey\": Slopes evaluated at Tukey’s 5 numbers.\n\n\n\"grid\": Slopes evaluated on a grid of representative numbers (Tukey’s 5 numbers and unique values of categorical predictors).\n\n\n\n\n\n\n\n\nvariables\n\n\nFocal variables\n\n\nNULL: compute slopes or comparisons for all the variables in the model object (can be slow).\n\n\nCharacter vector: subset of variables (usually faster).\n\n\n\n\n\n\ntype\n\n\nstring indicates the type (scale) of the predictions used to compute contrasts or slopes. This can differ based on the model type, but will typically be a string such as: \"response\", \"link\", \"probs\", or \"zero\". When an unsupported string is entered, the model-specific list of acceptable values is returned in an error message. When type is NULL, the first entry in the error message is used by default.\n\n\n\n\nby\n\n\nAggregate unit-level estimates (aka, marginalize, average over). Valid inputs:\n\n\nFALSE: return the original unit-level estimates.\n\n\nTRUE: aggregate estimates for each term.\n\n\nCharacter vector of column names in newdata or in the data frame produced by calling the function without the by argument.\n\n\nData frame with a by column of group labels, and merging columns shared by newdata or the data frame produced by calling the same function without the by argument.\n\n\nSee examples below.\n\n\nFor more complex aggregations, you can use the FUN argument of the hypotheses() function. See that function’s documentation and the Hypothesis Test vignettes on the marginaleffects website.\n\n\n\n\n\n\nvcov\n\n\nType of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:\n\n\nFALSE: Do not compute standard errors. This can speed up computation considerably.\n\n\nTRUE: Unit-level standard errors using the default vcov(model) variance-covariance matrix.\n\n\nString which indicates the kind of uncertainty estimates to return.\n\n\nHeteroskedasticity-consistent: “HC”, “HC0”, “HC1”, “HC2”, “HC3”, “HC4”, “HC4m”, “HC5”. See ?sandwich::vcovHC\n\n\nHeteroskedasticity and autocorrelation consistent: “HAC”\n\n\nMixed-Models degrees of freedom: \"satterthwaite\", \"kenward-roger\"\n\n\nOther: “NeweyWest”, “KernHAC”, “OPG”. See the sandwich package documentation.\n\n\n\n\nOne-sided formula which indicates the name of cluster variables (e.g., ~unit_id). This formula is passed to the cluster argument of the sandwich::vcovCL function.\n\n\nSquare covariance matrix\n\n\nFunction which returns a covariance matrix (e.g., stats::vcov(model))\n\n\n\n\n\n\nconf_level\n\n\nnumeric value between 0 and 1. 
Confidence level to use to build a confidence interval.\n\n\n\n\nslope\n\n\nstring indicates the type of slope or (semi-)elasticity to compute:\n\n\n\"dydx\": dY/dX\n\n\n\"eyex\": dY/dX * X / Y\n\n\n\"eydx\": dY/dX / Y\n\n\n\"dyex\": dY/dX * X\n\n\nY is the predicted value of the outcome; X is the observed value of the predictor.\n\n\n\n\n\n\nwts\n\n\nlogical, string or numeric: weights to use when computing average predictions, contrasts or slopes. These weights only affect the averaging in avg_*() or with the by argument, and not unit-level estimates. See ?weighted.mean\n\n\nstring: column name of the weights variable in newdata. When supplying a column name to wts, it is recommended to supply the original data (including the weights variable) explicitly to newdata.\n\n\nnumeric: vector of length equal to the number of rows in the original data or in newdata (if supplied).\n\n\nFALSE: Equal weights.\n\n\nTRUE: Extract weights from the fitted object with insight::find_weights() and use them when taking weighted averages of estimates. Warning: newdata=datagrid() returns a single average weight, which is equivalent to using wts=FALSE\n\n\n\n\n\n\nhypothesis\n\n\nspecify a hypothesis test or custom contrast using a numeric value, vector, or matrix; a string equation; a string shortcut; a formula; or a function.\n\n\nNumeric:\n\n\nSingle value: the null hypothesis used in the computation of Z and p (before applying transform).\n\n\nVector: Weights to compute a linear combination of (custom contrast between) estimates. Length equal to the number of rows generated by the same function call, but without the hypothesis argument.\n\n\nMatrix: Each column is a vector of weights, as described above, used to compute a distinct linear combination of (contrast between) estimates. The column names of the matrix are used as labels in the output.\n\n\n\n\nString equation to specify linear or non-linear hypothesis tests. If the term column uniquely identifies rows, terms can be used in the formula. Otherwise, use b1, b2, etc. to identify the position of each parameter. The b* wildcard can be used to test hypotheses on all estimates. If a named vector is used, the names are used as labels in the output. Examples:\n\n\nhp = drat\n\n\nhp + drat = 12\n\n\nb1 + b2 + b3 = 0\n\n\nb* / b1 = 1\n\n\n\n\nString:\n\n\n\"pairwise\": pairwise differences between estimates in each row.\n\n\n\"reference\": differences between the estimates in each row and the estimate in the first row.\n\n\n\"sequential\": difference between an estimate and the estimate in the next row.\n\n\n\"meandev\": difference between an estimate and the mean of all estimates.\n\n\n\"meanotherdev\": difference between an estimate and the mean of all other estimates, excluding the current one.\n\n\n\"revpairwise\", \"revreference\", \"revsequential\": inverse of the corresponding hypotheses, as described above.\n\n\n\n\nFormula:\n\n\ncomparison ~ pairs | group\n\n\nLeft-hand side determines the type of comparison to conduct: difference or ratio. 
If the left-hand side is empty, difference is chosen.\n\n\nRight-hand side determines the pairs of estimates to compare: reference, sequential, or meandev\n\n\nOptional: Users can supply grouping variables after a vertical bar to conduct comparisons within subsets.\n\n\nExamples:\n\n\n~ reference\n\n\nratio ~ pairwise\n\n\ndifference ~ pairwise | groupid\n\n\n\n\n\n\nFunction:\n\n\nAccepts an argument x: object produced by a marginaleffects function or a data frame with column rowid and estimate\n\n\nReturns a data frame with columns term and estimate (mandatory) and rowid (optional).\n\n\nThe function can also accept optional input arguments: newdata, by, draws.\n\n\nThis function approach will not work for Bayesian models or with bootstrapping. In those cases, it is easy to use posterior_draws() to extract and manipulate the draws directly.\n\n\n\n\nSee the Examples section below and the vignette: https://marginaleffects.com/vignettes/hypothesis.html\n\n\n\n\n\n\nequivalence\n\n\nNumeric vector of length 2: bounds used for the two-one-sided test (TOST) of equivalence, and for the non-inferiority and non-superiority tests. See Details section below.\n\n\n\n\np_adjust\n\n\nAdjust p-values for multiple comparisons: \"holm\", \"hochberg\", \"hommel\", \"bonferroni\", \"BH\", \"BY\", or \"fdr\". See stats::p.adjust\n\n\n\n\ndf\n\n\nDegrees of freedom used to compute p values and confidence intervals. A single numeric value between 1 and Inf. When df is Inf, the normal distribution is used. When df is finite, the t distribution is used. See insight::get_df for a convenient function to extract degrees of freedom. Ex: slopes(model, df = insight::get_df(model))\n\n\n\n\neps\n\n\nNULL or numeric value which determines the step size to use when calculating numerical derivatives: (f(x+eps)-f(x))/eps. When eps is NULL, the step size is 0.0001 multiplied by the difference between the maximum and minimum values of the variable with respect to which we are taking the derivative. Changing eps may be necessary to avoid numerical problems in certain models.\n\n\n\n\nnumderiv\n\n\nstring or list of strings indicating the method to use for the numeric differentiation used to compute delta method standard errors.\n\n\n\"fdforward\": finite difference method with forward differences (default)\n\n\n\"fdcenter\": finite difference method with central differences\n\n\n\"richardson\": Richardson extrapolation method\n\n\nExtra arguments can be specified by passing a list to the numderiv argument, with the name of the method first and named arguments following, ex: numderiv=list(\"fdcenter\", eps = 1e-5). When an unknown argument is used, marginaleffects prints the list of valid arguments for each method.\n\n\n\n\n\n\n…\n\n\nAdditional arguments are passed to the predict() method supplied by the modeling package. These arguments are particularly useful for mixed-effects or bayesian models (see the online vignettes on the marginaleffects website). Available arguments can vary from model to model, depending on the range of supported arguments by each modeling package. See the \"Model-Specific Arguments\" section of the ?slopes documentation for a non-exhaustive list of available arguments.
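\n\nTo illustrate the hypothesis argument described above, a minimal sketch that compares two average slopes (the model is illustrative):\n\nlibrary(marginaleffects)\nmod <- lm(mpg ~ hp + wt, data = mtcars)\n\n# string equation: is the average slope of hp equal to the average slope of wt?\navg_slopes(mod, hypothesis = \"hp = wt\")\n\n# the same test, using positional labels\navg_slopes(mod, hypothesis = \"b1 - b2 = 0\")\n\n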
A \"slope\" or \"marginal effect\" is the partial derivative of the regression equation with respect to a variable in the model. This function uses numerical differentiation to compute slopes for a vast array of models, including non-linear models with transformations (e.g., polynomials). Uncertainty estimates are computed using the delta method.\nNumerical derivatives for the slopes function are calculated using a simple epsilon difference approach: \(\partial Y / \partial X = (f(X + \varepsilon/2) - f(X-\varepsilon/2)) / \varepsilon\), where f is the predict() method associated with the model class, and \(\varepsilon\) is determined by the eps argument.\n\nA data.frame with one row per observation (per term/group) and several columns:\n\n\nrowid: row number of the newdata data frame\n\n\ntype: prediction type, as defined by the type argument\n\n\ngroup: (optional) value of the grouped outcome (e.g., categorical outcome models)\n\n\nterm: the variable whose marginal effect is computed\n\n\ndydx: slope of the outcome with respect to the term, for a given combination of predictor values\n\n\nstd.error: standard errors computed via the delta method.\n\n\np.value: p value associated with the estimate column. The null is determined by the hypothesis argument (0 by default), and p values are computed before applying the transform argument. For models of class feglm, Gam, glm and negbin, p values are computed on the link scale by default unless the type argument is specified explicitly.\n\n\ns.value: Shannon information transforms of p values. How many consecutive \"heads\" tosses would provide the same amount of evidence (or \"surprise\") against the null hypothesis that the coin is fair? The purpose of S is to calibrate the analyst’s intuition about the strength of evidence encoded in p against a well-known physical phenomenon. See Greenland (2019) and Cole et al. (2020).\n\n\nconf.low: lower bound of the confidence interval (or equal-tailed interval for bayesian models)\n\n\nconf.high: upper bound of the confidence interval (or equal-tailed interval for bayesian models)\n\n\nSee ?print.marginaleffects for printing options.\n\n\n\navg_slopes(): Average slopes\n\n\nStandard errors for all quantities estimated by marginaleffects can be obtained via the delta method. This requires differentiating a function with respect to the coefficients in the model using a finite difference approach. In some models, the delta method standard errors can be sensitive to various aspects of the numeric differentiation strategy, including the step size. By default, the step size is set to 1e-8, or to 1e-4 times the smallest absolute model coefficient, whichever is largest.\nmarginaleffects can delegate numeric differentiation to the numDeriv package, which allows more flexibility. To do this, users can pass arguments to the numDeriv::jacobian function through a global option. For example:\n\n\noptions(marginaleffects_numDeriv = list(method = \"simple\", method.args = list(eps = 1e-6)))\n\n\noptions(marginaleffects_numDeriv = list(method = \"Richardson\", method.args = list(eps = 1e-5)))\n\n\noptions(marginaleffects_numDeriv = NULL)\n\n\nSee the \"Standard Errors and Confidence Intervals\" vignette on the marginaleffects website for more details on the computation of standard errors:\nhttps://marginaleffects.com/vignettes/uncertainty.html\nNote that the inferences() function can be used to compute uncertainty estimates via bootstrap or simulation-based inference. See the vignette:\nhttps://marginaleffects.com/vignettes/bootstrap.html
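\n\nAs a rough illustration of the central-difference formula above (a sketch, not the package’s internal code; the model and step size are only illustrative):\n\nlibrary(marginaleffects)\nmod <- glm(vs ~ hp, data = mtcars, family = binomial)\neps <- 1e-4 * diff(range(mtcars$hp))\nhi <- predict(mod, newdata = transform(mtcars, hp = hp + eps / 2), type = \"response\")\nlo <- predict(mod, newdata = transform(mtcars, hp = hp - eps / 2), type = \"response\")\nmean((hi - lo) / eps)\n\n# should be close to the estimate from:\navg_slopes(mod, variables = \"hp\")\n\n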
Please report other package-specific predict() arguments on Github so we can add them to the table below.\nhttps://github.com/vincentarelbundock/marginaleffects/issues\n\n\n\nPackage\n\n\nClass\n\n\nArgument\n\n\nDocumentation\n\n\n\n\nbrms\n\n\nbrmsfit\n\n\nndraws\n\n\nbrms::posterior_predict\n\n\n\n\n\n\n\n\nre_formula\n\n\nbrms::posterior_predict\n\n\n\n\nlme4\n\n\nmerMod\n\n\nre.form\n\n\nlme4::predict.merMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nlme4::predict.merMod\n\n\n\n\nglmmTMB\n\n\nglmmTMB\n\n\nre.form\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nzitype\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\nmgcv\n\n\nbam\n\n\nexclude\n\n\nmgcv::predict.bam\n\n\n\n\n\n\ngam\n\n\nexclude\n\n\nmgcv::predict.gam\n\n\n\n\nrobustlmm\n\n\nrlmerMod\n\n\nre.form\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\nMCMCglmm\n\n\nMCMCglmm\n\n\nndraws\n\n\n\n\n\n\nsampleSelection\n\n\nselection\n\n\npart\n\n\nsampleSelection::predict.selection\n\n\n\n\n\n\n\nBy default, credible intervals in bayesian models are built as equal-tailed intervals. This can be changed to a highest density interval by setting a global option:\noptions(\"marginaleffects_posterior_interval\" = \"eti\")\noptions(\"marginaleffects_posterior_interval\" = \"hdi\")\nBy default, the center of the posterior distribution in bayesian models is identified by the median. Users can use a different summary function by setting a global option:\noptions(\"marginaleffects_posterior_center\" = \"mean\")\noptions(\"marginaleffects_posterior_center\" = \"median\")\nWhen estimates are averaged using the by argument, the tidy() function, or the summary() function, the posterior distribution is marginalized twice over. First, we take the average across units but within each iteration of the MCMC chain, according to what the user requested in the by argument or the tidy()/summary() functions. Then, we identify the center of the resulting posterior using the function supplied to the \"marginaleffects_posterior_center\" option (the median by default).\n\n\\(\\theta\\) is an estimate, \\(\\sigma_\\theta\\) its estimated standard error, and \\([a, b]\\) are the bounds of the interval supplied to the equivalence argument.\nNon-inferiority:\n\n\n\\(H_0\\): \\(\\theta \\leq a\\)\n\n\n\\(H_1\\): \\(\\theta > a\\)\n\n\n\\(t=(\\theta - a)/\\sigma_\\theta\\)\n\n\np: Upper-tail probability\n\n\nNon-superiority:\n\n\n\\(H_0\\): \\(\\theta \\geq b\\)\n\n\n\\(H_1\\): \\(\\theta < b\\)\n\n\n\\(t=(\\theta - b)/\\sigma_\\theta\\)\n\n\np: Lower-tail probability\n\n\nEquivalence: Two One-Sided Tests (TOST)\n\n\np: Maximum of the non-inferiority and non-superiority p values.\n\n\nThanks to Russell V. Lenth for the excellent emmeans package and documentation which inspired this feature.\n\nThe type argument determines the scale of the predictions used to compute quantities of interest with functions from the marginaleffects package. Admissible values for type depend on the model object. When users specify an incorrect value for type, marginaleffects will raise an informative error with a list of valid type values for the specific model object. The first entry in the list in that error message is the default type.\ninvlink(link) is a special type defined by marginaleffects. It is available for some (but not all) models, and only for the predictions() function. 
With this link type, we first compute predictions on the link scale, then we use the inverse link function to backtransform the predictions to the response scale. This is useful for models with non-linear link functions as it can ensure that confidence intervals stay within desirable bounds, ex: 0 to 1 for a logit model. Note that an average of estimates with type=\"invlink(link)\" will not always be equivalent to the average of estimates with type=\"response\". This type is the default when calling predictions(). It is available, but not the default, when calling avg_predictions() or predictions() with the by argument.\nSome of the most common type values are:\nresponse, link, E, Ep, average, class, conditional, count, cum.prob, cumhaz, cumprob, density, detection, disp, ev, expected, expvalue, fitted, hazard, invlink(link), latent, latent_N, linear, linear.predictor, linpred, location, lp, mean, numeric, p, ppd, pr, precision, prediction, prob, probability, probs, quantile, risk, rmst, scale, survival, unconditional, utility, variance, xb, zero, zlink, zprob\n\nThe slopes() and comparisons() functions can use parallelism to speed up computation. Operations are parallelized for the computation of standard errors, at the model coefficient level. There is always considerable overhead when using parallel computation, mainly involved in passing the whole dataset to the different processes. Thus, parallel computation is most likely to be useful when the model includes many parameters and the dataset is relatively small.\nWarning: In many cases, parallel processing will not be useful at all.\nTo activate parallel computation, users must load the future.apply package, call the plan() function, and set a global option. For example:\n\nlibrary(future.apply)\nplan(\"multicore\", workers = 4)\noptions(marginaleffects_parallel = TRUE)\n\nslopes(model)\n\n\nTo disable parallelism in marginaleffects altogether, you can set a global option:\n\noptions(marginaleffects_parallel = FALSE)\n\n\n\nBehind the scenes, the arguments of marginaleffects functions are evaluated in this order:\n\n\nnewdata\n\n\nvariables\n\n\ncomparison and slopes\n\n\nby\n\n\nvcov\n\n\nhypothesis\n\n\ntransform\n\n\nThe behavior of marginaleffects functions can be modified by setting global options.\nDisable some safety checks:\n\noptions(marginaleffects_safe = FALSE)\n\n\nOmit some columns from the printed output:\n\noptions(marginaleffects_print_omit = c(\"p.value\", \"s.value\"))\n\n\n\n\n\nGreenland S. 2019. \"Valid P-Values Behave Exactly as They Should: Some Misleading Criticisms of P-Values and Their Resolution With S-Values.\" The American Statistician. 73(S1): 106–114.\n\n\nCole, Stephen R, Jessie K Edwards, and Sander Greenland. 2020. \"Surprise!\" American Journal of Epidemiology 190 (2): 191–93. 
https://doi.org/10.1093/aje/kwaa136\n\n\n\nlibrary(\"marginaleffects\")\n\n\n\n# Unit-level (conditional) Marginal Effects\nmod <- glm(am ~ hp * wt, data = mtcars, family = binomial)\nmfx <- slopes(mod)\nhead(mfx)\n\n# Average Marginal Effect (AME)\navg_slopes(mod, by = TRUE)\n\n\n# Marginal Effect at the Mean (MEM)\nslopes(mod, newdata = datagrid())\n\n# Marginal Effect at User-Specified Values\n# Variables not explicitly included in `datagrid()` are held at their means\nslopes(mod, newdata = datagrid(hp = c(100, 110)))\n\n# Group-Average Marginal Effects (G-AME)\n# Calculate marginal effects for each observation, and then take the average\n# marginal effect within each subset of observations with different observed\n# values for the `cyl` variable:\nmod2 <- lm(mpg ~ hp * cyl, data = mtcars)\navg_slopes(mod2, variables = \"hp\", by = \"cyl\")\n\n# Marginal Effects at User-Specified Values (counterfactual)\n# Variables not explicitly included in `datagrid()` are held at their\n# original values, and the whole dataset is duplicated once for each\n# combination of the values in `datagrid()`\nmfx <- slopes(mod,\n newdata = datagrid(\n hp = c(100, 110),\n grid_type = \"counterfactual\"))\nhead(mfx)\n\n# Heteroskedasticity robust standard errors\nmfx <- slopes(mod, vcov = sandwich::vcovHC(mod))\nhead(mfx)\n\n# hypothesis test: is the `hp` marginal effect at the mean equal to the `drat` marginal effect\nmod <- lm(mpg ~ wt + drat, data = mtcars)\n\nslopes(\n mod,\n newdata = \"mean\",\n hypothesis = \"wt = drat\")\n\n# same hypothesis test using row indices\nslopes(\n mod,\n newdata = \"mean\",\n hypothesis = \"b1 - b2 = 0\")\n\n# same hypothesis test using numeric vector of weights\nslopes(\n mod,\n newdata = \"mean\",\n hypothesis = c(1, -1))\n\n# two custom contrasts using a matrix of weights\nlc <- matrix(\n c(\n 1, -1,\n 2, 3),\n ncol = 2)\ncolnames(lc) <- c(\"Contrast A\", \"Contrast B\")\nslopes(\n mod,\n newdata = \"mean\",\n hypothesis = lc)", "crumbs": [ "Model to Meaning", "Functions", - "`posterior_draws`" + "`slopes`" ] }, { - "objectID": "man/posterior_draws.html#extract-posterior-draws-or-bootstrap-resamples-from-marginaleffects-objects", - "href": "man/posterior_draws.html#extract-posterior-draws-or-bootstrap-resamples-from-marginaleffects-objects", + "objectID": "man/slopes.html#slopes-aka-partial-derivatives-marginal-effects-or-trends", + "href": "man/slopes.html#slopes-aka-partial-derivatives-marginal-effects-or-trends", "title": "", "section": "", - "text": "Extract Posterior Draws or Bootstrap Resamples from marginaleffects Objects\n\n\n\nposterior_draws(x, shape = \"long\")\n\n\n\n\n\n\n\nx\n\n\nAn object produced by a marginaleffects package function, such as predictions(), avg_slopes(), hypotheses(), etc.\n\n\n\n\nshape\n\n\nstring indicating the shape of the output format:\n\n\n\"long\": long format data frame\n\n\n\"DxP\": Matrix with draws as rows and parameters as columns\n\n\n\"PxD\": Matrix with draws as rows and parameters as columns\n\n\n\"rvar\": Random variable datatype (see posterior package documentation).\n\n\n\n\n\n\n\n\nA data.frame with drawid and draw columns.", + "text": "Partial derivative of the regression equation with respect to a regressor of interest.\n\n\nslopes(): unit-level (conditional) estimates.\n\n\navg_slopes(): average (marginal) estimates.\n\n\nThe newdata argument and the datagrid() function can be used to control where statistics are evaluated in the predictor space: \"at observed values\", \"at the mean\", \"at representative values\", 
etc.\nSee the slopes vignette and package website for worked examples and case studies:\n\n\nhttps://marginaleffects.com/vignettes/slopes.html\n\n\nhttps://marginaleffects.com/\n\n\nslopes(\n model,\n newdata = NULL,\n variables = NULL,\n type = NULL,\n by = FALSE,\n vcov = TRUE,\n conf_level = 0.95,\n slope = \"dydx\",\n wts = FALSE,\n hypothesis = NULL,\n equivalence = NULL,\n p_adjust = NULL,\n df = Inf,\n eps = NULL,\n numderiv = \"fdforward\",\n ...\n)\n\navg_slopes(\n model,\n newdata = NULL,\n variables = NULL,\n type = NULL,\n by = TRUE,\n vcov = TRUE,\n conf_level = 0.95,\n slope = \"dydx\",\n wts = FALSE,\n hypothesis = NULL,\n equivalence = NULL,\n p_adjust = NULL,\n df = Inf,\n eps = NULL,\n numderiv = \"fdforward\",\n ...\n)\n\n\n\n\n\nmodel\n\n\nModel object\n\n\n\n\nnewdata\n\n\nGrid of predictor values at which we evaluate the slopes.\n\n\nWarning: Please avoid modifying your dataset between fitting the model and calling a marginaleffects function. This can sometimes lead to unexpected results.\n\n\nNULL (default): Unit-level slopes for each observed value in the dataset (empirical distribution). The dataset is retrieved using insight::get_data(), which tries to extract data from the environment. This may produce unexpected results if the original data frame has been altered since fitting the model.\n\n\ndatagrid() call to specify a custom grid of regressors. For example:\n\n\nnewdata = datagrid(cyl = c(4, 6)): cyl variable equal to 4 and 6 and other regressors fixed at their means or modes.\n\n\nSee the Examples section and the datagrid() documentation.\n\n\n\n\nsubset() call with a single argument to select a subset of the dataset used to fit the model, ex: newdata = subset(treatment == 1)\n\n\ndplyr::filter() call with a single argument to select a subset of the dataset used to fit the model, ex: newdata = filter(treatment == 1)\n\n\nstring:\n\n\n\"mean\": Slopes evaluated when each predictor is held at its mean or mode.\n\n\n\"median\": Slopes evaluated when each predictor is held at its median or mode.\n\n\n\"balanced\": Slopes evaluated on a balanced grid with every combination of categories and numeric variables held at their means.\n\n\n\"tukey\": Slopes evaluated at Tukey’s 5 numbers.\n\n\n\"grid\": Slopes evaluated on a grid of representative numbers (Tukey’s 5 numbers and unique values of categorical predictors).\n\n\n\n\n\n\n\n\nvariables\n\n\nFocal variables\n\n\nNULL: compute slopes or comparisons for all the variables in the model object (can be slow).\n\n\nCharacter vector: subset of variables (usually faster).\n\n\n\n\n\n\ntype\n\n\nstring indicates the type (scale) of the predictions used to compute contrasts or slopes. This can differ based on the model type, but will typically be a string such as: \"response\", \"link\", \"probs\", or \"zero\". When an unsupported string is entered, the model-specific list of acceptable values is returned in an error message. When type is NULL, the first entry in the error message is used by default.\n\n\n\n\nby\n\n\nAggregate unit-level estimates (aka, marginalize, average over). 
Valid inputs:\n\n\nFALSE: return the original unit-level estimates.\n\n\nTRUE: aggregate estimates for each term.\n\n\nCharacter vector of column names in newdata or in the data frame produced by calling the function without the by argument.\n\n\nData frame with a by column of group labels, and merging columns shared by newdata or the data frame produced by calling the same function without the by argument.\n\n\nSee examples below.\n\n\nFor more complex aggregations, you can use the FUN argument of the hypotheses() function. See that function’s documentation and the Hypothesis Test vignettes on the marginaleffects website.\n\n\n\n\n\n\nvcov\n\n\nType of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:\n\n\nFALSE: Do not compute standard errors. This can speed up computation considerably.\n\n\nTRUE: Unit-level standard errors using the default vcov(model) variance-covariance matrix.\n\n\nString which indicates the kind of uncertainty estimates to return.\n\n\nHeteroskedasticity-consistent: \"HC\", \"HC0\", \"HC1\", \"HC2\", \"HC3\", \"HC4\", \"HC4m\", \"HC5\". See ?sandwich::vcovHC\n\n\nHeteroskedasticity and autocorrelation consistent: \"HAC\"\n\n\nMixed-Models degrees of freedom: \"satterthwaite\", \"kenward-roger\"\n\n\nOther: \"NeweyWest\", \"KernHAC\", \"OPG\". See the sandwich package documentation.\n\n\n\n\nOne-sided formula which indicates the name of cluster variables (e.g., ~unit_id). This formula is passed to the cluster argument of the sandwich::vcovCL function.\n\n\nSquare covariance matrix\n\n\nFunction which returns a covariance matrix (e.g., stats::vcov(model))\n\n\n\n\n\n\nconf_level\n\n\nnumeric value between 0 and 1. Confidence level to use to build a confidence interval.\n\n\n\n\nslope\n\n\nstring indicates the type of slope or (semi-)elasticity to compute:\n\n\n\"dydx\": dY/dX\n\n\n\"eyex\": dY/dX * X / Y\n\n\n\"eydx\": dY/dX / Y\n\n\n\"dyex\": dY/dX * X\n\n\nY is the predicted value of the outcome; X is the observed value of the predictor.\n\n\n\n\n\n\nwts\n\n\nlogical, string or numeric: weights to use when computing average predictions, contrasts or slopes. These weights only affect the averaging in avg_*() or with the by argument, and not unit-level estimates. See ?weighted.mean\n\n\nstring: column name of the weights variable in newdata. When supplying a column name to wts, it is recommended to supply the original data (including the weights variable) explicitly to newdata.\n\n\nnumeric: vector of length equal to the number of rows in the original data or in newdata (if supplied).\n\n\nFALSE: Equal weights.\n\n\nTRUE: Extract weights from the fitted object with insight::find_weights() and use them when taking weighted averages of estimates. Warning: newdata=datagrid() returns a single average weight, which is equivalent to using wts=FALSE\n\n\n\n\n\n\nhypothesis\n\n\nspecify a hypothesis test or custom contrast using a numeric value, vector, or matrix; a string equation; a string; a formula; or a function.\n\n\nNumeric:\n\n\nSingle value: the null hypothesis used in the computation of Z and p (before applying transform).\n\n\nVector: Weights to compute a linear combination of (custom contrast between) estimates. Length equal to the number of rows generated by the same function call, but without the hypothesis argument.\n\n\nMatrix: Each column is a vector of weights, as described above, used to compute a distinct linear combination of (contrast between) estimates. 
The column names of the matrix are used as labels in the output.\n\n\n\n\nString equation to specify linear or non-linear hypothesis tests. If the term column uniquely identifies rows, terms can be used in the formula. Otherwise, use b1, b2, etc. to identify the position of each parameter. The b* wildcard can be used to test hypotheses on all estimates. If a named vector is used, the names are used as labels in the output. Examples:\n\n\nhp = drat\n\n\nhp + drat = 12\n\n\nb1 + b2 + b3 = 0\n\n\nb* / b1 = 1\n\n\n\n\nString:\n\n\n\"pairwise\": pairwise differences between estimates in each row.\n\n\n\"reference\": differences between the estimates in each row and the estimate in the first row.\n\n\n\"sequential\": difference between an estimate and the estimate in the next row.\n\n\n\"meandev\": difference between an estimate and the mean of all estimates.\n\n\n\"meanotherdev\": difference between an estimate and the mean of all other estimates, excluding the current one.\n\n\n\"revpairwise\", \"revreference\", \"revsequential\": inverse of the corresponding hypotheses, as described above.\n\n\n\n\nFormula:\n\n\ncomparison ~ pairs | group\n\n\nLeft-hand side determines the type of comparison to conduct: difference or ratio. If the left-hand side is empty, difference is chosen.\n\n\nRight-hand side determines the pairs of estimates to compare: reference, sequential, or meandev\n\n\nOptional: Users can supply grouping variables after a vertical bar to conduct comparisons within subsets.\n\n\nExamples:\n\n\n~ reference\n\n\nratio ~ pairwise\n\n\ndifference ~ pairwise | groupid\n\n\n\n\n\n\nFunction:\n\n\nAccepts an argument x: object produced by a marginaleffects function or a data frame with columns rowid and estimate\n\n\nReturns a data frame with columns term and estimate (mandatory) and rowid (optional).\n\n\nThe function can also accept optional input arguments: newdata, by, draws.\n\n\nThis function approach will not work for Bayesian models or with bootstrapping. In those cases, it is easy to use posterior_draws() to extract and manipulate the draws directly.\n\n\n\n\nSee the Examples section below and the vignette: https://marginaleffects.com/vignettes/hypothesis.html\n\n\n\n\n\n\nequivalence\n\n\nNumeric vector of length 2: bounds used for the two-one-sided test (TOST) of equivalence, and for the non-inferiority and non-superiority tests. See Details section below.\n\n\n\n\np_adjust\n\n\nAdjust p-values for multiple comparisons: \"holm\", \"hochberg\", \"hommel\", \"bonferroni\", \"BH\", \"BY\", or \"fdr\". See stats::p.adjust\n\n\n\n\ndf\n\n\nDegrees of freedom used to compute p values and confidence intervals. A single numeric value between 1 and Inf. When df is Inf, the normal distribution is used. When df is finite, the t distribution is used. See insight::get_df for a convenient function to extract degrees of freedom. Ex: slopes(model, df = insight::get_df(model))\n\n\n\n\neps\n\n\nNULL or numeric value which determines the step size to use when calculating numerical derivatives: (f(x+eps)-f(x))/eps. When eps is NULL, the step size is 0.0001 multiplied by the difference between the maximum and minimum values of the variable with respect to which we are taking the derivative. 
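For example, a minimal sketch (the model and the step size are illustrative assumptions):\n\nlibrary(marginaleffects)\nmod_eps <- glm(am ~ hp + wt, data = mtcars, family = binomial)\n# override the range-based default with a fixed step size\navg_slopes(mod_eps, variables = \"hp\", eps = 1e-5)\n\n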
Changing eps may be necessary to avoid numerical problems in certain models.\n\n\n\n\nnumderiv\n\n\nstring or list of strings indicating the method to use for the numeric differentiation used to compute delta method standard errors.\n\n\n\"fdforward\": finite difference method with forward differences (default)\n\n\n\"fdcenter\": finite difference method with central differences\n\n\n\"richardson\": Richardson extrapolation method\n\n\nExtra arguments can be specified by passing a list to the numderiv argument, with the name of the method first and named arguments following, ex: numderiv=list(\"fdcenter\", eps = 1e-5). When an unknown argument is used, marginaleffects prints the list of valid arguments for each method.\n\n\n\n\n\n\n…\n\n\nAdditional arguments are passed to the predict() method supplied by the modeling package. These arguments are particularly useful for mixed-effects or bayesian models (see the online vignettes on the marginaleffects website). Available arguments can vary from model to model, depending on the range of supported arguments by each modeling package. See the \"Model-Specific Arguments\" section of the ?slopes documentation for a non-exhaustive list of available arguments.\n\n\n\nA \"slope\" or \"marginal effect\" is the partial derivative of the regression equation with respect to a variable in the model. This function uses numerical differentiation to compute slopes for a vast array of models, including non-linear models with transformations (e.g., polynomials). Uncertainty estimates are computed using the delta method.\nNumerical derivatives for the slopes function are calculated using a simple epsilon difference approach: \\(\\partial Y / \\partial X = (f(X + \\varepsilon/2) - f(X-\\varepsilon/2)) / \\varepsilon\\), where f is the predict() method associated with the model class, and \\(\\varepsilon\\) is determined by the eps argument.\n\nA data.frame with one row per observation (per term/group) and several columns:\n\n\nrowid: row number of the newdata data frame\n\n\ntype: prediction type, as defined by the type argument\n\n\ngroup: (optional) value of the grouped outcome (e.g., categorical outcome models)\n\n\nterm: the variable whose marginal effect is computed\n\n\ndydx: slope of the outcome with respect to the term, for a given combination of predictor values\n\n\nstd.error: standard errors computed via the delta method.\n\n\np.value: p value associated with the estimate column. The null is determined by the hypothesis argument (0 by default), and p values are computed before applying the transform argument. For models of class feglm, Gam, glm and negbin, p values are computed on the link scale by default unless the type argument is specified explicitly.\n\n\ns.value: Shannon information transforms of p values. How many consecutive \"heads\" tosses would provide the same amount of evidence (or \"surprise\") against the null hypothesis that the coin is fair? The purpose of S is to calibrate the analyst’s intuition about the strength of evidence encoded in p against a well-known physical phenomenon. See Greenland (2019) and Cole et al. (2020).\n\n\nconf.low: lower bound of the confidence interval (or equal-tailed interval for bayesian models)\n\n\nconf.high: upper bound of the confidence interval (or equal-tailed interval for bayesian models)\n\n\nSee ?print.marginaleffects for printing options.\n\n\n\navg_slopes(): Average slopes\n\n\nStandard errors for all quantities estimated by marginaleffects can be obtained via the delta method. 
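In matrix form, the delta method approximates \\(\\mathrm{Var}[g(\\hat{\\beta})] \\approx J \\, \\mathrm{Var}[\\hat{\\beta}] \\, J^\\top\\), where \\(g\\) maps the model coefficients to the quantity of interest and \\(J = \\partial g / \\partial \\beta\\) is its Jacobian. 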
This requires differentiating a function with respect to the coefficients in the model using a finite difference approach. In some models, the delta method standard errors can be sensitive to various aspects of the numeric differentiation strategy, including the step size. By default, the step size is set to 1e-8, or to 1e-4 times the smallest absolute model coefficient, whichever is largest.\nmarginaleffects can delegate numeric differentiation to the numDeriv package, which allows more flexibility. To do this, users can pass arguments to the numDeriv::jacobian function through a global option. For example:\n\n\noptions(marginaleffects_numDeriv = list(method = \"simple\", method.args = list(eps = 1e-6)))\n\n\noptions(marginaleffects_numDeriv = list(method = \"Richardson\", method.args = list(eps = 1e-5)))\n\n\noptions(marginaleffects_numDeriv = NULL)\n\n\nSee the \"Standard Errors and Confidence Intervals\" vignette on the marginaleffects website for more details on the computation of standard errors:\nhttps://marginaleffects.com/vignettes/uncertainty.html\nNote that the inferences() function can be used to compute uncertainty estimates using bootstrap or simulation-based inference. See the vignette:\nhttps://marginaleffects.com/vignettes/bootstrap.html\n\nSome model types allow model-specific arguments to modify the nature of marginal effects, predictions, marginal means, and contrasts. Please report other package-specific predict() arguments on Github so we can add them to the table below.\nhttps://github.com/vincentarelbundock/marginaleffects/issues\n\n\n\nPackage\n\n\nClass\n\n\nArgument\n\n\nDocumentation\n\n\n\n\nbrms\n\n\nbrmsfit\n\n\nndraws\n\n\nbrms::posterior_predict\n\n\n\n\n\n\n\n\nre_formula\n\n\nbrms::posterior_predict\n\n\n\n\nlme4\n\n\nmerMod\n\n\nre.form\n\n\nlme4::predict.merMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nlme4::predict.merMod\n\n\n\n\nglmmTMB\n\n\nglmmTMB\n\n\nre.form\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nzitype\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\nmgcv\n\n\nbam\n\n\nexclude\n\n\nmgcv::predict.bam\n\n\n\n\n\n\ngam\n\n\nexclude\n\n\nmgcv::predict.gam\n\n\n\n\nrobustlmm\n\n\nrlmerMod\n\n\nre.form\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\nMCMCglmm\n\n\nMCMCglmm\n\n\nndraws\n\n\n\n\n\n\nsampleSelection\n\n\nselection\n\n\npart\n\n\nsampleSelection::predict.selection\n\n\n\n\n\n\n\nBy default, credible intervals in bayesian models are built as equal-tailed intervals. This can be changed to a highest density interval by setting a global option:\noptions(\"marginaleffects_posterior_interval\" = \"eti\")\noptions(\"marginaleffects_posterior_interval\" = \"hdi\")\nBy default, the center of the posterior distribution in bayesian models is identified by the median. Users can use a different summary function by setting a global option:\noptions(\"marginaleffects_posterior_center\" = \"mean\")\noptions(\"marginaleffects_posterior_center\" = \"median\")\nWhen estimates are averaged using the by argument, the tidy() function, or the summary() function, the posterior distribution is marginalized twice over. First, we take the average across units but within each iteration of the MCMC chain, according to what the user requested in the by argument or the tidy()/summary() functions. 
Then, we identify the center of the resulting posterior using the function supplied to the \"marginaleffects_posterior_center\" option (the median by default).\n\n\\(\\theta\\) is an estimate, \\(\\sigma_\\theta\\) its estimated standard error, and \\([a, b]\\) are the bounds of the interval supplied to the equivalence argument.\nNon-inferiority:\n\n\n\\(H_0\\): \\(\\theta \\leq a\\)\n\n\n\\(H_1\\): \\(\\theta > a\\)\n\n\n\\(t=(\\theta - a)/\\sigma_\\theta\\)\n\n\np: Upper-tail probability\n\n\nNon-superiority:\n\n\n\\(H_0\\): \\(\\theta \\geq b\\)\n\n\n\\(H_1\\): \\(\\theta < b\\)\n\n\n\\(t=(\\theta - b)/\\sigma_\\theta\\)\n\n\np: Lower-tail probability\n\n\nEquivalence: Two One-Sided Tests (TOST)\n\n\np: Maximum of the non-inferiority and non-superiority p values.\n\n\nThanks to Russell V. Lenth for the excellent emmeans package and documentation which inspired this feature.\n\nThe type argument determines the scale of the predictions used to compute quantities of interest with functions from the marginaleffects package. Admissible values for type depend on the model object. When users specify an incorrect value for type, marginaleffects will raise an informative error with a list of valid type values for the specific model object. The first entry in the list in that error message is the default type.\ninvlink(link) is a special type defined by marginaleffects. It is available for some (but not all) models, and only for the predictions() function. With this link type, we first compute predictions on the link scale, then we use the inverse link function to backtransform the predictions to the response scale. This is useful for models with non-linear link functions as it can ensure that confidence intervals stay within desirable bounds, ex: 0 to 1 for a logit model. Note that an average of estimates with type=\"invlink(link)\" will not always be equivalent to the average of estimates with type=\"response\". This type is the default when calling predictions(). It is available, but not the default, when calling avg_predictions() or predictions() with the by argument.\nSome of the most common type values are:\nresponse, link, E, Ep, average, class, conditional, count, cum.prob, cumhaz, cumprob, density, detection, disp, ev, expected, expvalue, fitted, hazard, invlink(link), latent, latent_N, linear, linear.predictor, linpred, location, lp, mean, numeric, p, ppd, pr, precision, prediction, prob, probability, probs, quantile, risk, rmst, scale, survival, unconditional, utility, variance, xb, zero, zlink, zprob\n\nThe slopes() and comparisons() functions can use parallelism to speed up computation. Operations are parallelized for the computation of standard errors, at the model coefficient level. There is always considerable overhead when using parallel computation, mainly involved in passing the whole dataset to the different processes. Thus, parallel computation is most likely to be useful when the model includes many parameters and the dataset is relatively small.\nWarning: In many cases, parallel processing will not be useful at all.\nTo activate parallel computation, users must load the future.apply package, call the plan() function, and set a global option. 
For example:\n\nlibrary(future.apply)\nplan(\"multicore\", workers = 4)\noptions(marginaleffects_parallel = TRUE)\n\nslopes(model)\n\n\nTo disable parallelism in marginaleffects altogether, you can set a global option:\n\noptions(marginaleffects_parallel = FALSE)\n\n\n\nBehind the scenes, the arguments of marginaleffects functions are evaluated in this order:\n\n\nnewdata\n\n\nvariables\n\n\ncomparison and slopes\n\n\nby\n\n\nvcov\n\n\nhypothesis\n\n\ntransform\n\n\nThe behavior of marginaleffects functions can be modified by setting global options.\nDisable some safety checks:\n\noptions(marginaleffects_safe = FALSE)\n\n\nOmit some columns from the printed output:\n\noptions(marginaleffects_print_omit = c(\"p.value\", \"s.value\"))\n\n\n\n\n\nGreenland S. 2019. \"Valid P-Values Behave Exactly as They Should: Some Misleading Criticisms of P-Values and Their Resolution With S-Values.\" The American Statistician. 73(S1): 106–114.\n\n\nCole, Stephen R, Jessie K Edwards, and Sander Greenland. 2020. \"Surprise!\" American Journal of Epidemiology 190 (2): 191–93. https://doi.org/10.1093/aje/kwaa136\n\n\n\nlibrary(\"marginaleffects\")\n\n\n\n# Unit-level (conditional) Marginal Effects\nmod <- glm(am ~ hp * wt, data = mtcars, family = binomial)\nmfx <- slopes(mod)\nhead(mfx)\n\n# Average Marginal Effect (AME)\navg_slopes(mod, by = TRUE)\n\n\n# Marginal Effect at the Mean (MEM)\nslopes(mod, newdata = datagrid())\n\n# Marginal Effect at User-Specified Values\n# Variables not explicitly included in `datagrid()` are held at their means\nslopes(mod, newdata = datagrid(hp = c(100, 110)))\n\n# Group-Average Marginal Effects (G-AME)\n# Calculate marginal effects for each observation, and then take the average\n# marginal effect within each subset of observations with different observed\n# values for the `cyl` variable:\nmod2 <- lm(mpg ~ hp * cyl, data = mtcars)\navg_slopes(mod2, variables = \"hp\", by = \"cyl\")\n\n# Marginal Effects at User-Specified Values (counterfactual)\n# Variables not explicitly included in `datagrid()` are held at their\n# original values, and the whole dataset is duplicated once for each\n# combination of the values in `datagrid()`\nmfx <- slopes(mod,\n newdata = datagrid(\n hp = c(100, 110),\n grid_type = \"counterfactual\"))\nhead(mfx)\n\n# Heteroskedasticity robust standard errors\nmfx <- slopes(mod, vcov = sandwich::vcovHC(mod))\nhead(mfx)\n\n# hypothesis test: is the `hp` marginal effect at the mean equal to the `drat` marginal effect\nmod <- lm(mpg ~ wt + drat, data = mtcars)\n\nslopes(\n mod,\n newdata = \"mean\",\n hypothesis = \"wt = drat\")\n\n# same hypothesis test using row indices\nslopes(\n mod,\n newdata = \"mean\",\n hypothesis = \"b1 - b2 = 0\")\n\n# same hypothesis test using numeric vector of weights\nslopes(\n mod,\n newdata = \"mean\",\n hypothesis = c(1, -1))\n\n# two custom contrasts using a matrix of weights\nlc <- matrix(\n c(\n 1, -1,\n 2, 3),\n ncol = 2)\ncolnames(lc) <- c(\"Contrast A\", \"Contrast B\")\nslopes(\n mod,\n newdata = \"mean\",\n hypothesis = lc)", "crumbs": [ "Model to Meaning", "Functions", "`slopes`" ] }, { "objectID": "man/datagrid.html", "href": "man/datagrid.html", "title": "", "section": "", "text": "Generate a data grid of user-specified values for use in the newdata argument of the predictions(), comparisons(), and slopes() functions. 
This is useful to define where in the predictor space we want to evaluate the quantities of interest. Ex: the predicted outcome or slope for a 37 year old college graduate.\n\ndatagrid(\n ...,\n model = NULL,\n newdata = NULL,\n by = NULL,\n grid_type = \"mean_or_mode\",\n response = FALSE,\n FUN_character = NULL,\n FUN_factor = NULL,\n FUN_logical = NULL,\n FUN_numeric = NULL,\n FUN_integer = NULL,\n FUN_binary = NULL,\n FUN_other = NULL\n)\n\n\n\n\n\n…\n\n\nnamed arguments with vectors of values or functions for user-specified variables.\n\n\nFunctions are applied to the variable in the model dataset or newdata, and must return a vector of the appropriate type.\n\n\nCharacter vectors are automatically transformed to factors if necessary. The output will include all combinations of these variables (see Examples below).\n\n\n\n\n\n\nmodel\n\n\nModel object\n\n\n\n\nnewdata\n\n\ndata.frame (one and only one of the model and newdata arguments can be used.)\n\n\n\n\nby\n\n\ncharacter vector with grouping variables within which FUN_* functions are applied to create \"sub-grids\" with unspecified variables.\n\n\n\n\ngrid_type\n\n\ncharacter. Determines the functions to apply to each variable. The defaults can be overridden by defining individual variables explicitly in …, or by supplying a function to one of the FUN_* arguments.\n\n\n\"mean_or_mode\": Character, factor, logical, and binary variables are set to their modes. Numeric, integer, and other variables are set to their means.\n\n\n\"balanced\": Each unique level of character, factor, logical, and binary variables is preserved. Numeric, integer, and other variables are set to their means. Warning: When there are many variables and many levels per variable, a balanced grid can be very large. In those cases, it is better to use grid_type=\"mean_or_mode\" and to specify the unique levels of a subset of named variables explicitly.\n\n\n\"counterfactual\": the entire dataset is duplicated for each combination of the variable values specified in …. Variables not explicitly supplied to datagrid() are set to their observed values in the original dataset.\n\n\n\n\n\n\nresponse\n\n\nLogical. Should the response variable be included in the grid, even if it is not specified explicitly?\n\n\n\n\nFUN_character\n\n\nthe function to be applied to character variables.\n\n\n\n\nFUN_factor\n\n\nthe function to be applied to factor variables. This only applies if the variable in the original data is a factor. For variables converted to factor in a model-fitting formula, for example, FUN_character is used.\n\n\n\n\nFUN_logical\n\n\nthe function to be applied to logical variables.\n\n\n\n\nFUN_numeric\n\n\nthe function to be applied to numeric variables.\n\n\n\n\nFUN_integer\n\n\nthe function to be applied to integer variables.\n\n\n\n\nFUN_binary\n\n\nthe function to be applied to binary variables.\n\n\n\n\nFUN_other\n\n\nthe function to be applied to other variable types.\n\n\n\nIf datagrid is used in a predictions(), comparisons(), or slopes() call as the newdata argument, the model is automatically inserted in the model argument of the datagrid() call, and users do not need to specify either the model or newdata arguments. The same behavior will occur when the value supplied to newdata= is a function call which starts with \"datagrid\". This is intended to allow users to create convenience shortcuts like:\nlibrary(marginaleffects)\nmod <- lm(mpg ~ am + vs + factor(cyl) + hp, mtcars)\ndatagrid_bal <- function(...) 
datagrid(..., grid_type = \"balanced\")\npredictions(mod, newdata = datagrid_bal(cyl = 4))\n\nIf users supply a model, the data used to fit that model is retrieved using the insight::get_data function.\n\nA data.frame in which each row corresponds to one combination of the named predictors supplied by the user via the … dots. Variables which are not explicitly defined are held at their mean or mode.\n\n\nlibrary(\"marginaleffects\")\n\n# The output only has 2 rows, and all the variables except `hp` are at their\n# mean or mode.\ndatagrid(newdata = mtcars, hp = c(100, 110))\n\n mpg cyl disp drat wt qsec vs am gear carb hp\n1 20.09062 6.1875 230.7219 3.596563 3.21725 17.84875 0 0 3.6875 2.8125 100\n2 20.09062 6.1875 230.7219 3.596563 3.21725 17.84875 0 0 3.6875 2.8125 110\n rowid\n1 1\n2 2\n\n# We get the same result by feeding a model instead of a data.frame\nmod <- lm(mpg ~ hp, mtcars)\ndatagrid(model = mod, hp = c(100, 110))\n\n hp rowid\n1 100 1\n2 110 2\n\n# Use in `marginaleffects` to compute \"Typical Marginal Effects\". When used\n# in `slopes()` or `predictions()` we do not need to specify the\n# `model` or `newdata` arguments.\nslopes(mod, newdata = datagrid(hp = c(100, 110)))\n\n\n Term hp Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n hp 100 -0.0682 0.0101 -6.74 <0.001 35.9 -0.0881 -0.0484\n hp 110 -0.0682 0.0101 -6.74 <0.001 35.9 -0.0881 -0.0484\n\nType: response \nColumns: rowid, term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, hp, predicted_lo, predicted_hi, predicted, mpg \n\n# datagrid accepts functions\ndatagrid(hp = range, cyl = unique, newdata = mtcars)\n\n mpg disp drat wt qsec vs am gear carb hp cyl rowid\n1 20.09062 230.7219 3.596563 3.21725 17.84875 0 0 3.6875 2.8125 52 6 1\n2 20.09062 230.7219 3.596563 3.21725 17.84875 0 0 3.6875 2.8125 52 4 2\n3 20.09062 230.7219 3.596563 3.21725 17.84875 0 0 3.6875 2.8125 52 8 3\n4 20.09062 230.7219 3.596563 3.21725 17.84875 0 0 3.6875 2.8125 335 6 4\n5 20.09062 230.7219 3.596563 3.21725 17.84875 0 0 3.6875 2.8125 335 4 5\n6 20.09062 230.7219 3.596563 3.21725 17.84875 0 0 3.6875 2.8125 335 8 6\n\ncomparisons(mod, newdata = datagrid(hp = fivenum))\n\n\n Term hp Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n hp 52 -0.0682 0.0101 -6.74 <0.001 35.9 -0.0881 -0.0484\n hp 96 -0.0682 0.0101 -6.74 <0.001 35.9 -0.0881 -0.0484\n hp 123 -0.0682 0.0101 -6.74 <0.001 35.9 -0.0881 -0.0484\n hp 180 -0.0682 0.0101 -6.74 <0.001 35.9 -0.0881 -0.0484\n hp 335 -0.0682 0.0101 -6.74 <0.001 35.9 -0.0881 -0.0484\n\nType: response \nComparison: +1\nColumns: rowid, term, contrast, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, hp, predicted_lo, predicted_hi, predicted, mpg \n\n# The full dataset is duplicated with each observation given counterfactual\n# values of 100 and 110 for the `hp` variable. The original `mtcars` includes\n# 32 rows, so the resulting dataset includes 64 rows.\ndg <- datagrid(newdata = mtcars, hp = c(100, 110), grid_type = \"counterfactual\")\nnrow(dg)\n\n[1] 64\n\n# We get the same result by feeding a model instead of a data.frame\nmod <- lm(mpg ~ hp, mtcars)\ndg <- datagrid(model = mod, hp = c(100, 110), grid_type = \"counterfactual\")\nnrow(dg)\n\n[1] 64", "crumbs": [ "Model to Meaning", "Functions", "`datagrid`" ] }, { "objectID": "man/inferences.html", "href": "man/inferences.html", "title": "", "section": "", "text": "Warning: This function is experimental. 
It may be renamed, the user interface may change, or the functionality may migrate to arguments in other marginaleffects functions.\nApply this function to a marginaleffects object to change the inferential method used to compute uncertainty estimates.\n\ninferences(\n x,\n method,\n R = 1000,\n conf_type = \"perc\",\n conformal_test = NULL,\n conformal_calibration = NULL,\n conformal_score = \"residual_abs\",\n ...\n)\n\n\n\n\n\nx\n\n\nObject produced by one of the core marginaleffects functions.\n\n\n\n\nmethod\n\n\nString\n\n\n\"delta\": delta method standard errors\n\n\n\"boot\": bootstrap via the boot package\n\n\n\"fwb\": fractional weighted bootstrap\n\n\n\"rsample\": bootstrap via the rsample package\n\n\n\"simulation\": simulation-based inference with draws from a multivariate normal distribution (Krinsky & Robb, 1986)\n\n\n\"mi\": multiple imputation for missing data\n\n\n\"conformal_split\": prediction intervals using split conformal prediction (see Angelopoulos & Bates, 2022)\n\n\n\"conformal_cv+\": prediction intervals using cross-validation+ conformal prediction (see Barber et al., 2020)\n\n\n\n\n\n\nR\n\n\nNumber of resamples, simulations, or cross-validation folds.\n\n\n\n\nconf_type\n\n\nString: type of bootstrap interval to construct.\n\n\nboot: \"perc\", \"norm\", \"basic\", or \"bca\"\n\n\nfwb: \"perc\", \"norm\", \"basic\", \"bc\", or \"bca\"\n\n\nrsample: \"perc\" or \"bca\"\n\n\nsimulation: argument ignored.\n\n\n\n\n\n\nconformal_test\n\n\nData frame of test data for conformal prediction.\n\n\n\n\nconformal_calibration\n\n\nData frame of calibration data for split conformal prediction (method=\"conformal_split\").\n\n\n\n\nconformal_score\n\n\nString. Warning: The type argument in predictions() must generate predictions which are on the same scale as the outcome variable. Typically, this means that type must be \"response\" or \"probs\".\n\n\n\"residual_abs\" or \"residual_sq\" for regression tasks (numeric outcome)\n\n\n\"softmax\" for classification tasks (when predictions() returns a group column, such as in multinomial or ordinal logit models).\n\n\n\n\n\n\n…\n\n\n\n\nIf method=\"boot\", additional arguments are passed to boot::boot().\n\n\nIf method=\"fwb\", additional arguments are passed to fwb::fwb().\n\n\nIf method=\"rsample\", additional arguments are passed to rsample::bootstraps().\n\n\nAdditional arguments are ignored for all other methods.\n\n\n\n\n\nWhen method=\"simulation\", we conduct simulation-based inference following the method discussed in Krinsky & Robb (1986):\n\n\nDraw R sets of simulated coefficients from a multivariate normal distribution with mean equal to the original model’s estimated coefficients and variance equal to the model’s variance-covariance matrix (classical, \"HC3\", or other).\n\n\nUse the R sets of coefficients to compute R sets of estimands: predictions, comparisons, slopes, or hypotheses.\n\n\nTake quantiles of the resulting distribution of estimands to obtain a confidence interval and the standard deviation of simulated estimates to estimate the standard error.\n\n\nWhen method=\"fwb\", drawn weights are supplied to the model fitting function’s weights argument; if the model doesn’t accept non-integer weights, this method should not be used. If weights were included in the original model fit, they are extracted by weights() and multiplied by the drawn weights. These weights are supplied to the wts argument of the estimation function (e.g., comparisons()).\n\nA marginaleffects object with simulation or bootstrap resamples and objects attached.\n\nKrinsky, I., and A. L. Robb. 1986. 
“On Approximating the Statistical Properties of Elasticities.” Review of Economics and Statistics 68 (4): 715–9.\nKing, Gary, Michael Tomz, and Jason Wittenberg. \"Making the most of statistical analyses: Improving interpretation and presentation.\" American journal of political science (2000): 347-361\nDowd, Bryan E., William H. Greene, and Edward C. Norton. \"Computation of standard errors.\" Health services research 49.2 (2014): 731-750.\nAngelopoulos, Anastasios N., and Stephen Bates. 2022. \"A Gentle Introduction to Conformal Prediction and Distribution-Free Uncertainty Quantification.\" arXiv. https://doi.org/10.48550/arXiv.2107.07511.\nBarber, Rina Foygel, Emmanuel J. Candes, Aaditya Ramdas, and Ryan J. Tibshirani. 2020. “Predictive Inference with the Jackknife+.” arXiv. http://arxiv.org/abs/1905.02928.\n\n\nlibrary(\"marginaleffects\")\n\nlibrary(marginaleffects)\nlibrary(magrittr)\nset.seed(1024)\nmod <- lm(Sepal.Length ~ Sepal.Width * Species, data = iris)\n\n# bootstrap\navg_predictions(mod, by = \"Species\") %>%\n inferences(method = \"boot\")\n\navg_predictions(mod, by = \"Species\") %>%\n inferences(method = \"rsample\")\n\n# Fractional (bayesian) bootstrap\navg_slopes(mod, by = \"Species\") %>%\n inferences(method = \"fwb\") %>%\n posterior_draws(\"rvar\") %>%\n data.frame()\n\n# Simulation-based inference\nslopes(mod) %>%\n inferences(method = \"simulation\") %>%\n head()", "crumbs": [ "Model to Meaning", "Functions", "`inferences`" ] }, { "objectID": "man/datagrid.html#data-grids", "href": "man/datagrid.html#data-grids", "title": "", "section": "", "text": "Generate a data grid of user-specified values for use in the newdata argument of the predictions(), comparisons(), and slopes() functions. This is useful to define where in the predictor space we want to evaluate the quantities of interest. Ex: the predicted outcome or slope for a 37 year old college graduate.\n\ndatagrid(\n ...,\n model = NULL,\n newdata = NULL,\n by = NULL,\n grid_type = \"mean_or_mode\",\n response = FALSE,\n FUN_character = NULL,\n FUN_factor = NULL,\n FUN_logical = NULL,\n FUN_numeric = NULL,\n FUN_integer = NULL,\n FUN_binary = NULL,\n FUN_other = NULL\n)\n\n\n\n\n\n…\n\n\nnamed arguments with vectors of values or functions for user-specified variables.\n\n\nFunctions are applied to the variable in the model dataset or newdata, and must return a vector of the appropriate type.\n\n\nCharacter vectors are automatically transformed to factors if necessary. The output will include all combinations of these variables (see Examples below).\n\n\n\n\n\n\nmodel\n\n\nModel object\n\n\n\n\nnewdata\n\n\ndata.frame (one and only one of the model and newdata arguments can be used.)\n\n\n\n\nby\n\n\ncharacter vector with grouping variables within which FUN_* functions are applied to create \"sub-grids\" with unspecified variables.\n\n\n\n\ngrid_type\n\n\ncharacter. Determines the functions to apply to each variable. The defaults can be overridden by defining individual variables explicitly in …, or by supplying a function to one of the FUN_* arguments.\n\n\n\"mean_or_mode\": Character, factor, logical, and binary variables are set to their modes. 
Numeric, integer, and other variables are set to their means.\n\n\n\"balanced\": Each unique level of character, factor, logical, and binary variables is preserved. Numeric, integer, and other variables are set to their means. Warning: When there are many variables and many levels per variable, a balanced grid can be very large. In those cases, it is better to use grid_type=\"mean_or_mode\" and to specify the unique levels of a subset of named variables explicitly.\n\n\n\"counterfactual\": the entire dataset is duplicated for each combination of the variable values specified in …. Variables not explicitly supplied to datagrid() are set to their observed values in the original dataset.\n\n\n\n\n\n\nresponse\n\n\nLogical. Should the response variable be included in the grid, even if it is not specified explicitly?\n\n\n\n\nFUN_character\n\n\nthe function to be applied to character variables.\n\n\n\n\nFUN_factor\n\n\nthe function to be applied to factor variables. This only applies if the variable in the original data is a factor. For variables converted to factor in a model-fitting formula, for example, FUN_character is used.\n\n\n\n\nFUN_logical\n\n\nthe function to be applied to logical variables.\n\n\n\n\nFUN_numeric\n\n\nthe function to be applied to numeric variables.\n\n\n\n\nFUN_integer\n\n\nthe function to be applied to integer variables.\n\n\n\n\nFUN_binary\n\n\nthe function to be applied to binary variables.\n\n\n\n\nFUN_other\n\n\nthe function to be applied to other variable types.\n\n\n\nIf datagrid is used in a predictions(), comparisons(), or slopes() call as the newdata argument, the model is automatically inserted in the model argument of the datagrid() call, and users do not need to specify either the model or newdata arguments. The same behavior will occur when the value supplied to newdata= is a function call which starts with \"datagrid\". This is intended to allow users to create convenience shortcuts like:\nlibrary(marginaleffects)\nmod <- lm(mpg ~ am + vs + factor(cyl) + hp, mtcars)\ndatagrid_bal <- function(...) datagrid(..., grid_type = \"balanced\")\npredictions(mod, newdata = datagrid_bal(cyl = 4))\n\nIf users supply a model, the data used to fit that model is retrieved using the insight::get_data function.\n\nA data.frame in which each row corresponds to one combination of the named predictors supplied by the user via the … dots. Variables which are not explicitly defined are held at their mean or mode.\n\n\nlibrary(\"marginaleffects\")\n\n# The output only has 2 rows, and all the variables except `hp` are at their\n# mean or mode.\ndatagrid(newdata = mtcars, hp = c(100, 110))\n\n mpg cyl disp drat wt qsec vs am gear carb hp\n1 20.09062 6.1875 230.7219 3.596563 3.21725 17.84875 0 0 3.6875 2.8125 100\n2 20.09062 6.1875 230.7219 3.596563 3.21725 17.84875 0 0 3.6875 2.8125 110\n rowid\n1 1\n2 2\n\n# We get the same result by feeding a model instead of a data.frame\nmod <- lm(mpg ~ hp, mtcars)\ndatagrid(model = mod, hp = c(100, 110))\n\n hp rowid\n1 100 1\n2 110 2\n\n# Use in `marginaleffects` to compute \"Typical Marginal Effects\". When used\n# in `slopes()` or `predictions()` we do not need to specify the\n# `model` or `newdata` arguments.\nslopes(mod, newdata = datagrid(hp = c(100, 110)))\n\n\n Term hp Estimate Std. 
Error z Pr(>|z|) S 2.5 % 97.5 %\n hp 100 -0.0682 0.0101 -6.74 <0.001 35.9 -0.0881 -0.0484\n hp 110 -0.0682 0.0101 -6.74 <0.001 35.9 -0.0881 -0.0484\n\nType: response \nColumns: rowid, term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, hp, predicted_lo, predicted_hi, predicted, mpg \n\n# datagrid accepts functions\ndatagrid(hp = range, cyl = unique, newdata = mtcars)\n\n mpg disp drat wt qsec vs am gear carb hp cyl rowid\n1 20.09062 230.7219 3.596563 3.21725 17.84875 0 0 3.6875 2.8125 52 6 1\n2 20.09062 230.7219 3.596563 3.21725 17.84875 0 0 3.6875 2.8125 52 4 2\n3 20.09062 230.7219 3.596563 3.21725 17.84875 0 0 3.6875 2.8125 52 8 3\n4 20.09062 230.7219 3.596563 3.21725 17.84875 0 0 3.6875 2.8125 335 6 4\n5 20.09062 230.7219 3.596563 3.21725 17.84875 0 0 3.6875 2.8125 335 4 5\n6 20.09062 230.7219 3.596563 3.21725 17.84875 0 0 3.6875 2.8125 335 8 6\n\ncomparisons(mod, newdata = datagrid(hp = fivenum))\n\n\n Term hp Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n hp 52 -0.0682 0.0101 -6.74 <0.001 35.9 -0.0881 -0.0484\n hp 96 -0.0682 0.0101 -6.74 <0.001 35.9 -0.0881 -0.0484\n hp 123 -0.0682 0.0101 -6.74 <0.001 35.9 -0.0881 -0.0484\n hp 180 -0.0682 0.0101 -6.74 <0.001 35.9 -0.0881 -0.0484\n hp 335 -0.0682 0.0101 -6.74 <0.001 35.9 -0.0881 -0.0484\n\nType: response \nComparison: +1\nColumns: rowid, term, contrast, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, hp, predicted_lo, predicted_hi, predicted, mpg \n\n# The full dataset is duplicated with each observation given counterfactual\n# values of 100 and 110 for the `hp` variable. The original `mtcars` includes\n# 32 rows, so the resulting dataset includes 64 rows.\ndg <- datagrid(newdata = mtcars, hp = c(100, 110), grid_type = \"counterfactual\")\nnrow(dg)\n\n[1] 64\n\n# We get the same result by feeding a model instead of a data.frame\nmod <- lm(mpg ~ hp, mtcars)\ndg <- datagrid(model = mod, hp = c(100, 110), grid_type = \"counterfactual\")\nnrow(dg)\n\n[1] 64", + "text": "Warning: This function is experimental. 
It may be renamed, the user interface may change, or the functionality may migrate to arguments in other marginaleffects functions.\nApply this function to a marginaleffects object to change the inferential method used to compute uncertainty estimates.\n\ninferences(\n x,\n method,\n R = 1000,\n conf_type = \"perc\",\n conformal_test = NULL,\n conformal_calibration = NULL,\n conformal_score = \"residual_abs\",\n ...\n)\n\n\n\n\n\nx\n\n\nObject produced by one of the core marginaleffects functions.\n\n\n\n\nmethod\n\n\nString\n\n\n\"delta\": delta method standard errors\n\n\n\"boot\": bootstrap via the boot package\n\n\n\"fwb\": fractional weighted bootstrap\n\n\n\"rsample\": bootstrap via the rsample package\n\n\n\"simulation\": simulation-based inference with draws from a multivariate normal distribution (Krinsky & Robb, 1986)\n\n\n\"mi\": multiple imputation for missing data\n\n\n\"conformal_split\": prediction intervals using split conformal prediction (see Angelopoulos & Bates, 2022)\n\n\n\"conformal_cv+\": prediction intervals using cross-validation+ conformal prediction (see Barber et al., 2020)\n\n\n\n\n\n\nR\n\n\nNumber of resamples, simulations, or cross-validation folds.\n\n\n\n\nconf_type\n\n\nString: type of bootstrap interval to construct.\n\n\nboot: \"perc\", \"norm\", \"basic\", or \"bca\"\n\n\nfwb: \"perc\", \"norm\", \"basic\", \"bc\", or \"bca\"\n\n\nrsample: \"perc\" or \"bca\"\n\n\nsimulation: argument ignored.\n\n\n\n\n\n\nconformal_test\n\n\nData frame of test data for conformal prediction.\n\n\n\n\nconformal_calibration\n\n\nData frame of calibration data for split conformal prediction (method=\"conformal_split\").\n\n\n\n\nconformal_score\n\n\nString. Warning: The type argument in predictions() must generate predictions which are on the same scale as the outcome variable. Typically, this means that type must be \"response\" or \"probs\".\n\n\n\"residual_abs\" or \"residual_sq\" for regression tasks (numeric outcome)\n\n\n\"softmax\" for classification tasks (when predictions() returns a group column, such as in multinomial or ordinal logit models).\n\n\n\n\n\n\n…\n\n\n\n\nIf method=\"boot\", additional arguments are passed to boot::boot().\n\n\nIf method=\"fwb\", additional arguments are passed to fwb::fwb().\n\n\nIf method=\"rsample\", additional arguments are passed to rsample::bootstraps().\n\n\nAdditional arguments are ignored for all other methods.\n\n\n\n\n\nWhen method=\"simulation\", we conduct simulation-based inference following the method discussed in Krinsky & Robb (1986):\n\n\nDraw R sets of simulated coefficients from a multivariate normal distribution with mean equal to the original model’s estimated coefficients and variance equal to the model’s variance-covariance matrix (classical, \"HC3\", or other).\n\n\nUse the R sets of coefficients to compute R sets of estimands: predictions, comparisons, slopes, or hypotheses.\n\n\nTake quantiles of the resulting distribution of estimands to obtain a confidence interval and the standard deviation of simulated estimates to estimate the standard error.\n\n\nWhen method=\"fwb\", drawn weights are supplied to the model fitting function’s weights argument; if the model doesn’t accept non-integer weights, this method should not be used. If weights were included in the original model fit, they are extracted by weights() and multiplied by the drawn weights. These weights are supplied to the wts argument of the estimation function (e.g., comparisons()).\n\nA marginaleffects object with simulation or bootstrap resamples and objects attached.\n\nKrinsky, I., and A. L. Robb. 1986. 
“On Approximating the Statistical Properties of Elasticities.” Review of Economics and Statistics 68 (4): 715–9.\nKing, Gary, Michael Tomz, and Jason Wittenberg. \"Making the most of statistical analyses: Improving interpretation and presentation.\" American journal of political science (2000): 347-361\nDowd, Bryan E., William H. Greene, and Edward C. Norton. \"Computation of standard errors.\" Health services research 49.2 (2014): 731-750.\nAngelopoulos, Anastasios N., and Stephen Bates. 2022. \"A Gentle Introduction to Conformal Prediction and Distribution-Free Uncertainty Quantification.\" arXiv. https://doi.org/10.48550/arXiv.2107.07511.\nBarber, Rina Foygel, Emmanuel J. Candes, Aaditya Ramdas, and Ryan J. Tibshirani. 2020. “Predictive Inference with the Jackknife+.” arXiv. http://arxiv.org/abs/1905.02928.\n\n\nlibrary(\"marginaleffects\")\n\nlibrary(marginaleffects)\nlibrary(magrittr)\nset.seed(1024)\nmod <- lm(Sepal.Length ~ Sepal.Width * Species, data = iris)\n\n# bootstrap\navg_predictions(mod, by = \"Species\") %>%\n inferences(method = \"boot\")\n\navg_predictions(mod, by = \"Species\") %>%\n inferences(method = \"rsample\")\n\n# Fractional (bayesian) bootstrap\navg_slopes(mod, by = \"Species\") %>%\n inferences(method = \"fwb\") %>%\n posterior_draws(\"rvar\") %>%\n data.frame()\n\n# Simulation-based inference\nslopes(mod) %>%\n inferences(method = \"simulation\") %>%\n head()", "crumbs": [ "Model to Meaning", "Functions", - "`datagrid`" + "`inferences`" ] }, { - "objectID": "man/predictions.html", - "href": "man/predictions.html", + "objectID": "man/hypotheses.html", + "href": "man/hypotheses.html", "title": "", "section": "", - "text": "Outcome predicted by a fitted model on a specified scale for a given combination of values of the predictor variables, such as their observed values, their means, or factor levels (a.k.a. \"reference grid\").\n\n\npredictions(): unit-level (conditional) estimates.\n\n\navg_predictions(): average (marginal) estimates.\n\n\nThe newdata argument and the datagrid() function can be used to control where statistics are evaluated in the predictor space: \"at observed values\", \"at the mean\", \"at representative values\", etc.\nSee the predictions vignette and package website for worked examples and case studies:\n\n\nhttps://marginaleffects.com/vignettes/predictions.html\n\n\nhttps://marginaleffects.com/\n\n\npredictions(\n model,\n newdata = NULL,\n variables = NULL,\n vcov = TRUE,\n conf_level = 0.95,\n type = NULL,\n by = FALSE,\n byfun = NULL,\n wts = FALSE,\n transform = NULL,\n hypothesis = NULL,\n equivalence = NULL,\n p_adjust = NULL,\n df = Inf,\n numderiv = \"fdforward\",\n ...\n)\n\navg_predictions(\n model,\n newdata = NULL,\n variables = NULL,\n vcov = TRUE,\n conf_level = 0.95,\n type = NULL,\n by = TRUE,\n byfun = NULL,\n wts = FALSE,\n transform = NULL,\n hypothesis = NULL,\n equivalence = NULL,\n p_adjust = NULL,\n df = Inf,\n numderiv = \"fdforward\",\n ...\n)\n\n\n\n\n\nmodel\n\n\nModel object\n\n\n\n\nnewdata\n\n\nGrid of predictor values at which we evaluate predictions.\n\n\nWarning: Please avoid modifying your dataset between fitting the model and calling a marginaleffects function. This can sometimes lead to unexpected results.\n\n\nNULL (default): Unit-level predictions for each observed value in the dataset (empirical distribution). The dataset is retrieved using insight::get_data(), which tries to extract data from the environment. 
This may produce unexpected results if the original data frame has been altered since fitting the model.\n\n\nstring:\n\n\n\"mean\": Predictions evaluated when each predictor is held at its mean or mode.\n\n\n\"median\": Predictions evaluated when each predictor is held at its median or mode.\n\n\n\"balanced\": Predictions evaluated on a balanced grid with every combination of categories and numeric variables held at their means.\n\n\n\"tukey\": Predictions evaluated at Tukey’s 5 numbers.\n\n\n\"grid\": Predictions evaluated on a grid of representative numbers (Tukey’s 5 numbers and unique values of categorical predictors).\n\n\n\n\ndatagrid() call to specify a custom grid of regressors. For example:\n\n\nnewdata = datagrid(cyl = c(4, 6)): cyl variable equal to 4 and 6 and other regressors fixed at their means or modes.\n\n\nSee the Examples section and the datagrid() documentation.\n\n\n\n\nsubset() call with a single argument to select a subset of the dataset used to fit the model, ex: newdata = subset(treatment == 1)\n\n\ndplyr::filter() call with a single argument to select a subset of the dataset used to fit the model, ex: newdata = filter(treatment == 1)\n\n\n\n\n\n\nvariables\n\n\nCounterfactual variables.\n\n\nOutput:\n\n\npredictions(): The entire dataset is replicated once for each unique combination of variables, and predictions are made.\n\n\navg_predictions(): The entire dataset is replicated, predictions are made, and they are marginalized by variables categories.\n\n\nWarning: This can be expensive in large datasets.\n\n\nWarning: Users who need \"conditional\" predictions should use the newdata argument instead of variables.\n\n\n\n\nInput:\n\n\nNULL: computes one prediction per row of newdata\n\n\nCharacter vector: the dataset is replicated once of every combination of unique values of the variables identified in variables.\n\n\nNamed list: names identify the subset of variables of interest and their values. For numeric variables, the variables argument supports functions and string shortcuts:\n\n\nA function which returns a numeric value\n\n\nNumeric vector: Contrast between the 2nd element and the 1st element of the x vector.\n\n\n\"iqr\": Contrast across the interquartile range of the regressor.\n\n\n\"sd\": Contrast across one standard deviation around the regressor mean.\n\n\n\"2sd\": Contrast across two standard deviations around the regressor mean.\n\n\n\"minmax\": Contrast between the maximum and the minimum values of the regressor.\n\n\n\"threenum\": mean and 1 standard deviation on both sides\n\n\n\"fivenum\": Tukey’s five numbers\n\n\n\n\n\n\n\n\n\n\nvcov\n\n\nType of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:\n\n\nFALSE: Do not compute standard errors. This can speed up computation considerably.\n\n\nTRUE: Unit-level standard errors using the default vcov(model) variance-covariance matrix.\n\n\nString which indicates the kind of uncertainty estimates to return.\n\n\nHeteroskedasticity-consistent: “HC”, “HC0”, “HC1”, “HC2”, “HC3”, “HC4”, “HC4m”, “HC5”. See ?sandwich::vcovHC\n\n\nHeteroskedasticity and autocorrelation consistent: “HAC”\n\n\nMixed-Models degrees of freedom: \"satterthwaite\", \"kenward-roger\"\n\n\nOther: “NeweyWest”, “KernHAC”, “OPG”. See the sandwich package documentation.\n\n\n\n\nOne-sided formula which indicates the name of cluster variables (e.g., ~unit_id). 
This formula is passed to the cluster argument of the sandwich::vcovCL function.\n\n\nSquare covariance matrix\n\n\nFunction which returns a covariance matrix (e.g., stats::vcov(model))\n\n\n\n\n\n\nconf_level\n\n\nnumeric value between 0 and 1. Confidence level to use to build a confidence interval.\n\n\n\n\ntype\n\n\nstring indicates the type (scale) of the predictions used to compute contrasts or slopes. This can differ based on the model type, but will typically be a string such as: \"response\", \"link\", \"probs\", or \"zero\". When an unsupported string is entered, the model-specific list of acceptable values is returned in an error message. When type is NULL, the first entry in the error message is used by default.\n\n\n\n\nby\n\n\nAggregate unit-level estimates (aka, marginalize, average over). Valid inputs:\n\n\nFALSE: return the original unit-level estimates.\n\n\nTRUE: aggregate estimates for each term.\n\n\nCharacter vector of column names in newdata or in the data frame produced by calling the function without the by argument.\n\n\nData frame with a by column of group labels, and merging columns shared by newdata or the data frame produced by calling the same function without the by argument.\n\n\nSee examples below.\n\n\nFor more complex aggregations, you can use the FUN argument of the hypotheses() function. See that function’s documentation and the Hypothesis Test vignettes on the marginaleffects website.\n\n\n\n\n\n\nbyfun\n\n\nA function such as mean() or sum() used to aggregate estimates within the subgroups defined by the by argument. NULL uses the mean() function. Must accept a numeric vector and return a single numeric value. This is sometimes used to take the sum or mean of predicted probabilities across outcome or predictor levels. See examples section.\n\n\n\n\nwts\n\n\nlogical, string or numeric: weights to use when computing average predictions, contrasts or slopes. These weights only affect the averaging in avg_*() or with the by argument, and not unit-level estimates. See ?weighted.mean\n\n\nstring: column name of the weights variable in newdata. When supplying a column name to wts, it is recommended to supply the original data (including the weights variable) explicitly to newdata.\n\n\nnumeric: vector of length equal to the number of rows in the original data or in newdata (if supplied).\n\n\nFALSE: Equal weights.\n\n\nTRUE: Extract weights from the fitted object with insight::find_weights() and use them when taking weighted averages of estimates. Warning: newdata=datagrid() returns a single average weight, which is equivalent to using wts=FALSE\n\n\n\n\n\n\ntransform\n\n\nA function applied to unit-level adjusted predictions and confidence intervals just before the function returns results. For bayesian models, this function is applied to individual draws from the posterior distribution, before computing summaries.\n\n\n\n\nhypothesis\n\n\nspecify a hypothesis test or custom contrast using a numeric value, vector, or matrix; a string equation; string; a formula, or a function.\n\n\nNumeric:\n\n\nSingle value: the null hypothesis used in the computation of Z and p (before applying transform).\n\n\nVector: Weights to compute a linear combination of (custom contrast between) estimates. Length equal to the number of rows generated by the same function call, but without the hypothesis argument.\n\n\nMatrix: Each column is a vector of weights, as describe above, used to compute a distinct linear combination of (contrast between) estimates. 
The column names of the matrix are used as labels in the output.\n\n\n\n\nString equation to specify linear or non-linear hypothesis tests. If the term column uniquely identifies rows, terms can be used in the formula. Otherwise, use b1, b2, etc. to identify the position of each parameter. The b* wildcard can be used to test hypotheses on all estimates. If a named vector is used, the names are used as labels in the output. Examples:\n\n\nhp = drat\n\n\nhp + drat = 12\n\n\nb1 + b2 + b3 = 0\n\n\nb* / b1 = 1\n\n\n\n\nString:\n\n\n\"pairwise\": pairwise differences between estimates in each row.\n\n\n\"reference\": differences between the estimates in each row and the estimate in the first row.\n\n\n\"sequential\": difference between an estimate and the estimate in the next row.\n\n\n\"meandev\": difference between an estimate and the mean of all estimates.\n\n\n\"meanotherdev\": difference between an estimate and the mean of all other estimates, excluding the current one.\n\n\n\"revpairwise\", \"revreference\", \"revsequential\": inverse of the corresponding hypotheses, as described above.\n\n\n\n\nFormula:\n\n\ncomparison ~ pairs | group\n\n\nLeft-hand side determines the type of comparison to conduct: difference or ratio. If the left-hand side is empty, difference is chosen.\n\n\nRight-hand side determines the pairs of estimates to compare: reference, sequential, or meandev\n\n\nOptional: Users can supply grouping variables after a vertical bar to conduct comparisons withing subsets.\n\n\nExamples:\n\n\n~ reference\n\n\nratio ~ pairwise\n\n\ndifference ~ pairwise | groupid\n\n\n\n\n\n\nFunction:\n\n\nAccepts an argument x: object produced by a marginaleffects function or a data frame with column rowid and estimate\n\n\nReturns a data frame with columns term and estimate (mandatory) and rowid (optional).\n\n\nThe function can also accept optional input arguments: newdata, by, draws.\n\n\nThis function approach will not work for Bayesian models or with bootstrapping. In those cases, it is easy to use posterior_draws() to extract and manipulate the draws directly.\n\n\n\n\nSee the Examples section below and the vignette: https://marginaleffects.com/vignettes/hypothesis.html\n\n\n\n\n\n\nequivalence\n\n\nNumeric vector of length 2: bounds used for the two-one-sided test (TOST) of equivalence, and for the non-inferiority and non-superiority tests. See Details section below.\n\n\n\n\np_adjust\n\n\nAdjust p-values for multiple comparisons: \"holm\", \"hochberg\", \"hommel\", \"bonferroni\", \"BH\", \"BY\", or \"fdr\". See stats::p.adjust\n\n\n\n\ndf\n\n\nDegrees of freedom used to compute p values and confidence intervals. A single numeric value between 1 and Inf. When df is Inf, the normal distribution is used. When df is finite, the t distribution is used. See insight::get_df for a convenient function to extract degrees of freedom. Ex: slopes(model, df = insight::get_df(model))\n\n\n\n\nnumderiv\n\n\nstring or list of strings indicating the method to use to for the numeric differentiation used in to compute delta method standard errors.\n\n\n\"fdforward\": finite difference method with forward differences\n\n\n\"fdcenter\": finite difference method with central differences (default)\n\n\n\"richardson\": Richardson extrapolation method\n\n\nExtra arguments can be specified by passing a list to the numDeriv argument, with the name of the method first and named arguments following, ex: numderiv=list(“fdcenter”, eps = 1e-5). 
When an unknown argument is used, marginaleffects prints the list of valid arguments for each method.\n\n\n\n\n\n\n…\n\n\nAdditional arguments are passed to the predict() method supplied by the modeling package.These arguments are particularly useful for mixed-effects or bayesian models (see the online vignettes on the marginaleffects website). Available arguments can vary from model to model, depending on the range of supported arguments by each modeling package. See the \"Model-Specific Arguments\" section of the ?slopes documentation for a non-exhaustive list of available arguments.\n\n\n\nA data.frame with one row per observation and several columns:\n\n\nrowid: row number of the newdata data frame\n\n\ntype: prediction type, as defined by the type argument\n\n\ngroup: (optional) value of the grouped outcome (e.g., categorical outcome models)\n\n\nestimate: predicted outcome\n\n\nstd.error: standard errors computed using the delta method.\n\n\np.value: p value associated to the estimate column. The null is determined by the hypothesis argument (0 by default), and p values are computed before applying the transform argument. For models of class feglm, Gam, glm and negbin, p values are computed on the link scale by default unless the type argument is specified explicitly.\n\n\ns.value: Shannon information transforms of p values. How many consecutive \"heads\" tosses would provide the same amount of evidence (or \"surprise\") against the null hypothesis that the coin is fair? The purpose of S is to calibrate the analyst’s intuition about the strength of evidence encoded in p against a well-known physical phenomenon. See Greenland (2019) and Cole et al. (2020).\n\n\nconf.low: lower bound of the confidence interval (or equal-tailed interval for bayesian models)\n\n\nconf.high: upper bound of the confidence interval (or equal-tailed interval for bayesian models)\n\n\nSee ?print.marginaleffects for printing options.\n\n\n\navg_predictions(): Average predictions\n\n\nStandard errors for all quantities estimated by marginaleffects can be obtained via the delta method. This requires differentiating a function with respect to the coefficients in the model using a finite difference approach. In some models, the delta method standard errors can be sensitive to various aspects of the numeric differentiation strategy, including the step size. By default, the step size is set to 1e-8, or to 1e-4 times the smallest absolute model coefficient, whichever is largest.\nmarginaleffects can delegate numeric differentiation to the numDeriv package, which allows more flexibility. To do this, users can pass arguments to the numDeriv::jacobian function through a global option. For example:\n\n\noptions(marginaleffects_numDeriv = list(method = “simple”, method.args = list(eps = 1e-6)))\n\n\noptions(marginaleffects_numDeriv = list(method = “Richardson”, method.args = list(eps = 1e-5)))\n\n\noptions(marginaleffects_numDeriv = NULL)\n\n\nSee the \"Standard Errors and Confidence Intervals\" vignette on the marginaleffects website for more details on the computation of standard errors:\nhttps://marginaleffects.com/vignettes/uncertainty.html\nNote that the inferences() function can be used to compute uncertainty estimates using a bootstrap or simulation-based inference. See the vignette:\nhttps://marginaleffects.com/vignettes/bootstrap.html\n\nSome model types allow model-specific arguments to modify the nature of marginal effects, predictions, marginal means, and contrasts. 
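For example, here is a short sketch (assuming the lme4 package is installed) of passing a model-specific argument through ... to the predict() method, in this case re.form = NA to request population-level predictions from a mixed-effects model:\n\nlibrary(\"lme4\")\nm <- lmer(Reaction ~ Days + (Days | Subject), data = sleepstudy)\n# re.form is forwarded to lme4::predict.merMod (see the table below)\navg_predictions(m, re.form = NA)\n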
Please report other package-specific predict() arguments on Github so we can add them to the table below.\nhttps://github.com/vincentarelbundock/marginaleffects/issues\n\n\n\nPackage\n\n\nClass\n\n\nArgument\n\n\nDocumentation\n\n\n\n\nbrms\n\n\nbrmsfit\n\n\nndraws\n\n\nbrms::posterior_predict\n\n\n\n\n\n\n\n\nre_formula\n\n\nbrms::posterior_predict\n\n\n\n\nlme4\n\n\nmerMod\n\n\nre.form\n\n\nlme4::predict.merMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nlme4::predict.merMod\n\n\n\n\nglmmTMB\n\n\nglmmTMB\n\n\nre.form\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nzitype\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\nmgcv\n\n\nbam\n\n\nexclude\n\n\nmgcv::predict.bam\n\n\n\n\n\n\ngam\n\n\nexclude\n\n\nmgcv::predict.gam\n\n\n\n\nrobustlmm\n\n\nrlmerMod\n\n\nre.form\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\nMCMCglmm\n\n\nMCMCglmm\n\n\nndraws\n\n\n\n\n\n\nsampleSelection\n\n\nselection\n\n\npart\n\n\nsampleSelection::predict.selection\n\n\n\n\n\n\n\nBy default, credible intervals in bayesian models are built as equal-tailed intervals. This can be changed to a highest density interval by setting a global option:\noptions(“marginaleffects_posterior_interval” = “eti”)\noptions(“marginaleffects_posterior_interval” = “hdi”)\nBy default, the center of the posterior distribution in bayesian models is identified by the median. Users can use a different summary function by setting a global option:\noptions(“marginaleffects_posterior_center” = “mean”)\noptions(“marginaleffects_posterior_center” = “median”)\nWhen estimates are averaged using the by argument, the tidy() function, or the summary() function, the posterior distribution is marginalized twice over. First, we take the average across units but within each iteration of the MCMC chain, according to what the user requested in by argument or tidy()/summary() functions. Then, we identify the center of the resulting posterior using the function supplied to the “marginaleffects_posterior_center” option (the median by default).\n\n\\(\\theta\\) is an estimate, \\(\\sigma_\\theta\\) its estimated standard error, and \\([a, b]\\) are the bounds of the interval supplied to the equivalence argument.\nNon-inferiority:\n\n\n\\(H_0\\): \\(\\theta \\leq a\\)\n\n\n\\(H_1\\): \\(\\theta > a\\)\n\n\n\\(t=(\\theta - a)/\\sigma_\\theta\\)\n\n\np: Upper-tail probability\n\n\nNon-superiority:\n\n\n\\(H_0\\): \\(\\theta \\geq b\\)\n\n\n\\(H_1\\): \\(\\theta < b\\)\n\n\n\\(t=(\\theta - b)/\\sigma_\\theta\\)\n\n\np: Lower-tail probability\n\n\nEquivalence: Two One-Sided Tests (TOST)\n\n\np: Maximum of the non-inferiority and non-superiority p values.\n\n\nThanks to Russell V. Lenth for the excellent emmeans package and documentation which inspired this feature.\n\nThe type argument determines the scale of the predictions used to compute quantities of interest with functions from the marginaleffects package. Admissible values for type depend on the model object. When users specify an incorrect value for type, marginaleffects will raise an informative error with a list of valid type values for the specific model object. The first entry in the list in that error message is the default type.\nThe invlink(link) is a special type defined by marginaleffects. It is available for some (but not all) models, and only for the predictions() function. 
With this link type, we first compute predictions on the link scale, then we use the inverse link function to backtransform the predictions to the response scale. This is useful for models with non-linear link functions as it can ensure that confidence intervals stay within desirable bounds, ex: 0 to 1 for a logit model. Note that an average of estimates with type=“invlink(link)” will not always be equivalent to the average of estimates with type=“response”. This type is default when calling predictions(). It is available—but not default—when calling avg_predictions() or predictions() with the by argument.\nSome of the most common type values are:\nresponse, link, E, Ep, average, class, conditional, count, cum.prob, cumhaz, cumprob, density, detection, disp, ev, expected, expvalue, fitted, hazard, invlink(link), latent, latent_N, linear, linear.predictor, linpred, location, lp, mean, numeric, p, ppd, pr, precision, prediction, prob, probability, probs, quantile, risk, rmst, scale, survival, unconditional, utility, variance, xb, zero, zlink, zprob\n\nBehind the scenes, the arguments of marginaleffects functions are evaluated in this order:\n\n\nnewdata\n\n\nvariables\n\n\ncomparison and slopes\n\n\nby\n\n\nvcov\n\n\nhypothesis\n\n\ntransform\n\n\nThe slopes() and comparisons() functions can use parallelism to speed up computation. Operations are parallelized for the computation of standard errors, at the model coefficient level. There is always considerable overhead when using parallel computation, mainly involved in passing the whole dataset to the different processes. Thus, parallel computation is most likely to be useful when the model includes many parameters and the dataset is relatively small.\nWarning: In many cases, parallel processing will not be useful at all.\nTo activate parallel computation, users must load the future.apply package, call plan() function, and set a global option. For example:\n\nlibrary(future.apply)\nplan(\"multicore\", workers = 4)\noptions(marginaleffects_parallel = TRUE)\n\nslopes(model)\n\n\nTo disable parallelism in marginaleffects altogether, you can set a global option:\n\noptions(marginaleffects_parallel = FALSE)\n\n\n\nThe behavior of marginaleffects functions can be modified by setting global options.\nDisable some safety checks:\n\noptions(marginaleffects_safe = FALSE)\n\n\nOmit some columns from the printed output:\n\noptions(marginaleffects_print_omit = c(\"p.value\", \"s.value\"))`\n\n\n\n\n\nGreenland S. 2019. \"Valid P-Values Behave Exactly as They Should: Some Misleading Criticisms of P-Values and Their Resolution With S-Values.\" The American Statistician. 73(S1): 106–114.\n\n\nCole, Stephen R, Jessie K Edwards, and Sander Greenland. 2020. \"Surprise!\" American Journal of Epidemiology 190 (2): 191–93. 
https://doi.org/10.1093/aje/kwaa136\n\n\n\nlibrary(\"marginaleffects\")\n\n\n\n# Adjusted Prediction for every row of the original dataset\nmod <- lm(mpg ~ hp + factor(cyl), data = mtcars)\npred <- predictions(mod)\nhead(pred)\n\n# Adjusted Predictions at User-Specified Values of the Regressors\npredictions(mod, newdata = datagrid(hp = c(100, 120), cyl = 4))\n\nm <- lm(mpg ~ hp + drat + factor(cyl) + factor(am), data = mtcars)\npredictions(m, newdata = datagrid(FUN_factor = unique, FUN_numeric = median))\n\n# Average Adjusted Predictions (AAP)\nlibrary(dplyr)\nmod <- lm(mpg ~ hp * am * vs, mtcars)\n\navg_predictions(mod)\n\npredictions(mod, by = \"am\")\n\n# Conditional Adjusted Predictions\nplot_predictions(mod, condition = \"hp\")\n\n# Counterfactual predictions with the `variables` argument\n# the `mtcars` dataset has 32 rows\n\nmod <- lm(mpg ~ hp + am, data = mtcars)\np <- predictions(mod)\nhead(p)\nnrow(p)\n\n# average counterfactual predictions\navg_predictions(mod, variables = \"am\")\n\n# counterfactual predictions obtained by replicating the entire for different\n# values of the predictors\np <- predictions(mod, variables = list(hp = c(90, 110)))\nnrow(p)\n\n\n# hypothesis test: is the prediction in the 1st row equal to the prediction in the 2nd row\nmod <- lm(mpg ~ wt + drat, data = mtcars)\n\npredictions(\n mod,\n newdata = datagrid(wt = 2:3),\n hypothesis = \"b1 = b2\")\n\n# same hypothesis test using row indices\npredictions(\n mod,\n newdata = datagrid(wt = 2:3),\n hypothesis = \"b1 - b2 = 0\")\n\n# same hypothesis test using numeric vector of weights\npredictions(\n mod,\n newdata = datagrid(wt = 2:3),\n hypothesis = c(1, -1))\n\n# two custom contrasts using a matrix of weights\nlc <- matrix(c(\n 1, -1,\n 2, 3),\n ncol = 2)\npredictions(\n mod,\n newdata = datagrid(wt = 2:3),\n hypothesis = lc)\n\n\n# `by` argument\nmod <- lm(mpg ~ hp * am * vs, data = mtcars)\npredictions(mod, by = c(\"am\", \"vs\"))\n\nlibrary(nnet)\nnom <- multinom(factor(gear) ~ mpg + am * vs, data = mtcars, trace = FALSE)\n\n# first 5 raw predictions\npredictions(nom, type = \"probs\") |> head()\n\n# average predictions\navg_predictions(nom, type = \"probs\", by = \"group\")\n\nby <- data.frame(\n group = c(\"3\", \"4\", \"5\"),\n by = c(\"3,4\", \"3,4\", \"5\"))\n\npredictions(nom, type = \"probs\", by = by)\n\n# sum of predicted probabilities for combined response levels\nmod <- multinom(factor(cyl) ~ mpg + am, data = mtcars, trace = FALSE)\nby <- data.frame(\n by = c(\"4,6\", \"4,6\", \"8\"),\n group = as.character(c(4, 6, 8)))\npredictions(mod, newdata = \"mean\", byfun = sum, by = by)", + "text": "Uncertainty estimates are calculated as first-order approximate standard errors for linear or non-linear functions of a vector of random variables with known or estimated covariance matrix. In that sense, hypotheses emulates the behavior of the excellent and well-established car::deltaMethod and car::linearHypothesis functions, but it supports more models; requires fewer dependencies; expands the range of tests to equivalence and superiority/inferiority; and offers convenience features like robust standard errors.\nTo learn more, read the hypothesis tests vignette, visit the package website, or scroll down this page for a full list of vignettes:\n\n\nhttps://marginaleffects.com/vignettes/hypothesis.html\n\n\nhttps://marginaleffects.com/\n\n\nWarning #1: Tests are conducted directly on the scale defined by the type argument. 
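As a quick illustration of this point (a sketch only; the model and null values are arbitrary), the same quantity can be tested on two different scales:\n\nmod <- glm(vs ~ hp + mpg, data = mtcars, family = binomial)\n# null hypothesis of 0.5 on the probability (response) scale\navg_predictions(mod, type = \"response\", hypothesis = 0.5)\n# null hypothesis of 0 on the link (log-odds) scale\navg_predictions(mod, type = \"link\", hypothesis = 0)\n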
For some models, it can make sense to conduct hypothesis or equivalence tests on the “link” scale instead of the “response” scale, which is often the default.\nWarning #2: For hypothesis tests on objects produced by the marginaleffects package, it is safer to use the hypothesis argument of the original function. Using hypotheses() may not work in certain environments, in lists, or when working programmatically with *apply style functions.\nWarning #3: The tests assume that the hypothesis expression is (approximately) normally distributed, which for non-linear functions of the parameters may not be realistic. More reliable confidence intervals can be obtained using the inferences() function with method = \"boot\".\n\nhypotheses(\n model,\n hypothesis = NULL,\n vcov = NULL,\n conf_level = 0.95,\n df = NULL,\n equivalence = NULL,\n joint = FALSE,\n joint_test = \"f\",\n numderiv = \"fdforward\",\n ...\n)\n\n\n\n\n\nmodel\n\n\nModel object or object generated by the comparisons(), slopes(), or predictions() functions.\n\n\n\n\nhypothesis\n\n\nspecify a hypothesis test or custom contrast using a numeric value, vector, or matrix; a string equation; a string; a formula; or a function.\n\n\nNumeric:\n\n\nSingle value: the null hypothesis used in the computation of Z and p (before applying transform).\n\n\nVector: Weights to compute a linear combination of (custom contrast between) estimates. Length equal to the number of rows generated by the same function call, but without the hypothesis argument.\n\n\nMatrix: Each column is a vector of weights, as described above, used to compute a distinct linear combination of (contrast between) estimates. The column names of the matrix are used as labels in the output.\n\n\n\n\nString equation to specify linear or non-linear hypothesis tests. If the term column uniquely identifies rows, terms can be used in the formula. Otherwise, use b1, b2, etc. to identify the position of each parameter. The b* wildcard can be used to test hypotheses on all estimates. If a named vector is used, the names are used as labels in the output. Examples:\n\n\nhp = drat\n\n\nhp + drat = 12\n\n\nb1 + b2 + b3 = 0\n\n\nb* / b1 = 1\n\n\n\n\nString:\n\n\n\"pairwise\": pairwise differences between estimates in each row.\n\n\n\"reference\": differences between the estimates in each row and the estimate in the first row.\n\n\n\"sequential\": difference between an estimate and the estimate in the next row.\n\n\n\"meandev\": difference between an estimate and the mean of all estimates.\n\n\n\"meanotherdev\": difference between an estimate and the mean of all other estimates, excluding the current one.\n\n\n\"revpairwise\", \"revreference\", \"revsequential\": inverse of the corresponding hypotheses, as described above.\n\n\n\n\nFormula:\n\n\ncomparison ~ pairs | group\n\n\nLeft-hand side determines the type of comparison to conduct: difference or ratio. 
If the left-hand side is empty, difference is chosen.\n\n\nRight-hand side determines the pairs of estimates to compare: reference, sequential, or meandev.\n\n\nOptional: Users can supply grouping variables after a vertical bar to conduct comparisons within subsets.\n\n\nExamples:\n\n\n~ reference\n\n\nratio ~ pairwise\n\n\ndifference ~ pairwise | groupid\n\n\n\n\n\n\nFunction:\n\n\nAccepts an argument x: object produced by a marginaleffects function or a data frame with column rowid and estimate\n\n\nReturns a data frame with columns term and estimate (mandatory) and rowid (optional).\n\n\nThe function can also accept optional input arguments: newdata, by, draws.\n\n\nThis function approach will not work for Bayesian models or with bootstrapping. In those cases, it is easy to use posterior_draws() to extract and manipulate the draws directly.\n\n\n\n\nSee the Examples section below and the vignette: https://marginaleffects.com/vignettes/hypothesis.html\n\n\n\n\n\n\nvcov\n\n\nType of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:\n\n\nFALSE: Do not compute standard errors. This can speed up computation considerably.\n\n\nTRUE: Unit-level standard errors using the default vcov(model) variance-covariance matrix.\n\n\nString which indicates the kind of uncertainty estimates to return.\n\n\nHeteroskedasticity-consistent: \"HC\", \"HC0\", \"HC1\", \"HC2\", \"HC3\", \"HC4\", \"HC4m\", \"HC5\". See ?sandwich::vcovHC\n\n\nHeteroskedasticity and autocorrelation consistent: \"HAC\"\n\n\nMixed-Models degrees of freedom: \"satterthwaite\", \"kenward-roger\"\n\n\nOther: \"NeweyWest\", \"KernHAC\", \"OPG\". See the sandwich package documentation.\n\n\n\n\nOne-sided formula which indicates the name of cluster variables (e.g., ~unit_id). This formula is passed to the cluster argument of the sandwich::vcovCL function.\n\n\nSquare covariance matrix\n\n\nFunction which returns a covariance matrix (e.g., stats::vcov(model))\n\n\n\n\n\n\nconf_level\n\n\nnumeric value between 0 and 1. Confidence level to use to build a confidence interval.\n\n\n\n\ndf\n\n\nDegrees of freedom used to compute p values and confidence intervals. A single numeric value between 1 and Inf. When using joint_test=\"f\", the df argument should be a numeric vector of length 2.\n\n\n\n\nequivalence\n\n\nNumeric vector of length 2: bounds used for the two-one-sided test (TOST) of equivalence, and for the non-inferiority and non-superiority tests. See Details section below.\n\n\n\n\njoint\n\n\nJoint test of statistical significance. The null hypothesis value can be set using the hypothesis argument.\n\n\nFALSE: Hypotheses are not tested jointly.\n\n\nTRUE: All parameters are tested jointly.\n\n\nString: A regular expression to match parameters to be tested jointly. grep(joint, perl = TRUE)\n\n\nCharacter vector of parameter names to be tested. Characters refer to the names of the vector returned by coef(object).\n\n\nInteger vector of indices. Which parameter positions to test jointly.\n\n\n\n\n\n\njoint_test\n\n\nA character string specifying the type of test, either \"f\" or \"chisq\". 
The null hypothesis is set by the hypothesis argument, with default null equal to 0 for all parameters.\n\n\n\n\nnumderiv\n\n\nstring or list of strings indicating the method to use for the numeric differentiation used to compute delta method standard errors.\n\n\n\"fdforward\": finite difference method with forward differences\n\n\n\"fdcenter\": finite difference method with central differences (default)\n\n\n\"richardson\": Richardson extrapolation method\n\n\nExtra arguments can be specified by passing a list to the numderiv argument, with the name of the method first and named arguments following, ex: numderiv=list(\"fdcenter\", eps = 1e-5). When an unknown argument is used, marginaleffects prints the list of valid arguments for each method.\n\n\n\n\n\n\n…\n\n\nAdditional arguments are passed to the predict() method supplied by the modeling package. These arguments are particularly useful for mixed-effects or bayesian models (see the online vignettes on the marginaleffects website). Available arguments can vary from model to model, depending on the range of supported arguments by each modeling package. See the \"Model-Specific Arguments\" section of the ?slopes documentation for a non-exhaustive list of available arguments.\n\n\n\nThe test statistic for the joint Wald test is calculated as \\((R\\hat{\\theta} - r)' [R \\hat{V} R']^{-1} (R\\hat{\\theta} - r) / Q\\), where \\(\\hat{\\theta}\\) is the vector of estimated parameters, \\(\\hat{V}\\) is the estimated covariance matrix, \\(R\\) is a \\(Q \\times P\\) matrix for testing \\(Q\\) hypotheses on \\(P\\) parameters, \\(r\\) is a \\(Q \\times 1\\) vector for the null hypothesis, and \\(Q\\) is the number of rows in \\(R\\). If the test is a Chi-squared test, the test statistic is not normalized (it is not divided by \\(Q\\)).\nThe p-value is then calculated based on either the F-distribution (for the F test) or the Chi-squared distribution (for the Chi-squared test). For the F test, the degrees of freedom are \\(Q\\) and \\(n - P\\), where \\(n\\) is the sample size and \\(P\\) is the number of parameters. For the Chi-squared test, the degrees of freedom are \\(Q\\).\n\n\\(\\theta\\) is an estimate, \\(\\sigma_\\theta\\) its estimated standard error, and \\([a, b]\\) are the bounds of the interval supplied to the equivalence argument.\nNon-inferiority:\n\n\n\\(H_0\\): \\(\\theta \\leq a\\)\n\n\n\\(H_1\\): \\(\\theta > a\\)\n\n\n\\(t=(\\theta - a)/\\sigma_\\theta\\)\n\n\np: Upper-tail probability\n\n\nNon-superiority:\n\n\n\\(H_0\\): \\(\\theta \\geq b\\)\n\n\n\\(H_1\\): \\(\\theta < b\\)\n\n\n\\(t=(\\theta - b)/\\sigma_\\theta\\)\n\n\np: Lower-tail probability\n\n\nEquivalence: Two One-Sided Tests (TOST)\n\n\np: Maximum of the non-inferiority and non-superiority p values.\n\n\nThanks to Russell V. Lenth for the excellent emmeans package and documentation which inspired this feature.\n\n\nlibrary(\"marginaleffects\")\n\nmod <- lm(mpg ~ hp + wt + factor(cyl), data = mtcars)\n\nhypotheses(mod)\n\n\n Term Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n (Intercept) 35.8460 2.041 17.56 <0.001 227.0 31.8457 39.846319\n hp -0.0231 0.012 -1.93 0.0531 4.2 -0.0465 0.000306\n wt -3.1814 0.720 -4.42 <0.001 16.6 -4.5918 -1.771012\n factor(cyl)6 -3.3590 1.402 -2.40 0.0166 5.9 -6.1062 -0.611803\n factor(cyl)8 -3.1859 2.170 -1.47 0.1422 2.8 -7.4399 1.068169\n\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# Test of equality between coefficients\nhypotheses(mod, hypothesis = \"hp = wt\")\n\n\n Estimate Std. 
Error z Pr(>|z|) S 2.5 % 97.5 %\n 3.16 0.72 4.39 <0.001 16.4 1.75 4.57\n\nTerm: hp = wt\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# Non-linear function\nhypotheses(mod, hypothesis = \"exp(hp + wt) = 0.1\")\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n -0.0594 0.0292 -2.04 0.0418 4.6 -0.117 -0.0022\n\nTerm: exp(hp + wt) = 0.1\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# Robust standard errors\nhypotheses(mod, hypothesis = \"hp = wt\", vcov = \"HC3\")\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n 3.16 0.805 3.92 <0.001 13.5 1.58 4.74\n\nTerm: hp = wt\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# b1, b2, ... shortcuts can be used to identify the position of the\n# parameters of interest in the output of\nhypotheses(mod, hypothesis = \"b2 = b3\")\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n 3.16 0.72 4.39 <0.001 16.4 1.75 4.57\n\nTerm: b2 = b3\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# wildcard\nhypotheses(mod, hypothesis = \"b* / b2 = 1\")\n\n\n Term Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n b1 / b2 = 1 -1551 764.0 -2.03 0.0423 4.6 -3048.9 -54\n b2 / b2 = 1 0 NA NA NA NA NA NA\n b3 / b2 = 1 137 78.1 1.75 0.0804 3.6 -16.6 290\n b4 / b2 = 1 144 111.0 1.30 0.1938 2.4 -73.3 362\n b5 / b2 = 1 137 151.9 0.90 0.3679 1.4 -161.0 435\n\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# term names with special characters have to be enclosed in backticks\nhypotheses(mod, hypothesis = \"`factor(cyl)6` = `factor(cyl)8`\")\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n -0.173 1.65 -0.105 0.917 0.1 -3.41 3.07\n\nTerm: `factor(cyl)6` = `factor(cyl)8`\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\nmod2 <- lm(mpg ~ hp * drat, data = mtcars)\nhypotheses(mod2, hypothesis = \"`hp:drat` = drat\")\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n -6.08 2.89 -2.1 0.0357 4.8 -11.8 -0.405\n\nTerm: `hp:drat` = drat\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# predictions(), comparisons(), and slopes()\nmod <- glm(am ~ hp + mpg, data = mtcars, family = binomial)\ncmp <- comparisons(mod, newdata = \"mean\")\nhypotheses(cmp, hypothesis = \"b1 = b2\")\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n -0.28 0.104 -2.7 0.00684 7.2 -0.483 -0.0771\n\nTerm: b1=b2\nType: response \nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\nmfx <- slopes(mod, newdata = \"mean\")\nhypotheses(cmp, hypothesis = \"b2 = 0.2\")\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n 0.0938 0.109 0.857 0.391 1.4 -0.121 0.308\n\nTerm: b2=0.2\nType: response \nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\npre <- predictions(mod, newdata = datagrid(hp = 110, mpg = c(30, 35)))\nhypotheses(pre, hypothesis = \"b1 = b2\")\n\n\n Estimate Std. 
Error z Pr(>|z|) S 2.5 % 97.5 %\n -3.57e-05 0.000172 -0.207 0.836 0.3 -0.000373 0.000302\n\nTerm: b1=b2\nType: response \nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# The `hypothesis` argument can be used to compute standard errors for fitted values\nmod <- glm(am ~ hp + mpg, data = mtcars, family = binomial)\n\nf <- function(x) predict(x, type = \"link\", newdata = mtcars)\np <- hypotheses(mod, hypothesis = f)\nhead(p)\n\n\n Term Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n 1 -1.098 0.716 -1.534 0.125 3.0 -2.50 0.305\n 2 -1.098 0.716 -1.534 0.125 3.0 -2.50 0.305\n 3 0.233 0.781 0.299 0.765 0.4 -1.30 1.764\n 4 -0.595 0.647 -0.919 0.358 1.5 -1.86 0.674\n 5 -0.418 0.647 -0.645 0.519 0.9 -1.69 0.851\n 6 -5.026 2.195 -2.290 0.022 5.5 -9.33 -0.725\n\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\nf <- function(x) predict(x, type = \"response\", newdata = mtcars)\np <- hypotheses(mod, hypothesis = f)\nhead(p)\n\n\n Term Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n 1 0.25005 0.1343 1.862 0.06257 4.0 -0.0131 0.5132\n 2 0.25005 0.1343 1.862 0.06257 4.0 -0.0131 0.5132\n 3 0.55803 0.1926 2.898 0.00376 8.1 0.1806 0.9355\n 4 0.35560 0.1483 2.398 0.01648 5.9 0.0650 0.6462\n 5 0.39710 0.1550 2.562 0.01041 6.6 0.0933 0.7009\n 6 0.00652 0.0142 0.459 0.64653 0.6 -0.0213 0.0344\n\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# Complex aggregation\n# Step 1: Collapse predicted probabilities by outcome level, for each individual\n# Step 2: Take the mean of the collapsed probabilities by group and `cyl`\nlibrary(dplyr)\nlibrary(MASS)\n\ndat <- transform(mtcars, gear = factor(gear))\nmod <- polr(gear ~ factor(cyl) + hp, dat)\n\naggregation_fun <- function(x) {\n predictions(x, vcov = FALSE) |>\n mutate(group = ifelse(group %in% c(\"3\", \"4\"), \"3 & 4\", \"5\")) |>\n summarize(estimate = sum(estimate), .by = c(\"rowid\", \"cyl\", \"group\")) |>\n summarize(estimate = mean(estimate), .by = c(\"cyl\", \"group\")) |>\n rename(term = cyl)\n}\n\nhypotheses(mod, hypothesis = aggregation_fun)\n\n\n Group Term Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n 3 & 4 6 0.8390 0.0651 12.89 <0.001 123.9 0.7115 0.967\n 3 & 4 4 0.7197 0.1099 6.55 <0.001 34.0 0.5044 0.935\n 3 & 4 8 0.9283 0.0174 53.45 <0.001 Inf 0.8943 0.962\n 5 6 0.1610 0.0651 2.47 0.0134 6.2 0.0334 0.289\n 5 4 0.2803 0.1099 2.55 0.0108 6.5 0.0649 0.496\n 5 8 0.0717 0.0174 4.13 <0.001 14.7 0.0377 0.106\n\nColumns: term, group, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# Equivalence, non-inferiority, and non-superiority tests\nmod <- lm(mpg ~ hp + factor(gear), data = mtcars)\np <- predictions(mod, newdata = \"median\")\nhypotheses(p, equivalence = c(17, 18))\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 % p (NonSup) p (NonInf)\n 19.7 1 19.6 <0.001 281.3 17.7 21.6 0.951 0.00404\n p (Equiv) hp gear\n 0.951 123 3\n\nType: response \nColumns: rowid, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, hp, gear, mpg, statistic.noninf, statistic.nonsup, p.value.noninf, p.value.nonsup, p.value.equiv \n\nmfx <- avg_slopes(mod, variables = \"hp\")\nhypotheses(mfx, equivalence = c(-.1, .1))\n\n\n Estimate Std. 
Error z Pr(>|z|) S 2.5 % 97.5 % p (NonSup) p (NonInf)\n -0.0669 0.011 -6.05 <0.001 29.4 -0.0885 -0.0452 <0.001 0.00135\n p (Equiv)\n 0.00135\n\nTerm: hp\nType: response \nComparison: mean(dY/dX)\nColumns: term, contrast, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, predicted_lo, predicted_hi, predicted, statistic.noninf, statistic.nonsup, p.value.noninf, p.value.nonsup, p.value.equiv \n\ncmp <- avg_comparisons(mod, variables = \"gear\", hypothesis = \"pairwise\")\nhypotheses(cmp, equivalence = c(0, 10))\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 % p (NonSup) p (NonInf)\n -3.94 2.05 -1.92 0.0543 4.2 -7.95 0.0727 <0.001 0.973\n p (Equiv)\n 0.973\n\nTerm: (mean(4) - mean(3)) - (mean(5) - mean(3))\nType: response \nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, statistic.noninf, statistic.nonsup, p.value.noninf, p.value.nonsup, p.value.equiv \n\n# joint hypotheses: character vector\nmodel <- lm(mpg ~ as.factor(cyl) * hp, data = mtcars)\nhypotheses(model, joint = c(\"as.factor(cyl)6:hp\", \"as.factor(cyl)8:hp\"))\n\n\n\nJoint hypothesis test:\nas.factor(cyl)6:hp = 0\nas.factor(cyl)8:hp = 0\n \n F Pr(>|F|) Df 1 Df 2\n 2.11 0.142 2 26\n\nColumns: statistic, p.value, df1, df2 \n\n# joint hypotheses: regular expression\nhypotheses(model, joint = \"cyl\")\n\n\n\nJoint hypothesis test:\n as.factor(cyl)6 = 0\n as.factor(cyl)8 = 0\n as.factor(cyl)6:hp = 0\n as.factor(cyl)8:hp = 0\n \n F Pr(>|F|) Df 1 Df 2\n 5.7 0.00197 4 26\n\nColumns: statistic, p.value, df1, df2 \n\n# joint hypotheses: integer indices\nhypotheses(model, joint = 2:3)\n\n\n\nJoint hypothesis test:\n as.factor(cyl)6 = 0\n as.factor(cyl)8 = 0\n \n F Pr(>|F|) Df 1 Df 2\n 6.12 0.00665 2 26\n\nColumns: statistic, p.value, df1, df2 \n\n# joint hypotheses: different null hypotheses\nhypotheses(model, joint = 2:3, hypothesis = 1)\n\n\n\nJoint hypothesis test:\n as.factor(cyl)6 = 1\n as.factor(cyl)8 = 1\n \n F Pr(>|F|) Df 1 Df 2\n 6.84 0.00411 2 26\n\nColumns: statistic, p.value, df1, df2 \n\nhypotheses(model, joint = 2:3, hypothesis = 1:2)\n\n\n\nJoint hypothesis test:\n as.factor(cyl)6 = 1\n as.factor(cyl)8 = 2\n \n F Pr(>|F|) Df 1 Df 2\n 7.47 0.00273 2 26\n\nColumns: statistic, p.value, df1, df2 \n\n# joint hypotheses: marginaleffects object\ncmp <- avg_comparisons(model)\nhypotheses(cmp, joint = \"cyl\")\n\n\n\nJoint hypothesis test:\n cyl mean(6) - mean(4) = 0\n cyl mean(8) - mean(4) = 0\n \n F Pr(>|F|) Df 1 Df 2\n 1.6 0.221 2 26\n\nColumns: statistic, p.value, df1, df2", "crumbs": [ "Model to Meaning", "Functions", - "`predictions`" + "`hypotheses`" ] }, { - "objectID": "man/predictions.html#predictions", - "href": "man/predictions.html#predictions", + "objectID": "man/hypotheses.html#non-linear-tests-for-null-hypotheses-joint-hypotheses-equivalence-non-superiority-and-non-inferiority", + "href": "man/hypotheses.html#non-linear-tests-for-null-hypotheses-joint-hypotheses-equivalence-non-superiority-and-non-inferiority", "title": "", "section": "", - "text": "Outcome predicted by a fitted model on a specified scale for a given combination of values of the predictor variables, such as their observed values, their means, or factor levels (a.k.a. 
\"reference grid\").\n\n\npredictions(): unit-level (conditional) estimates.\n\n\navg_predictions(): average (marginal) estimates.\n\n\nThe newdata argument and the datagrid() function can be used to control where statistics are evaluated in the predictor space: \"at observed values\", \"at the mean\", \"at representative values\", etc.\nSee the predictions vignette and package website for worked examples and case studies:\n\n\nhttps://marginaleffects.com/vignettes/predictions.html\n\n\nhttps://marginaleffects.com/\n\n\npredictions(\n model,\n newdata = NULL,\n variables = NULL,\n vcov = TRUE,\n conf_level = 0.95,\n type = NULL,\n by = FALSE,\n byfun = NULL,\n wts = FALSE,\n transform = NULL,\n hypothesis = NULL,\n equivalence = NULL,\n p_adjust = NULL,\n df = Inf,\n numderiv = \"fdforward\",\n ...\n)\n\navg_predictions(\n model,\n newdata = NULL,\n variables = NULL,\n vcov = TRUE,\n conf_level = 0.95,\n type = NULL,\n by = TRUE,\n byfun = NULL,\n wts = FALSE,\n transform = NULL,\n hypothesis = NULL,\n equivalence = NULL,\n p_adjust = NULL,\n df = Inf,\n numderiv = \"fdforward\",\n ...\n)\n\n\n\n\n\nmodel\n\n\nModel object\n\n\n\n\nnewdata\n\n\nGrid of predictor values at which we evaluate predictions.\n\n\nWarning: Please avoid modifying your dataset between fitting the model and calling a marginaleffects function. This can sometimes lead to unexpected results.\n\n\nNULL (default): Unit-level predictions for each observed value in the dataset (empirical distribution). The dataset is retrieved using insight::get_data(), which tries to extract data from the environment. This may produce unexpected results if the original data frame has been altered since fitting the model.\n\n\nstring:\n\n\n\"mean\": Predictions evaluated when each predictor is held at its mean or mode.\n\n\n\"median\": Predictions evaluated when each predictor is held at its median or mode.\n\n\n\"balanced\": Predictions evaluated on a balanced grid with every combination of categories and numeric variables held at their means.\n\n\n\"tukey\": Predictions evaluated at Tukey’s 5 numbers.\n\n\n\"grid\": Predictions evaluated on a grid of representative numbers (Tukey’s 5 numbers and unique values of categorical predictors).\n\n\n\n\ndatagrid() call to specify a custom grid of regressors. For example:\n\n\nnewdata = datagrid(cyl = c(4, 6)): cyl variable equal to 4 and 6 and other regressors fixed at their means or modes.\n\n\nSee the Examples section and the datagrid() documentation.\n\n\n\n\nsubset() call with a single argument to select a subset of the dataset used to fit the model, ex: newdata = subset(treatment == 1)\n\n\ndplyr::filter() call with a single argument to select a subset of the dataset used to fit the model, ex: newdata = filter(treatment == 1)\n\n\n\n\n\n\nvariables\n\n\nCounterfactual variables.\n\n\nOutput:\n\n\npredictions(): The entire dataset is replicated once for each unique combination of variables, and predictions are made.\n\n\navg_predictions(): The entire dataset is replicated, predictions are made, and they are marginalized by variables categories.\n\n\nWarning: This can be expensive in large datasets.\n\n\nWarning: Users who need \"conditional\" predictions should use the newdata argument instead of variables.\n\n\n\n\nInput:\n\n\nNULL: computes one prediction per row of newdata\n\n\nCharacter vector: the dataset is replicated once of every combination of unique values of the variables identified in variables.\n\n\nNamed list: names identify the subset of variables of interest and their values. 
For numeric variables, the variables argument supports functions and string shortcuts:\n\n\nA function which returns a numeric value\n\n\nNumeric vector: Contrast between the 2nd element and the 1st element of the x vector.\n\n\n\"iqr\": Contrast across the interquartile range of the regressor.\n\n\n\"sd\": Contrast across one standard deviation around the regressor mean.\n\n\n\"2sd\": Contrast across two standard deviations around the regressor mean.\n\n\n\"minmax\": Contrast between the maximum and the minimum values of the regressor.\n\n\n\"threenum\": mean and 1 standard deviation on both sides\n\n\n\"fivenum\": Tukey’s five numbers\n\n\n\n\n\n\n\n\n\n\nvcov\n\n\nType of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:\n\n\nFALSE: Do not compute standard errors. This can speed up computation considerably.\n\n\nTRUE: Unit-level standard errors using the default vcov(model) variance-covariance matrix.\n\n\nString which indicates the kind of uncertainty estimates to return.\n\n\nHeteroskedasticity-consistent: “HC”, “HC0”, “HC1”, “HC2”, “HC3”, “HC4”, “HC4m”, “HC5”. See ?sandwich::vcovHC\n\n\nHeteroskedasticity and autocorrelation consistent: “HAC”\n\n\nMixed-Models degrees of freedom: \"satterthwaite\", \"kenward-roger\"\n\n\nOther: “NeweyWest”, “KernHAC”, “OPG”. See the sandwich package documentation.\n\n\n\n\nOne-sided formula which indicates the name of cluster variables (e.g., ~unit_id). This formula is passed to the cluster argument of the sandwich::vcovCL function.\n\n\nSquare covariance matrix\n\n\nFunction which returns a covariance matrix (e.g., stats::vcov(model))\n\n\n\n\n\n\nconf_level\n\n\nnumeric value between 0 and 1. Confidence level to use to build a confidence interval.\n\n\n\n\ntype\n\n\nstring indicates the type (scale) of the predictions used to compute contrasts or slopes. This can differ based on the model type, but will typically be a string such as: \"response\", \"link\", \"probs\", or \"zero\". When an unsupported string is entered, the model-specific list of acceptable values is returned in an error message. When type is NULL, the first entry in the error message is used by default.\n\n\n\n\nby\n\n\nAggregate unit-level estimates (aka, marginalize, average over). Valid inputs:\n\n\nFALSE: return the original unit-level estimates.\n\n\nTRUE: aggregate estimates for each term.\n\n\nCharacter vector of column names in newdata or in the data frame produced by calling the function without the by argument.\n\n\nData frame with a by column of group labels, and merging columns shared by newdata or the data frame produced by calling the same function without the by argument.\n\n\nSee examples below.\n\n\nFor more complex aggregations, you can use the FUN argument of the hypotheses() function. See that function’s documentation and the Hypothesis Test vignettes on the marginaleffects website.\n\n\n\n\n\n\nbyfun\n\n\nA function such as mean() or sum() used to aggregate estimates within the subgroups defined by the by argument. NULL uses the mean() function. Must accept a numeric vector and return a single numeric value. This is sometimes used to take the sum or mean of predicted probabilities across outcome or predictor levels. See examples section.\n\n\n\n\nwts\n\n\nlogical, string or numeric: weights to use when computing average predictions, contrasts or slopes. These weights only affect the averaging in avg_*() or with the by argument, and not unit-level estimates. 
See ?weighted.mean\n\n\nstring: column name of the weights variable in newdata. When supplying a column name to wts, it is recommended to supply the original data (including the weights variable) explicitly to newdata.\n\n\nnumeric: vector of length equal to the number of rows in the original data or in newdata (if supplied).\n\n\nFALSE: Equal weights.\n\n\nTRUE: Extract weights from the fitted object with insight::find_weights() and use them when taking weighted averages of estimates. Warning: newdata=datagrid() returns a single average weight, which is equivalent to using wts=FALSE\n\n\n\n\n\n\ntransform\n\n\nA function applied to unit-level adjusted predictions and confidence intervals just before the function returns results. For bayesian models, this function is applied to individual draws from the posterior distribution, before computing summaries.\n\n\n\n\nhypothesis\n\n\nspecify a hypothesis test or custom contrast using a numeric value, vector, or matrix; a string equation; string; a formula, or a function.\n\n\nNumeric:\n\n\nSingle value: the null hypothesis used in the computation of Z and p (before applying transform).\n\n\nVector: Weights to compute a linear combination of (custom contrast between) estimates. Length equal to the number of rows generated by the same function call, but without the hypothesis argument.\n\n\nMatrix: Each column is a vector of weights, as describe above, used to compute a distinct linear combination of (contrast between) estimates. The column names of the matrix are used as labels in the output.\n\n\n\n\nString equation to specify linear or non-linear hypothesis tests. If the term column uniquely identifies rows, terms can be used in the formula. Otherwise, use b1, b2, etc. to identify the position of each parameter. The b* wildcard can be used to test hypotheses on all estimates. If a named vector is used, the names are used as labels in the output. Examples:\n\n\nhp = drat\n\n\nhp + drat = 12\n\n\nb1 + b2 + b3 = 0\n\n\nb* / b1 = 1\n\n\n\n\nString:\n\n\n\"pairwise\": pairwise differences between estimates in each row.\n\n\n\"reference\": differences between the estimates in each row and the estimate in the first row.\n\n\n\"sequential\": difference between an estimate and the estimate in the next row.\n\n\n\"meandev\": difference between an estimate and the mean of all estimates.\n\n\n\"meanotherdev\": difference between an estimate and the mean of all other estimates, excluding the current one.\n\n\n\"revpairwise\", \"revreference\", \"revsequential\": inverse of the corresponding hypotheses, as described above.\n\n\n\n\nFormula:\n\n\ncomparison ~ pairs | group\n\n\nLeft-hand side determines the type of comparison to conduct: difference or ratio. If the left-hand side is empty, difference is chosen.\n\n\nRight-hand side determines the pairs of estimates to compare: reference, sequential, or meandev\n\n\nOptional: Users can supply grouping variables after a vertical bar to conduct comparisons withing subsets.\n\n\nExamples:\n\n\n~ reference\n\n\nratio ~ pairwise\n\n\ndifference ~ pairwise | groupid\n\n\n\n\n\n\nFunction:\n\n\nAccepts an argument x: object produced by a marginaleffects function or a data frame with column rowid and estimate\n\n\nReturns a data frame with columns term and estimate (mandatory) and rowid (optional).\n\n\nThe function can also accept optional input arguments: newdata, by, draws.\n\n\nThis function approach will not work for Bayesian models or with bootstrapping. 
In those cases, it is easy to use posterior_draws() to extract and manipulate the draws directly.\n\n\n\n\nSee the Examples section below and the vignette: https://marginaleffects.com/vignettes/hypothesis.html\n\n\n\n\n\n\nequivalence\n\n\nNumeric vector of length 2: bounds used for the two-one-sided test (TOST) of equivalence, and for the non-inferiority and non-superiority tests. See Details section below.\n\n\n\n\np_adjust\n\n\nAdjust p-values for multiple comparisons: \"holm\", \"hochberg\", \"hommel\", \"bonferroni\", \"BH\", \"BY\", or \"fdr\". See stats::p.adjust\n\n\n\n\ndf\n\n\nDegrees of freedom used to compute p values and confidence intervals. A single numeric value between 1 and Inf. When df is Inf, the normal distribution is used. When df is finite, the t distribution is used. See insight::get_df for a convenient function to extract degrees of freedom. Ex: slopes(model, df = insight::get_df(model))\n\n\n\n\nnumderiv\n\n\nstring or list of strings indicating the method to use to for the numeric differentiation used in to compute delta method standard errors.\n\n\n\"fdforward\": finite difference method with forward differences\n\n\n\"fdcenter\": finite difference method with central differences (default)\n\n\n\"richardson\": Richardson extrapolation method\n\n\nExtra arguments can be specified by passing a list to the numDeriv argument, with the name of the method first and named arguments following, ex: numderiv=list(“fdcenter”, eps = 1e-5). When an unknown argument is used, marginaleffects prints the list of valid arguments for each method.\n\n\n\n\n\n\n…\n\n\nAdditional arguments are passed to the predict() method supplied by the modeling package.These arguments are particularly useful for mixed-effects or bayesian models (see the online vignettes on the marginaleffects website). Available arguments can vary from model to model, depending on the range of supported arguments by each modeling package. See the \"Model-Specific Arguments\" section of the ?slopes documentation for a non-exhaustive list of available arguments.\n\n\n\nA data.frame with one row per observation and several columns:\n\n\nrowid: row number of the newdata data frame\n\n\ntype: prediction type, as defined by the type argument\n\n\ngroup: (optional) value of the grouped outcome (e.g., categorical outcome models)\n\n\nestimate: predicted outcome\n\n\nstd.error: standard errors computed using the delta method.\n\n\np.value: p value associated to the estimate column. The null is determined by the hypothesis argument (0 by default), and p values are computed before applying the transform argument. For models of class feglm, Gam, glm and negbin, p values are computed on the link scale by default unless the type argument is specified explicitly.\n\n\ns.value: Shannon information transforms of p values. How many consecutive \"heads\" tosses would provide the same amount of evidence (or \"surprise\") against the null hypothesis that the coin is fair? The purpose of S is to calibrate the analyst’s intuition about the strength of evidence encoded in p against a well-known physical phenomenon. See Greenland (2019) and Cole et al. 
(2020).\n\n\nconf.low: lower bound of the confidence interval (or equal-tailed interval for bayesian models)\n\n\nconf.high: upper bound of the confidence interval (or equal-tailed interval for bayesian models)\n\n\nSee ?print.marginaleffects for printing options.\n\n\n\navg_predictions(): Average predictions\n\n\nStandard errors for all quantities estimated by marginaleffects can be obtained via the delta method. This requires differentiating a function with respect to the coefficients in the model using a finite difference approach. In some models, the delta method standard errors can be sensitive to various aspects of the numeric differentiation strategy, including the step size. By default, the step size is set to 1e-8, or to 1e-4 times the smallest absolute model coefficient, whichever is largest.\nmarginaleffects can delegate numeric differentiation to the numDeriv package, which allows more flexibility. To do this, users can pass arguments to the numDeriv::jacobian function through a global option. For example:\n\n\noptions(marginaleffects_numDeriv = list(method = “simple”, method.args = list(eps = 1e-6)))\n\n\noptions(marginaleffects_numDeriv = list(method = “Richardson”, method.args = list(eps = 1e-5)))\n\n\noptions(marginaleffects_numDeriv = NULL)\n\n\nSee the \"Standard Errors and Confidence Intervals\" vignette on the marginaleffects website for more details on the computation of standard errors:\nhttps://marginaleffects.com/vignettes/uncertainty.html\nNote that the inferences() function can be used to compute uncertainty estimates using a bootstrap or simulation-based inference. See the vignette:\nhttps://marginaleffects.com/vignettes/bootstrap.html\n\nSome model types allow model-specific arguments to modify the nature of marginal effects, predictions, marginal means, and contrasts. Please report other package-specific predict() arguments on Github so we can add them to the table below.\nhttps://github.com/vincentarelbundock/marginaleffects/issues\n\n\n\nPackage\n\n\nClass\n\n\nArgument\n\n\nDocumentation\n\n\n\n\nbrms\n\n\nbrmsfit\n\n\nndraws\n\n\nbrms::posterior_predict\n\n\n\n\n\n\n\n\nre_formula\n\n\nbrms::posterior_predict\n\n\n\n\nlme4\n\n\nmerMod\n\n\nre.form\n\n\nlme4::predict.merMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nlme4::predict.merMod\n\n\n\n\nglmmTMB\n\n\nglmmTMB\n\n\nre.form\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nzitype\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\nmgcv\n\n\nbam\n\n\nexclude\n\n\nmgcv::predict.bam\n\n\n\n\n\n\ngam\n\n\nexclude\n\n\nmgcv::predict.gam\n\n\n\n\nrobustlmm\n\n\nrlmerMod\n\n\nre.form\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\nMCMCglmm\n\n\nMCMCglmm\n\n\nndraws\n\n\n\n\n\n\nsampleSelection\n\n\nselection\n\n\npart\n\n\nsampleSelection::predict.selection\n\n\n\n\n\n\n\nBy default, credible intervals in bayesian models are built as equal-tailed intervals. This can be changed to a highest density interval by setting a global option:\noptions(“marginaleffects_posterior_interval” = “eti”)\noptions(“marginaleffects_posterior_interval” = “hdi”)\nBy default, the center of the posterior distribution in bayesian models is identified by the median. 
Some model types allow model-specific arguments to modify the nature of marginal effects, predictions, marginal means, and contrasts. Please report other package-specific predict() arguments on Github so we can add them to the table below.\nhttps://github.com/vincentarelbundock/marginaleffects/issues\n\n\n\nPackage\n\n\nClass\n\n\nArgument\n\n\nDocumentation\n\n\n\n\nbrms\n\n\nbrmsfit\n\n\nndraws\n\n\nbrms::posterior_predict\n\n\n\n\n\n\n\n\nre_formula\n\n\nbrms::posterior_predict\n\n\n\n\nlme4\n\n\nmerMod\n\n\nre.form\n\n\nlme4::predict.merMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nlme4::predict.merMod\n\n\n\n\nglmmTMB\n\n\nglmmTMB\n\n\nre.form\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\n\n\n\n\nzitype\n\n\nglmmTMB::predict.glmmTMB\n\n\n\n\nmgcv\n\n\nbam\n\n\nexclude\n\n\nmgcv::predict.bam\n\n\n\n\n\n\ngam\n\n\nexclude\n\n\nmgcv::predict.gam\n\n\n\n\nrobustlmm\n\n\nrlmerMod\n\n\nre.form\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\n\n\n\n\nallow.new.levels\n\n\nrobustlmm::predict.rlmerMod\n\n\n\n\nMCMCglmm\n\n\nMCMCglmm\n\n\nndraws\n\n\n\n\n\n\nsampleSelection\n\n\nselection\n\n\npart\n\n\nsampleSelection::predict.selection\n\n\n\n\n\n\n\nBy default, credible intervals in bayesian models are built as equal-tailed intervals. This can be changed to a highest density interval by setting a global option:\noptions(\"marginaleffects_posterior_interval\" = \"eti\")\noptions(\"marginaleffects_posterior_interval\" = \"hdi\")\nBy default, the center of the posterior distribution in bayesian models is identified by the median. Users can use a different summary function by setting a global option:\noptions(\"marginaleffects_posterior_center\" = \"mean\")\noptions(\"marginaleffects_posterior_center\" = \"median\")\nWhen estimates are averaged using the by argument, the tidy() function, or the summary() function, the posterior distribution is marginalized twice. First, we take the average across units but within each iteration of the MCMC chain, according to what the user requested in the by argument or the tidy()/summary() functions. Then, we identify the center of the resulting posterior using the function supplied to the \"marginaleffects_posterior_center\" option (the median by default).\n\n\\(\\theta\\) is an estimate, \\(\\sigma_\\theta\\) its estimated standard error, and \\([a, b]\\) are the bounds of the interval supplied to the equivalence argument.\nNon-inferiority:\n\n\n\\(H_0\\): \\(\\theta \\leq a\\)\n\n\n\\(H_1\\): \\(\\theta > a\\)\n\n\n\\(t=(\\theta - a)/\\sigma_\\theta\\)\n\n\np: Upper-tail probability\n\n\nNon-superiority:\n\n\n\\(H_0\\): \\(\\theta \\geq b\\)\n\n\n\\(H_1\\): \\(\\theta < b\\)\n\n\n\\(t=(\\theta - b)/\\sigma_\\theta\\)\n\n\np: Lower-tail probability\n\n\nEquivalence: Two One-Sided Tests (TOST)\n\n\np: Maximum of the non-inferiority and non-superiority p values.\n\n\nThanks to Russell V. Lenth for the excellent emmeans package and documentation which inspired this feature.\n\nThe type argument determines the scale of the predictions used to compute quantities of interest with functions from the marginaleffects package. Admissible values for type depend on the model object. When users specify an incorrect value for type, marginaleffects will raise an informative error with a list of valid type values for the specific model object. The first entry in the list in that error message is the default type.\ninvlink(link) is a special type defined by marginaleffects. It is available for some (but not all) models, and only for the predictions() function. With this type, we first compute predictions on the link scale, then we use the inverse link function to backtransform the predictions to the response scale. This is useful for models with non-linear link functions as it can ensure that confidence intervals stay within desirable bounds, ex: 0 to 1 for a logit model. Note that an average of estimates with type = \"invlink(link)\" will not always be equivalent to the average of estimates with type = \"response\". This type is the default when calling predictions(). It is available—but not default—when calling avg_predictions() or predictions() with the by argument. A short sketch appears after the list of type values below.\nSome of the most common type values are:\nresponse, link, E, Ep, average, class, conditional, count, cum.prob, cumhaz, cumprob, density, detection, disp, ev, expected, expvalue, fitted, hazard, invlink(link), latent, latent_N, linear, linear.predictor, linpred, location, lp, mean, numeric, p, ppd, pr, precision, prediction, prob, probability, probs, quantile, risk, rmst, scale, survival, unconditional, utility, variance, xb, zero, zlink, zprob
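Because averaging response-scale estimates and back-transforming averaged link-scale estimates can disagree, it may be worth computing both. A brief sketch, assuming a logistic regression on mtcars where invlink(link) is available (illustrative; the default type can vary by model class):\n\nlibrary(marginaleffects)\nmod <- glm(am ~ hp, data = mtcars, family = binomial)\n# estimates computed on the link scale, then back-transformed, so the\n# confidence intervals stay between 0 and 1\npredictions(mod, type = \"invlink(link)\")\n# these two averages will not always match:\navg_predictions(mod, type = \"response\")\navg_predictions(mod, type = \"invlink(link)\")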
Behind the scenes, the arguments of marginaleffects functions are evaluated in this order:\n\n\nnewdata\n\n\nvariables\n\n\ncomparison and slopes\n\n\nby\n\n\nvcov\n\n\nhypothesis\n\n\ntransform\n\n\nThe slopes() and comparisons() functions can use parallelism to speed up computation. Operations are parallelized for the computation of standard errors, at the model coefficient level. There is always considerable overhead when using parallel computation, mainly involved in passing the whole dataset to the different processes. Thus, parallel computation is most likely to be useful when the model includes many parameters and the dataset is relatively small.\nWarning: In many cases, parallel processing will not be useful at all.\nTo activate parallel computation, users must load the future.apply package, call the plan() function, and set a global option. For example:\n\nlibrary(future.apply)\nplan(\"multicore\", workers = 4)\noptions(marginaleffects_parallel = TRUE)\n\nslopes(model)\n\n\nTo disable parallelism in marginaleffects altogether, you can set a global option:\n\noptions(marginaleffects_parallel = FALSE)\n\n\n\nThe behavior of marginaleffects functions can be modified by setting global options.\nDisable some safety checks:\n\noptions(marginaleffects_safe = FALSE)\n\n\nOmit some columns from the printed output:\n\noptions(marginaleffects_print_omit = c(\"p.value\", \"s.value\"))\n\n\n\n\n\nGreenland S. 2019. \"Valid P-Values Behave Exactly as They Should: Some Misleading Criticisms of P-Values and Their Resolution With S-Values.\" The American Statistician. 73(S1): 106–114.\n\n\nCole, Stephen R, Jessie K Edwards, and Sander Greenland. 2020. \"Surprise!\" American Journal of Epidemiology 190 (2): 191–93. https://doi.org/10.1093/aje/kwaa136\n\n\n\nlibrary(\"marginaleffects\")\n\n\n\n# Adjusted Predictions for every row of the original dataset\nmod <- lm(mpg ~ hp + factor(cyl), data = mtcars)\npred <- predictions(mod)\nhead(pred)\n\n# Adjusted Predictions at User-Specified Values of the Regressors\npredictions(mod, newdata = datagrid(hp = c(100, 120), cyl = 4))\n\nm <- lm(mpg ~ hp + drat + factor(cyl) + factor(am), data = mtcars)\npredictions(m, newdata = datagrid(FUN_factor = unique, FUN_numeric = median))\n\n# Average Adjusted Predictions (AAP)\nlibrary(dplyr)\nmod <- lm(mpg ~ hp * am * vs, mtcars)\n\navg_predictions(mod)\n\npredictions(mod, by = \"am\")\n\n# Conditional Adjusted Predictions\nplot_predictions(mod, condition = \"hp\")\n\n# Counterfactual predictions with the `variables` argument\n# the `mtcars` dataset has 32 rows\n\nmod <- lm(mpg ~ hp + am, data = mtcars)\np <- predictions(mod)\nhead(p)\nnrow(p)\n\n# average counterfactual predictions\navg_predictions(mod, variables = \"am\")\n\n# counterfactual predictions obtained by replicating the entire dataset\n# for different values of the predictors\np <- predictions(mod, variables = list(hp = c(90, 110)))\nnrow(p)\n\n\n# hypothesis test: is the prediction in the 1st row equal to the prediction in the 2nd row?\nmod <- lm(mpg ~ wt + drat, data = mtcars)\n\npredictions(\n mod,\n newdata = datagrid(wt = 2:3),\n hypothesis = \"b1 = b2\")\n\n# same hypothesis test, expressed as a difference\npredictions(\n mod,\n newdata = datagrid(wt = 2:3),\n hypothesis = \"b1 - b2 = 0\")\n\n# same hypothesis test using a numeric vector of weights\npredictions(\n mod,\n newdata = datagrid(wt = 2:3),\n hypothesis = c(1, -1))\n\n# two custom contrasts using a matrix of weights\nlc <- matrix(c(\n 1, -1,\n 2, 3),\n ncol = 2)\npredictions(\n mod,\n newdata = datagrid(wt = 2:3),\n hypothesis = lc)\n\n\n# `by` argument\nmod <- lm(mpg ~ hp * am * vs, data = mtcars)\npredictions(mod, by = c(\"am\", \"vs\"))\n\nlibrary(nnet)\nnom <- multinom(factor(gear) ~ mpg + am * vs, data = mtcars, trace = FALSE)\n\n# first few raw predictions\npredictions(nom, type = \"probs\") |> head()\n\n# average predictions\navg_predictions(nom, type = \"probs\", by = \"group\")\n\nby
<- data.frame(\n group = c(\"3\", \"4\", \"5\"),\n by = c(\"3,4\", \"3,4\", \"5\"))\n\npredictions(nom, type = \"probs\", by = by)\n\n# sum of predicted probabilities for combined response levels\nmod <- multinom(factor(cyl) ~ mpg + am, data = mtcars, trace = FALSE)\nby <- data.frame(\n by = c(\"4,6\", \"4,6\", \"8\"),\n group = as.character(c(4, 6, 8)))\npredictions(mod, newdata = \"mean\", byfun = sum, by = by)", + "text": "Uncertainty estimates are calculated as first-order approximate standard errors for linear or non-linear functions of a vector of random variables with known or estimated covariance matrix. In that sense, hypotheses emulates the behavior of the excellent and well-established car::deltaMethod and car::linearHypothesis functions, but it supports more models; requires fewer dependencies; expands the range of tests to equivalence and superiority/inferiority; and offers convenience features like robust standard errors.\nTo learn more, read the hypothesis tests vignette, visit the package website, or scroll down this page for a full list of vignettes:\n\n\nhttps://marginaleffects.com/vignettes/hypothesis.html\n\n\nhttps://marginaleffects.com/\n\n\nWarning #1: Tests are conducted directly on the scale defined by the type argument. For some models, it can make sense to conduct hypothesis or equivalence tests on the \"link\" scale instead of the \"response\" scale, which is often the default.\nWarning #2: For hypothesis tests on objects produced by the marginaleffects package, it is safer to use the hypothesis argument of the original function. Using hypotheses() may not work in certain environments, in lists, or when working programmatically with *apply style functions.\nWarning #3: The tests assume that the hypothesis expression is (approximately) normally distributed, which for non-linear functions of the parameters may not be realistic. More reliable confidence intervals can be obtained using the inferences() function with method = \"boot\".
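To make Warning #2 concrete, here is a sketch of the safer pattern (the model and hypothesis string are purely illustrative):\n\nlibrary(marginaleffects)\nmod <- lm(mpg ~ hp + wt, data = mtcars)\n# safer: supply `hypothesis` directly to the original function\navg_comparisons(mod, hypothesis = \"b1 = b2\")\n# more fragile in some programmatic contexts: post-processing with hypotheses()\ncmp <- avg_comparisons(mod)\nhypotheses(cmp, hypothesis = \"b1 = b2\")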
\nhypotheses(\n model,\n hypothesis = NULL,\n vcov = NULL,\n conf_level = 0.95,\n df = NULL,\n equivalence = NULL,\n joint = FALSE,\n joint_test = \"f\",\n numderiv = \"fdforward\",\n ...\n)\n\n\n\n\n\nmodel\n\n\nModel object or object generated by the comparisons(), slopes(), or predictions() functions.\n\n\n\n\nhypothesis\n\n\nspecify a hypothesis test or custom contrast using a numeric value, vector, or matrix; a string equation; a string; a formula; or a function.\n\n\nNumeric:\n\n\nSingle value: the null hypothesis used in the computation of Z and p (before applying transform).\n\n\nVector: Weights to compute a linear combination of (custom contrast between) estimates. Length equal to the number of rows generated by the same function call, but without the hypothesis argument.\n\n\nMatrix: Each column is a vector of weights, as described above, used to compute a distinct linear combination of (contrast between) estimates. The column names of the matrix are used as labels in the output.\n\n\n\n\nString equation to specify linear or non-linear hypothesis tests. If the term column uniquely identifies rows, terms can be used in the formula. Otherwise, use b1, b2, etc. to identify the position of each parameter. The b* wildcard can be used to test hypotheses on all estimates. If a named vector is used, the names are used as labels in the output. Examples:\n\n\nhp = drat\n\n\nhp + drat = 12\n\n\nb1 + b2 + b3 = 0\n\n\nb* / b1 = 1\n\n\n\n\nString:\n\n\n\"pairwise\": pairwise differences between estimates in each row.\n\n\n\"reference\": differences between the estimates in each row and the estimate in the first row.\n\n\n\"sequential\": difference between an estimate and the estimate in the next row.\n\n\n\"meandev\": difference between an estimate and the mean of all estimates.\n\n\n\"meanotherdev\": difference between an estimate and the mean of all other estimates, excluding the current one.\n\n\n\"revpairwise\", \"revreference\", \"revsequential\": inverse of the corresponding hypotheses, as described above.\n\n\n\n\nFormula:\n\n\ncomparison ~ pairs | group\n\n\nLeft-hand side determines the type of comparison to conduct: difference or ratio. If the left-hand side is empty, difference is chosen.\n\n\nRight-hand side determines the pairs of estimates to compare: reference, sequential, or meandev.\n\n\nOptional: Users can supply grouping variables after a vertical bar to conduct comparisons within subsets. A short sketch of the formula interface appears after this argument list.\n\n\nExamples:\n\n\n~ reference\n\n\nratio ~ pairwise\n\n\ndifference ~ pairwise | groupid\n\n\n\n\n\n\nFunction:\n\n\nAccepts an argument x: an object produced by a marginaleffects function, or a data frame with columns rowid and estimate.\n\n\nReturns a data frame with columns term and estimate (mandatory) and rowid (optional).\n\n\nThe function can also accept optional input arguments: newdata, by, draws.\n\n\nThis function approach will not work for Bayesian models or with bootstrapping. In those cases, it is easy to use posterior_draws() to extract and manipulate the draws directly.\n\n\n\n\nSee the Examples section below and the vignette: https://marginaleffects.com/vignettes/hypothesis.html\n\n\n\n\n\n\nvcov\n\n\nType of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:\n\n\nFALSE: Do not compute standard errors. This can speed up computation considerably.\n\n\nTRUE: Unit-level standard errors using the default vcov(model) variance-covariance matrix.\n\n\nString which indicates the kind of uncertainty estimates to return.\n\n\nHeteroskedasticity-consistent: \"HC\", \"HC0\", \"HC1\", \"HC2\", \"HC3\", \"HC4\", \"HC4m\", \"HC5\". See ?sandwich::vcovHC\n\n\nHeteroskedasticity and autocorrelation consistent: \"HAC\"\n\n\nMixed-Models degrees of freedom: \"satterthwaite\", \"kenward-roger\"\n\n\nOther: \"NeweyWest\", \"KernHAC\", \"OPG\". See the sandwich package documentation.\n\n\n\n\nOne-sided formula which indicates the name of cluster variables (e.g., ~unit_id). This formula is passed to the cluster argument of the sandwich::vcovCL function.\n\n\nSquare covariance matrix\n\n\nFunction which returns a covariance matrix (e.g., stats::vcov(model))\n\n\n\n\n\n\nconf_level\n\n\nnumeric value between 0 and 1. Confidence level to use to build a confidence interval.\n\n\n\n\ndf\n\n\nDegrees of freedom used to compute p values and confidence intervals. A single numeric value between 1 and Inf. When using joint_test = \"f\", the df argument should be a numeric vector of length 2.\n\n\n\n\nequivalence\n\n\nNumeric vector of length 2: bounds used for the two-one-sided test (TOST) of equivalence, and for the non-inferiority and non-superiority tests. See Details section below.\n\n\n\n\njoint\n\n\nJoint test of statistical significance. The null hypothesis value can be set using the hypothesis argument.\n\n\nFALSE: Hypotheses are not tested jointly.\n\n\nTRUE: All parameters are tested jointly.\n\n\nString: A regular expression used to select the parameters to be tested jointly, by matching parameter names with grep(joint, perl = TRUE).\n\n\nCharacter vector of parameter names to be tested. Characters refer to the names of the vector returned by coef(object).\n\n\nInteger vector of indices. Which parameter positions to test jointly.\n\n\n\n\n\n\njoint_test\n\n\nA character string specifying the type of test, either \"f\" or \"chisq\". The null hypothesis is set by the hypothesis argument, with default null equal to 0 for all parameters.\n\n\n\n\nnumderiv\n\n\nstring or list of strings indicating the method to use for the numeric differentiation used to compute delta method standard errors.\n\n\n\"fdforward\": finite difference method with forward differences (default)\n\n\n\"fdcenter\": finite difference method with central differences\n\n\n\"richardson\": Richardson extrapolation method\n\n\nExtra arguments can be specified by passing a list to the numderiv argument, with the name of the method first and named arguments following, ex: numderiv = list(\"fdcenter\", eps = 1e-5). When an unknown argument is used, marginaleffects prints the list of valid arguments for each method.\n\n\n\n\n\n\n…\n\n\nAdditional arguments are passed to the predict() method supplied by the modeling package. These arguments are particularly useful for mixed-effects or bayesian models (see the online vignettes on the marginaleffects website). Available arguments can vary from model to model, depending on the range of arguments supported by each modeling package. See the \"Model-Specific Arguments\" section of the ?slopes documentation for a non-exhaustive list of available arguments.
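The formula interface documented above can be combined with by to compare group-level estimates. A minimal sketch (the model and grouping variable are purely illustrative):\n\nlibrary(marginaleffects)\nmod <- lm(mpg ~ factor(cyl), data = mtcars)\n# difference between each group's average prediction and the first (reference) group\navg_predictions(mod, by = \"cyl\", hypothesis = ~ reference)\n# ratios instead of differences\navg_predictions(mod, by = \"cyl\", hypothesis = ratio ~ reference)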
The test statistic for the joint Wald test is calculated as \((R \hat{\theta} - r)' [R \hat{V} R']^{-1} (R \hat{\theta} - r) / Q\), where \(\hat{\theta}\) is the vector of estimated parameters, \(\hat{V}\) is the estimated covariance matrix, \(R\) is a \(Q \times P\) matrix for testing \(Q\) hypotheses on \(P\) parameters, \(r\) is a \(Q \times 1\) vector for the null hypothesis, and \(Q\) is the number of rows in \(R\). If the test is a Chi-squared test, the test statistic is not normalized (not divided by \(Q\)).\nThe p-value is then calculated based on either the F-distribution (for the F-test) or the Chi-squared distribution (for the Chi-squared test). For the F-test, the degrees of freedom are \(Q\) and \(n - P\), where \(n\) is the sample size and \(P\) is the number of parameters. For the Chi-squared test, the degrees of freedom are \(Q\).
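The formula above can be verified by hand. A minimal sketch, assuming we jointly test that the second and third coefficients of an illustrative model are zero (an F-test with \(r = 0\)):\n\nmod <- lm(mpg ~ hp + wt, data = mtcars)\ntheta <- coef(mod) # estimated parameters\nV <- vcov(mod) # estimated covariance matrix\nR <- rbind(c(0, 1, 0), c(0, 0, 1)) # Q x P restriction matrix\nr <- c(0, 0) # null hypothesis values\nQ <- nrow(R)\nw <- t(R %*% theta - r) %*% solve(R %*% V %*% t(R)) %*% (R %*% theta - r)\nFstat <- as.numeric(w) / Q\npf(Fstat, df1 = Q, df2 = nrow(mtcars) - length(theta), lower.tail = FALSE)\n\nThis should match hypotheses(mod, joint = 2:3) up to rounding.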
\(\theta\) is an estimate, \(\sigma_\theta\) its estimated standard error, and \([a, b]\) are the bounds of the interval supplied to the equivalence argument.\nNon-inferiority:\n\n\n\(H_0\): \(\theta \leq a\)\n\n\n\(H_1\): \(\theta > a\)\n\n\n\(t=(\theta - a)/\sigma_\theta\)\n\n\np: Upper-tail probability\n\n\nNon-superiority:\n\n\n\(H_0\): \(\theta \geq b\)\n\n\n\(H_1\): \(\theta < b\)\n\n\n\(t=(\theta - b)/\sigma_\theta\)\n\n\np: Lower-tail probability\n\n\nEquivalence: Two One-Sided Tests (TOST)\n\n\np: Maximum of the non-inferiority and non-superiority p values.\n\n\nThanks to Russell V. Lenth for the excellent emmeans package and documentation which inspired this feature.\n\n\nlibrary(\"marginaleffects\")\n\nmod <- lm(mpg ~ hp + wt + factor(cyl), data = mtcars)\n\nhypotheses(mod)\n\n\n Term Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n (Intercept) 35.8460 2.041 17.56 <0.001 227.0 31.8457 39.846319\n hp -0.0231 0.012 -1.93 0.0531 4.2 -0.0465 0.000306\n wt -3.1814 0.720 -4.42 <0.001 16.6 -4.5918 -1.771012\n factor(cyl)6 -3.3590 1.402 -2.40 0.0166 5.9 -6.1062 -0.611803\n factor(cyl)8 -3.1859 2.170 -1.47 0.1422 2.8 -7.4399 1.068169\n\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# Test of equality between coefficients\nhypotheses(mod, hypothesis = \"hp = wt\")\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n 3.16 0.72 4.39 <0.001 16.4 1.75 4.57\n\nTerm: hp = wt\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# Non-linear function\nhypotheses(mod, hypothesis = \"exp(hp + wt) = 0.1\")\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n -0.0594 0.0292 -2.04 0.0418 4.6 -0.117 -0.0022\n\nTerm: exp(hp + wt) = 0.1\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# Robust standard errors\nhypotheses(mod, hypothesis = \"hp = wt\", vcov = \"HC3\")\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n 3.16 0.805 3.92 <0.001 13.5 1.58 4.74\n\nTerm: hp = wt\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# b1, b2, ... shortcuts can be used to identify the position of the\n# parameters of interest in the printed output\nhypotheses(mod, hypothesis = \"b2 = b3\")\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n 3.16 0.72 4.39 <0.001 16.4 1.75 4.57\n\nTerm: b2 = b3\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# wildcard\nhypotheses(mod, hypothesis = \"b* / b2 = 1\")\n\n\n Term Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n b1 / b2 = 1 -1551 764.0 -2.03 0.0423 4.6 -3048.9 -54\n b2 / b2 = 1 0 NA NA NA NA NA NA\n b3 / b2 = 1 137 78.1 1.75 0.0804 3.6 -16.6 290\n b4 / b2 = 1 144 111.0 1.30 0.1938 2.4 -73.3 362\n b5 / b2 = 1 137 151.9 0.90 0.3679 1.4 -161.0 435\n\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# term names with special characters have to be enclosed in backticks\nhypotheses(mod, hypothesis = \"`factor(cyl)6` = `factor(cyl)8`\")\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n -0.173 1.65 -0.105 0.917 0.1 -3.41 3.07\n\nTerm: `factor(cyl)6` = `factor(cyl)8`\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\nmod2 <- lm(mpg ~ hp * drat, data = mtcars)\nhypotheses(mod2, hypothesis = \"`hp:drat` = drat\")\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n -6.08 2.89 -2.1 0.0357 4.8 -11.8 -0.405\n\nTerm: `hp:drat` = drat\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# predictions(), comparisons(), and slopes()\nmod <- glm(am ~ hp + mpg, data = mtcars, family = binomial)\ncmp <- comparisons(mod, newdata = \"mean\")\nhypotheses(cmp, hypothesis = \"b1 = b2\")\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n -0.28 0.104 -2.7 0.00684 7.2 -0.483 -0.0771\n\nTerm: b1=b2\nType: response \nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\nhypotheses(cmp, hypothesis = \"b2 = 0.2\")\n\n\n Estimate Std. 
Error z Pr(>|z|) S 2.5 % 97.5 %\n 0.0938 0.109 0.857 0.391 1.4 -0.121 0.308\n\nTerm: b2=0.2\nType: response \nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\npre <- predictions(mod, newdata = datagrid(hp = 110, mpg = c(30, 35)))\nhypotheses(pre, hypothesis = \"b1 = b2\")\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n -3.57e-05 0.000172 -0.207 0.836 0.3 -0.000373 0.000302\n\nTerm: b1=b2\nType: response \nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# The `hypothesis` argument can be used to compute standard errors for fitted values\nmod <- glm(am ~ hp + mpg, data = mtcars, family = binomial)\n\nf <- function(x) predict(x, type = \"link\", newdata = mtcars)\np <- hypotheses(mod, hypothesis = f)\nhead(p)\n\n\n Term Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n 1 -1.098 0.716 -1.534 0.125 3.0 -2.50 0.305\n 2 -1.098 0.716 -1.534 0.125 3.0 -2.50 0.305\n 3 0.233 0.781 0.299 0.765 0.4 -1.30 1.764\n 4 -0.595 0.647 -0.919 0.358 1.5 -1.86 0.674\n 5 -0.418 0.647 -0.645 0.519 0.9 -1.69 0.851\n 6 -5.026 2.195 -2.290 0.022 5.5 -9.33 -0.725\n\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\nf <- function(x) predict(x, type = \"response\", newdata = mtcars)\np <- hypotheses(mod, hypothesis = f)\nhead(p)\n\n\n Term Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n 1 0.25005 0.1343 1.862 0.06257 4.0 -0.0131 0.5132\n 2 0.25005 0.1343 1.862 0.06257 4.0 -0.0131 0.5132\n 3 0.55803 0.1926 2.898 0.00376 8.1 0.1806 0.9355\n 4 0.35560 0.1483 2.398 0.01648 5.9 0.0650 0.6462\n 5 0.39710 0.1550 2.562 0.01041 6.6 0.0933 0.7009\n 6 0.00652 0.0142 0.459 0.64653 0.6 -0.0213 0.0344\n\nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# Complex aggregation\n# Step 1: Collapse predicted probabilities by outcome level, for each individual\n# Step 2: Take the mean of the collapsed probabilities by group and `cyl`\nlibrary(MASS)\nlibrary(dplyr)\n\ndat <- transform(mtcars, gear = factor(gear))\nmod <- polr(gear ~ factor(cyl) + hp, dat)\n\naggregation_fun <- function(x) {\n predictions(x, vcov = FALSE) |>\n mutate(group = ifelse(group %in% c(\"3\", \"4\"), \"3 & 4\", \"5\")) |>\n summarize(estimate = sum(estimate), .by = c(\"rowid\", \"cyl\", \"group\")) |>\n summarize(estimate = mean(estimate), .by = c(\"cyl\", \"group\")) |>\n rename(term = cyl)\n}\n\nhypotheses(mod, hypothesis = aggregation_fun)\n\n\n Group Term Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %\n 3 & 4 6 0.8390 0.0651 12.89 <0.001 123.9 0.7115 0.967\n 3 & 4 4 0.7197 0.1099 6.55 <0.001 34.0 0.5044 0.935\n 3 & 4 8 0.9283 0.0174 53.45 <0.001 Inf 0.8943 0.962\n 5 6 0.1610 0.0651 2.47 0.0134 6.2 0.0334 0.289\n 5 4 0.2803 0.1099 2.55 0.0108 6.5 0.0649 0.496\n 5 8 0.0717 0.0174 4.13 <0.001 14.7 0.0377 0.106\n\nColumns: term, group, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high \n\n# Equivalence, non-inferiority, and non-superiority tests\nmod <- lm(mpg ~ hp + factor(gear), data = mtcars)\np <- predictions(mod, newdata = \"median\")\nhypotheses(p, equivalence = c(17, 18))\n\n\n Estimate Std. 
Error z Pr(>|z|) S 2.5 % 97.5 % p (NonSup) p (NonInf)\n 19.7 1 19.6 <0.001 281.3 17.7 21.6 0.951 0.00404\n p (Equiv) hp gear\n 0.951 123 3\n\nType: response \nColumns: rowid, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, hp, gear, mpg, statistic.noninf, statistic.nonsup, p.value.noninf, p.value.nonsup, p.value.equiv \n\nmfx <- avg_slopes(mod, variables = \"hp\")\nhypotheses(mfx, equivalence = c(-.1, .1))\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 % p (NonSup) p (NonInf)\n -0.0669 0.011 -6.05 <0.001 29.4 -0.0885 -0.0452 <0.001 0.00135\n p (Equiv)\n 0.00135\n\nTerm: hp\nType: response \nComparison: mean(dY/dX)\nColumns: term, contrast, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, predicted_lo, predicted_hi, predicted, statistic.noninf, statistic.nonsup, p.value.noninf, p.value.nonsup, p.value.equiv \n\ncmp <- avg_comparisons(mod, variables = \"gear\", hypothesis = \"pairwise\")\nhypotheses(cmp, equivalence = c(0, 10))\n\n\n Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 % p (NonSup) p (NonInf)\n -3.94 2.05 -1.92 0.0543 4.2 -7.95 0.0727 <0.001 0.973\n p (Equiv)\n 0.973\n\nTerm: (mean(4) - mean(3)) - (mean(5) - mean(3))\nType: response \nColumns: term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, statistic.noninf, statistic.nonsup, p.value.noninf, p.value.nonsup, p.value.equiv \n\n# joint hypotheses: character vector\nmodel <- lm(mpg ~ as.factor(cyl) * hp, data = mtcars)\nhypotheses(model, joint = c(\"as.factor(cyl)6:hp\", \"as.factor(cyl)8:hp\"))\n\n\n\nJoint hypothesis test:\nas.factor(cyl)6:hp = 0\nas.factor(cyl)8:hp = 0\n \n F Pr(>|F|) Df 1 Df 2\n 2.11 0.142 2 26\n\nColumns: statistic, p.value, df1, df2 \n\n# joint hypotheses: regular expression\nhypotheses(model, joint = \"cyl\")\n\n\n\nJoint hypothesis test:\n as.factor(cyl)6 = 0\n as.factor(cyl)8 = 0\n as.factor(cyl)6:hp = 0\n as.factor(cyl)8:hp = 0\n \n F Pr(>|F|) Df 1 Df 2\n 5.7 0.00197 4 26\n\nColumns: statistic, p.value, df1, df2 \n\n# joint hypotheses: integer indices\nhypotheses(model, joint = 2:3)\n\n\n\nJoint hypothesis test:\n as.factor(cyl)6 = 0\n as.factor(cyl)8 = 0\n \n F Pr(>|F|) Df 1 Df 2\n 6.12 0.00665 2 26\n\nColumns: statistic, p.value, df1, df2 \n\n# joint hypotheses: different null hypotheses\nhypotheses(model, joint = 2:3, hypothesis = 1)\n\n\n\nJoint hypothesis test:\n as.factor(cyl)6 = 1\n as.factor(cyl)8 = 1\n \n F Pr(>|F|) Df 1 Df 2\n 6.84 0.00411 2 26\n\nColumns: statistic, p.value, df1, df2 \n\nhypotheses(model, joint = 2:3, hypothesis = 1:2)\n\n\n\nJoint hypothesis test:\n as.factor(cyl)6 = 1\n as.factor(cyl)8 = 2\n \n F Pr(>|F|) Df 1 Df 2\n 7.47 0.00273 2 26\n\nColumns: statistic, p.value, df1, df2 \n\n# joint hypotheses: marginaleffects object\ncmp <- avg_comparisons(model)\nhypotheses(cmp, joint = \"cyl\")\n\n\n\nJoint hypothesis test:\n cyl mean(6) - mean(4) = 0\n cyl mean(8) - mean(4) = 0\n \n F Pr(>|F|) Df 1 Df 2\n 1.6 0.221 2 26\n\nColumns: statistic, p.value, df1, df2", "crumbs": [ "Model to Meaning", "Functions", - "`predictions`" + "`hypotheses`" ] }, {