diff --git a/.nojekyll b/.nojekyll index 328e674..c0fcecb 100644 --- a/.nojekyll +++ b/.nojekyll @@ -1 +1 @@ -ca36e381 \ No newline at end of file +07cd4f79 \ No newline at end of file diff --git a/chapters/means-and-contrasts.html b/chapters/means-and-contrasts.html index ff30e67..75411f0 100644 --- a/chapters/means-and-contrasts.html +++ b/chapters/means-and-contrasts.html @@ -285,13 +285,15 @@

Table of contents

  • 12.2.4 Model Inference
  • 12.2.5 Estimated Marginal Means
  • -
  • 12.3 Compact letter displays
  • -
  • 12.4 Alternatives to CLD +
  • 12.3 Contrasts using emmeans
  • -
  • 12.5 using adjusted P-values
  • +
  • 12.4 Compact letter displays
  • +
  • 12.5 Alternatives to CLD
  • +
  • 12.6 Export emmeans to excel sheet
  • +
  • 12.7 Graphical display of emmeans
  • +
  • 12.8 Conclusion
  • @@ -342,7 +344,8 @@

    12.2 Analysis Examples

    library(nlme); library(performance); library(emmeans)
    -library(dplyr); library(broom.mixed)
    +library(dplyr); library(broom.mixed); library(multcompView) +library(multcomp); library(ggplot2)

    12.2.1 Import data

    @@ -362,258 +365,379 @@

    data = data1, na.action = na.exclude) tidy(model1) +
    +
    Warning in tidy.lme(model1): ran_pars not yet implemented for multiple levels
    +of nesting
    +
    +
    +
    # A tibble: 12 × 7
    +   effect term                estimate std.error    df statistic  p.value
    +   <chr>  <chr>                  <dbl>     <dbl> <dbl>     <dbl>    <dbl>
    + 1 fixed  (Intercept)           80          9.11    45    8.78   2.56e-11
    + 2 fixed  VMarvellous            6.67       9.72    10    0.686  5.08e- 1
    + 3 fixed  VVictory              -8.50       9.72    10   -0.875  4.02e- 1
    + 4 fixed  N0.2cwt               18.5        7.68    45    2.41   2.02e- 2
    + 5 fixed  N0.4cwt               34.7        7.68    45    4.51   4.58e- 5
    + 6 fixed  N0.6cwt               44.8        7.68    45    5.84   5.48e- 7
    + 7 fixed  VMarvellous:N0.2cwt    3.33      10.9     45    0.307  7.60e- 1
    + 8 fixed  VVictory:N0.2cwt      -0.333     10.9     45   -0.0307 9.76e- 1
    + 9 fixed  VMarvellous:N0.4cwt   -4.17      10.9     45   -0.383  7.03e- 1
    +10 fixed  VVictory:N0.4cwt       4.67      10.9     45    0.430  6.70e- 1
    +11 fixed  VMarvellous:N0.6cwt   -4.67      10.9     45   -0.430  6.70e- 1
    +12 fixed  VVictory:N0.6cwt       2.17      10.9     45    0.199  8.43e- 1
    +

    12.2.3 Check Model Assumptions

    -
    check_model(model1, check = c('normality', 'linearity'))
    +
    check_model(model1, check = c('normality', 'linearity'))
    +
    +
    +
    +

    +
    +
    +

    12.2.4 Model Inference

    -
    anova(model1, type = "marginal")
    +
    anova(model1, type = "marginal")
    +
    +
                numDF denDF  F-value p-value
    +(Intercept)     1    45 77.16729  <.0001
    +V               2    10  1.22454  0.3344
    +N               3    45 13.02273  <.0001
    +V:N             6    45  0.30282  0.9322
    +

    12.2.5 Estimated Marginal Means

    -

    Now that we have a good model, let’s use the emmeans() function to obtain EMMs.

    Now that we have fitted a linear mixed model (model1) and confirmed that it meets the model assumptions, let’s use the emmeans() function to obtain estimated marginal means for the main (variety and nitrogen) and interaction (variety x nitrogen) effects.

    12.2.5.1 Main effects

    -
    m1 <- emmeans(model1, ~V, level = 0.95)
    -m1
    +
    m1 <- emmeans(model1, ~V, level = 0.95)
    +
    +
    NOTE: Results may be misleading due to involvement in interactions
    +
    +
    m1
    +
    +
     V           emmean  SE df lower.CL upper.CL
    + Golden.rain  104.5 7.8  5     84.5      125
    + Marvellous   109.8 7.8  5     89.7      130
    + Victory       97.6 7.8  5     77.6      118
    +
    +Results are averaged over the levels of: N 
    +Degrees-of-freedom method: containment 
    +Confidence level used: 0.95 
    +
    -
    m2 <- emmeans(model1, ~N)
    -m2
    +
    m2 <- emmeans(model1, ~N)
    +
    +
    NOTE: Results may be misleading due to involvement in interactions
    +
    +
    m2
    +
    +
     N      emmean   SE df lower.CL upper.CL
    + 0.0cwt   79.4 7.17  5     60.9     97.8
    + 0.2cwt   98.9 7.17  5     80.4    117.3
    + 0.4cwt  114.2 7.17  5     95.8    132.7
    + 0.6cwt  123.4 7.17  5    104.9    141.8
    +
    +Results are averaged over the levels of: V 
    +Degrees-of-freedom method: containment 
    +Confidence level used: 0.95 
    +
    +

    Make sure to read and interpret EMMs carefully. Here, when we calculated EMMs for the main effects of V and N, these were averaged over the levels of the other factor in the experiment. For example, the estimated mean for each variety was averaged over its N treatments.

    12.2.5.2 Interaction effects

    +

    Now let’s evaluate the interaction effect EMMs for V and N. These can be calculated either using V*N or V|N.

    -
    m3 <- emmeans(model1, ~V*N)
    -m3
    +
    m3 <- emmeans(model1, ~V*N)
    +m3
    +
    +
     V           N      emmean   SE df lower.CL upper.CL
    + Golden.rain 0.0cwt   80.0 9.11  5     56.6    103.4
    + Marvellous  0.0cwt   86.7 9.11  5     63.3    110.1
    + Victory     0.0cwt   71.5 9.11  5     48.1     94.9
    + Golden.rain 0.2cwt   98.5 9.11  5     75.1    121.9
    + Marvellous  0.2cwt  108.5 9.11  5     85.1    131.9
    + Victory     0.2cwt   89.7 9.11  5     66.3    113.1
    + Golden.rain 0.4cwt  114.7 9.11  5     91.3    138.1
    + Marvellous  0.4cwt  117.2 9.11  5     93.8    140.6
    + Victory     0.4cwt  110.8 9.11  5     87.4    134.2
    + Golden.rain 0.6cwt  124.8 9.11  5    101.4    148.2
    + Marvellous  0.6cwt  126.8 9.11  5    103.4    150.2
    + Victory     0.6cwt  118.5 9.11  5     95.1    141.9
    +
    +Degrees-of-freedom method: containment 
    +Confidence level used: 0.95 
    +
    -
    m4 <- emmeans(model1, ~V|N)
    -m4
    +
    m4 <- emmeans(model1, ~V|N)
    +m4
    +
    +
    N = 0.0cwt:
    + V           emmean   SE df lower.CL upper.CL
    + Golden.rain   80.0 9.11  5     56.6    103.4
    + Marvellous    86.7 9.11  5     63.3    110.1
    + Victory       71.5 9.11  5     48.1     94.9
    +
    +N = 0.2cwt:
    + V           emmean   SE df lower.CL upper.CL
    + Golden.rain   98.5 9.11  5     75.1    121.9
    + Marvellous   108.5 9.11  5     85.1    131.9
    + Victory       89.7 9.11  5     66.3    113.1
    +
    +N = 0.4cwt:
    + V           emmean   SE df lower.CL upper.CL
    + Golden.rain  114.7 9.11  5     91.3    138.1
    + Marvellous   117.2 9.11  5     93.8    140.6
    + Victory      110.8 9.11  5     87.4    134.2
    +
    +N = 0.6cwt:
    + V           emmean   SE df lower.CL upper.CL
    + Golden.rain  124.8 9.11  5    101.4    148.2
    + Marvellous   126.8 9.11  5    103.4    150.2
    + Victory      118.5 9.11  5     95.1    141.9
    +
    +Degrees-of-freedom method: containment 
    +Confidence level used: 0.95 
    +
    -
    -

    12.3 Compact letter displays

    -

    Compact letter displays (CLDs) are a popular way to display multiple comparisons when there are more than few group means to compare. However, they are problematic as they are more prone to misinterpretation. The R package multcompView (Graves et al., 2019) provides an implementation of CLDs creating a display where any two means associated with same symbol are not statistically different.

    -

    The cld() function from the multcomp package is used to implement CLDs in the form of symbols or letters. The emmeans package provides a emmGrid objects for cld() method.

    -

    Let’s start evaluating CLDs for main effects. We will use emmean objects m1 and m2 for this.

    +
    +

    12.3 Contrasts using emmeans

    +

    First, the pairs() function from the emmeans package can be used to evaluate pairwise comparisons among treatment groups. The emmeans objects (m1, m2) are passed to the pairs() function, which applies a p-value adjustment equivalent to the Tukey test.

    -
    cld(m1, alpha=0.05, Letters=letters)
    +
    pairs(m1, adjust = "tukey")
    +
    +
     contrast                 estimate   SE df t.ratio p.value
    + Golden.rain - Marvellous    -5.29 7.08 10  -0.748  0.7419
    + Golden.rain - Victory        6.88 7.08 10   0.971  0.6104
    + Marvellous - Victory        12.17 7.08 10   1.719  0.2458
    +
    +Results are averaged over the levels of: N 
    +Degrees-of-freedom method: containment 
    +P value adjustment: tukey method for comparing a family of 3 estimates 
    +
    -
    cld(m2, alpha=0.05, Letters=letters)
    +
    pairs(m2)
    +
    +
     contrast        estimate   SE df t.ratio p.value
    + 0.0cwt - 0.2cwt   -19.50 4.44 45  -4.396  0.0004
    + 0.0cwt - 0.4cwt   -34.83 4.44 45  -7.853  <.0001
    + 0.0cwt - 0.6cwt   -44.00 4.44 45  -9.919  <.0001
    + 0.2cwt - 0.4cwt   -15.33 4.44 45  -3.457  0.0064
    + 0.2cwt - 0.6cwt   -24.50 4.44 45  -5.523  <.0001
    + 0.4cwt - 0.6cwt    -9.17 4.44 45  -2.067  0.1797
    +
    +Results are averaged over the levels of: V 
    +Degrees-of-freedom method: containment 
    +P value adjustment: tukey method for comparing a family of 4 estimates 
    -

    Let’s have a look at the CLDs for the interaction effect:

    +
    +

    If we look at the results from the code chunk above, it’s easy to interpret the results from the pairs() function for the variety comparison because there were only 3 groups. It’s a bit more confusing for the nitrogen treatments, where we had 4 groups. We can simplify this further by using custom contrasts.

    +
    +
    +
    + +
    +
    +pairs() +
    +
    +
    +

    Remember!! The pairs() function can be used to calculate pairwise comparisons when the number of treatment groups is less than or equal to 3.

    +
    +
    +
    +

    12.3.1 Custom contrasts

    +

    Firstly, let’s run emmean object ‘m2’ for nitrogen treatment comparison.

    -
    cld(m3, alpha=0.05, Letters=letters)
    +
    m2
    +
    +
     N      emmean   SE df lower.CL upper.CL
    + 0.0cwt   79.4 7.17  5     60.9     97.8
    + 0.2cwt   98.9 7.17  5     80.4    117.3
    + 0.4cwt  114.2 7.17  5     95.8    132.7
    + 0.6cwt  123.4 7.17  5    104.9    141.8
    +
    +Results are averaged over the levels of: V 
    +Degrees-of-freedom method: containment 
    +Confidence level used: 0.95 
    -

    Interpreation of these letters is: Here we have a significant difference in grain yield with varieties “victory”, with N treatments of 0.0cwt, 0.2cwt, 0.4cwt, and 0.6wt. Grain yield for Golden.rain variety was significantly lower with 0.0cwt N treatment compared to the 0.2cwt, 0.4cwt, and 0.6wt treatments.

    -
    -
    -

    12.4 Alternatives to CLD

    + +

    Now, let’s create a vector for each nitrogen treatment in the same order as presented in the output from m2.

    -
    One-way estimated marginal means and plot
    -
    -library(multcomp)
    -library(emmeans)
    -
    -marginal = emmeans(model, ~ Location)
    -
    -CLD = cld(marginal,
    -          alpha=0.05,
    -          Letters=letters,
    -          adjust="tukey")
    -
    -CLD
    -
    -
    -Location        emmean        SE df  lower.CL upper.CL .group
    - Olympia      8.333333 0.6718548 16  6.449596 10.21707  a   
    - Northampton 11.833333 0.6718548 16  9.949596 13.71707   b  
    - Ventura     13.333333 0.6718548 16 11.449596 15.21707   b  
    - Burlington  21.833333 0.6718548 16 19.949596 23.71707    c 
    -
    -
    -
    -### Order the levels for printing
    -
    -CLD$Location = factor(CLD$Location,
    -                       levels=c("Olympia", "Ventura", "Northampton", "Burlington"))
    -
    -###  Remove spaces in .group 
    -
    -CLD$.group=gsub(" ", "", CLD$.group)
    -
    -
    -### Plot
    -
    -library(ggplot2)
    -
    -ggplot(CLD,
    -       aes(x     = Location,
    -           y     = emmean,
    -           label = .group)) +
    -
    -    geom_point(shape  = 15,
    -               size   = 4) +
    -
    -    geom_errorbar(aes(ymin  =  lower.CL,
    -                      ymax  =  upper.CL),
    -                      width =  0.2,
    -                      size  =  0.7) +
    -
    -    theme_bw() +
    -    theme(axis.title   = element_text(face = "bold"),
    -          axis.text    = element_text(face = "bold"),
    -          plot.caption = element_text(hjust = 0)) +
    -
    -    ylab("Estimated marginal mean\nmidichlorian count") +
    -    ggtitle ("Midichlorian counts",
    -
    -             subtitle = "In four U.S. cities") +
    -
    -            labs(caption  = paste0("\nMidichlorian counts for four locations. ",
    -                                   "Boxes indicate the EM mean. \n",
    -                                   "Error bars indicate the 95% ",
    -                                   "confidence interval of the EM mean. \n",
    -                                   "Means sharing a letter are not ",
    -                                   "significantly different (Tukey-adjusted \n",
    -                                   "comparisons)."),
    -                            hjust=0.5) +
    -
    -  geom_text(nudge_x = c(0, 0, 0, 0),
    -            nudge_y = c(4, 4, 4, 4),
    -            color   = "black")
    -
    -
    -

    12.4.1 Interactions using Emmeans

    +
    A1 = c(1, 0, 0, 0)
    +A2 = c(0, 1, 0, 0)
    +A3 = c(0, 0, 1, 0)
    +A4 = c(0, 0, 0, 1)
    + +

    These vectors (A1, A2, A3, A4) represent each nitrogen treatment in the order presented in the m2 emmeans object. The A1, A2, A3, and A4 vectors represent the 0.0cwt, 0.2cwt, 0.4cwt, and 0.6cwt treatments, respectively.

    +

    Let’s create custom contrasts for comparing the ‘0.0cwt’ (A1) treatment to the ‘0.2cwt’ (A2), ‘0.4cwt’ (A3), and ‘0.6cwt’ (A4) treatments. This can be evaluated as shown below. Each output shows the difference in mean yield between the two nitrogen treatments being compared.

    -
    Interaction plot of estimated marginal means
    -
    -library(multcomp)
    -library(emmeans)
    -
    -marginal = emmeans(model,
    -                   ~ Tribe:Location)
    -
    -CLD = cld(marginal,
    -          alpha=0.05,
    -          Letters=letters,
    -          adjust="tukey")
    -
    -CLD
    -
    -
    -Tribe  Location       emmean        SE df  lower.CL upper.CL .group
    - Sith  Olympia      4.333333 0.9501462 16  1.354477  7.31219  a   
    - Jedi  Northampton  8.666667 0.9501462 16  5.687810 11.64552  ab  
    - Sith  Ventura     10.666667 0.9501462 16  7.687810 13.64552   bc 
    - Jedi  Olympia     12.333333 0.9501462 16  9.354477 15.31219   bcd
    - Sith  Northampton 15.000000 0.9501462 16 12.021143 17.97886    cd
    - Jedi  Ventura     16.000000 0.9501462 16 13.021143 18.97886     d
    - Jedi  Burlington  20.666667 0.9501462 16 17.687810 23.64552      e
    - Sith  Burlington  23.000000 0.9501462 16 20.021143 25.97886      e
    -
    -
    -
    -### Order the levels for printing
    -
    -CLD$Location = factor(CLD$Location,
    -                       levels=c("Olympia", "Ventura", "Northampton", "Burlington"))
    -
    -CLD$Tribe = factor(CLD$Tribe,
    -                       levels=c("Jedi", "Sith"))
    -
    -###  Remove spaces in .group 
    -
    -CLD$.group=gsub(" ", "", CLD$.group)
    -
    -
    -CLD
    -
    -
    -### Plot
    -
    -library(ggplot2)
    -
    -pd = position_dodge(0.4)    ### How much to jitter the points on the plot
    -
    -ggplot(CLD,
    -       aes(x     = Location,
    -           y     = emmean,
    -           color = Tribe,
    -           label = .group)) +
    -
    -    geom_point(shape  = 15,
    -               size   = 4,
    -             position = pd) +
    -
    -    geom_errorbar(aes(ymin  =  lower.CL,
    -                      ymax  =  upper.CL),
    -                      width =  0.2,
    -                      size  =  0.7,
    -                      position = pd) +
    -
    -    theme_bw() +
    -    theme(axis.title   = element_text(face = "bold"),
    -          axis.text    = element_text(face = "bold"),
    -          plot.caption = element_text(hjust = 0)) +
    -
    -    ylab("Estimated marginal mean\nmidichlorian count") +
    -     ggtitle ("Midichlorian counts for Jedi and Sith",
    -            subtitle = "In four U.S. cities") +
    - 
    -            labs(caption  = paste0("\nMidichlorian counts for two tribes across ",
    -                                   "four locations. Boxes indicate \n",
    -                                   "the EM mean. ",
    -                                   "Error bars indicate the 95% confidence ",
    -                                   "interval ",
    -                                    "of the EM \n",
    -                                   "mean. Means sharing a letter are ",
    -                                   "not significantly different \n",
    -                                   "(Tukey-adjusted comparisons)."),
    -                            hjust=0.5) +
    - 
    -  geom_text(nudge_x = c(0.1, -0.1, 0.1, -0.1, 0.1, -0.1, -0.1, 0.1),
    -            nudge_y = c(4.5,  4.5, 4.5,  4.5, 4.5 , 4.5,  4.5, 4.5),
    -            color   = "black") +
    - 
    -  scale_color_manual(values = c("blue", "red"))
    +
    contrast(m2, method = list(A1 - A2) )
    +
    +
     contrast       estimate   SE df t.ratio p.value
    + c(1, -1, 0, 0)    -19.5 4.44 45  -4.396  0.0001
    +
    +Results are averaged over the levels of: V 
    +Degrees-of-freedom method: containment 
    +
    +
    contrast(m2, method = list(A1 - A3) )
    +
    +
     contrast       estimate   SE df t.ratio p.value
    + c(1, 0, -1, 0)    -34.8 4.44 45  -7.853  <.0001
    +
    +Results are averaged over the levels of: V 
    +Degrees-of-freedom method: containment 
    +
    +
    contrast(m2, method = list(A1 - A4) )
    +
    +
     contrast       estimate   SE df t.ratio p.value
    + c(1, 0, 0, -1)      -44 4.44 45  -9.919  <.0001
    +
    +Results are averaged over the levels of: V 
    +Degrees-of-freedom method: containment 
    +
    + +
    +

    Using custom contrasts is strongly recommended instead of pairs() when you are comparing multiple treatment groups (>5).

    +
    -
    -

    12.4.2 Contrasts using Emmeans

    +
    +

    12.4 Compact letter displays

    +

    Compact letter displays (CLDs) are a popular way to display multiple comparisons when there are more than a few group means to compare. However, they are problematic as they are more prone to misinterpretation. The R package multcompView (Graves et al., 2019) provides an implementation of CLDs, creating a display where any two means associated with the same symbol are not statistically different.

    +

    The cld() function from the multcomp package is used to implement CLDs in the form of symbols or letters. The emmeans package provides a cld() method for emmGrid objects.

    +

    Let’s start evaluating CLDs for main effects. We will use the emmeans objects m1 and m2 for this. In the output below, groups sharing a letter in the .group column are not statistically different from each other.

    -
    (warp.emm <- emmeans(warp.lm, ~ tension | wool))
    -
    -contrast(warp.emm, "poly")
    +
    cld(m1, alpha=0.05, Letters=letters)
    +
    +
     V           emmean  SE df lower.CL upper.CL .group
    + Victory       97.6 7.8  5     77.6      118  a    
    + Golden.rain  104.5 7.8  5     84.5      125  a    
    + Marvellous   109.8 7.8  5     89.7      130  a    
    +
    +Results are averaged over the levels of: N 
    +Degrees-of-freedom method: containment 
    +Confidence level used: 0.95 
    +P value adjustment: tukey method for comparing a family of 3 estimates 
    +significance level used: alpha = 0.05 
    +NOTE: If two or more means share the same grouping symbol,
    +      then we cannot show them to be different.
    +      But we also did not show them to be the same. 
    +
    -

    Compact letter display

    -
    #library(multcomp); library(multcompView)
    +
    cld(m2, alpha=0.05, Letters=letters)
    +
    +
     N      emmean   SE df lower.CL upper.CL .group
    + 0.0cwt   79.4 7.17  5     60.9     97.8  a    
    + 0.2cwt   98.9 7.17  5     80.4    117.3   b   
    + 0.4cwt  114.2 7.17  5     95.8    132.7    c  
    + 0.6cwt  123.4 7.17  5    104.9    141.8    c  
    +
    +Results are averaged over the levels of: V 
    +Degrees-of-freedom method: containment 
    +Confidence level used: 0.95 
    +P value adjustment: tukey method for comparing a family of 4 estimates 
    +significance level used: alpha = 0.05 
    +NOTE: If two or more means share the same grouping symbol,
    +      then we cannot show them to be different.
    +      But we also did not show them to be the same. 
    +
    +

    Let’s have a look at the CLDs for the interaction effect:

    -
    #cld(m1, Letters= letters)
    +
    cld3 <- cld(m3, alpha=0.05, Letters=letters)
    +cld3
    +
    +
     V           N      emmean   SE df lower.CL upper.CL .group    
    + Victory     0.0cwt   71.5 9.11  5     48.1     94.9  a        
    + Golden.rain 0.0cwt   80.0 9.11  5     56.6    103.4  abcde    
    + Marvellous  0.0cwt   86.7 9.11  5     63.3    110.1  abc  fg  
    + Victory     0.2cwt   89.7 9.11  5     66.3    113.1  ab d f h 
    + Golden.rain 0.2cwt   98.5 9.11  5     75.1    121.9  abcdefghi
    + Marvellous  0.2cwt  108.5 9.11  5     85.1    131.9  abcdefghi
    + Victory     0.4cwt  110.8 9.11  5     87.4    134.2   bcdefghi
    + Golden.rain 0.4cwt  114.7 9.11  5     91.3    138.1       fghi
    + Marvellous  0.4cwt  117.2 9.11  5     93.8    140.6     de  hi
    + Victory     0.6cwt  118.5 9.11  5     95.1    141.9    c e g i
    + Golden.rain 0.6cwt  124.8 9.11  5    101.4    148.2       fghi
    + Marvellous  0.6cwt  126.8 9.11  5    103.4    150.2         hi
    +
    +Degrees-of-freedom method: containment 
    +Confidence level used: 0.95 
    +P value adjustment: tukey method for comparing a family of 12 estimates 
    +significance level used: alpha = 0.05 
    +NOTE: If two or more means share the same grouping symbol,
    +      then we cannot show them to be different.
    +      But we also did not show them to be the same. 
    -

    The letters indicating significant differences can be generated using the cld() function from the ‘multcomp’ package. In the output below, groups sharing a letter in the .group column are not statistically different from each other.

    -
    + +

    Interpretation of these letters is: Here we have a significant difference in grain yield with varieties “victory”, with N treatments of 0.0cwt, 0.2cwt, 0.4cwt, and 0.6cwt. Grain yield for Golden.rain variety was significantly lower with 0.0cwt N treatment compared to the 0.2cwt, 0.4cwt, and 0.6cwt treatments.

    +

    In the data set we used for demonstration here, we had an equal number of observations in each group. However, this might not always be the case, as it is common to have missing values in a data set. In such cases, readers usually struggle to interpret significant differences among groups. For example, the estimated means of two groups may be substantially different, yet not statistically different. This normally happens when the SE of one group is large due to its small sample size, so it’s hard for it to be statistically different from other groups. In such cases, we can use alternatives to CLDs as shown below.

    -
    -

    12.5 using adjusted P-values

    -

    P values, “significance”, and recommendations : https://cran.r-project.org/web/packages/emmeans/vignettes/basics.html#emms

    -

    Summary of main points EMMs are derived from a model. A different model for the same data may lead to different EMMs. EMMs are based on a reference grid consisting of all combinations of factor levels, with each covariate set to its average (by default). For purposes of defining the reference grid, dimensions of a multivariate response are treated as levels of a factor. EMMs are then predictions on this reference grid, or marginal averages thereof (equally weighted by default). Reference grids may be modified using at or other arguments for ref_grid() Reference grids and emmeans() results may be plotted via plot() (for parallel confidence intervals) or emmip() (for an interaction-style plot). Be cautious with the terms “significant” and “nonsignificant”, and don’t ever interpret a “nonsignificant” result as saying that there is no effect. Follow good statistical practices such as getting the model right first, and using adjusted P values for appropriately chosen families of comparisons or contrasts.

    +
    +

    12.5 Alternatives to CLD

    +
      +
    1. Equivalence test
    2. +
    +

    Let’s assume, based on subject matter considerations, that two groups can be considered equivalent if their mean yields differ by less than 30. Let’s try an equivalence test on the CLDs of the nitrogen treatment emmeans (m2).

    +
    +
    cld(m2, delta = 30, adjust = "none")
    +
    +
     N      emmean   SE df lower.CL upper.CL .equiv.set
    + 0.0cwt   79.4 7.17  5     60.9     97.8  1        
    + 0.2cwt   98.9 7.17  5     80.4    117.3  12       
    + 0.4cwt  114.2 7.17  5     95.8    132.7   23      
    + 0.6cwt  123.4 7.17  5    104.9    141.8    3      
    +
    +Results are averaged over the levels of: V 
    +Degrees-of-freedom method: containment 
    +Confidence level used: 0.95 
    +Statistics are tests of equivalence with a threshold of 30 
    +P values are left-tailed 
    +significance level used: alpha = 0.05 
    +Estimates sharing the same symbol test as equivalent 
    +
    +
    +

    Here, the treatment pairs ‘0.0cwt’ and ‘0.2cwt’, ‘0.2cwt’ and ‘0.4cwt’, and ‘0.4cwt’ and ‘0.6cwt’ each share a symbol and can therefore be considered equivalent.

    +
      +
    1. Significance Sets
    2. +
    +

    Another alternative is to simply reverse all the boolean flags we used when constructing the CLDs for m2 the first time.

    +
    +
    cld(m2, signif = TRUE)
    +
    +
     N      emmean   SE df lower.CL upper.CL .signif.set
    + 0.0cwt   79.4 7.17  5     60.9     97.8  12        
    + 0.2cwt   98.9 7.17  5     80.4    117.3  12        
    + 0.4cwt  114.2 7.17  5     95.8    132.7  1         
    + 0.6cwt  123.4 7.17  5    104.9    141.8   2        
    +
    +Results are averaged over the levels of: V 
    +Degrees-of-freedom method: containment 
    +Confidence level used: 0.95 
    +P value adjustment: tukey method for comparing a family of 4 estimates 
    +significance level used: alpha = 0.05 
    +Estimates sharing the same symbol are significantly different 
    +
    +
    @@ -629,6 +753,72 @@

    See Section 2.0.4 for additional warnings about problems with using compact letter display.

    +
    +
    +

    12.6 Export emmeans to excel sheet

    +

    The outputs from emmeans() or cld() objects can be exported by first converting them to a data frame and then using the write_xlsx() function from the ‘writexl’ package.

    +
    +
    result_n <- as.data.frame(summary(m1))
    +
    +
    +
    writexl::write_xlsx(result_n)
    +
    +
    +
    +

    12.7 Graphical display of emmeans

    +

    The results of emmeans() object can be plotted in two different ways. Firstly, we can use base plot() function in R.

    +
    +
    plot(m1)
    +
    +
    +
    +

    +
    +
    +
    +
    plot(m4)
    +
    +
    +
    +

    +
    +
    +
    +
    +

    Or we can use ‘ggplot2’ library. We can plot cld3 object in ggplot, with Variety on x-axis and estimated means of yield on y-axis. Different N treatments are presented in groups of different colors.

    +
    +
    ggplot(cld3) +
    +  aes(x = V, y = emmean, color = N) +
    +  geom_point(position = position_dodge(width = 0.9)) +
    +  geom_errorbar(mapping = aes(ymin = lower.CL, ymax = upper.CL), 
    +                              position = position_dodge(width = 1),
    +                width = 0.1) +
    +  geom_text(mapping = aes(label = .group, y = upper.CL * 1.05), 
    +            position = position_dodge(width = 0.8), 
    +            show.legend = F)+
    +  theme_bw()+
    +  theme(axis.text= element_text(color = "black",
    +                                size =12))
    +
    +
    +
    +

    +
    +
    +
    +
    +

    Recall: groups that do not differ significantly from each other share the same letter.

    +

    We can also use the emmip() function built into the emmeans package to look at the trend in the interaction of the variety and nitrogen factors.

    +
    +
    emmip(model1, N ~ V)
    +
    +
    +
    +

    +
    +
    +
    +
    @@ -642,6 +832,24 @@

    If you want to read more about emmeans, please refer to vignettes on this CRAN page.

    +
    +
    +

    12.8 Conclusion

    +

    Be cautious with the terms “significant” and “nonsignificant”, and don’t ever interpret a “nonsignificant” result as saying that there is no effect. Follow good statistical practices such as getting the model right first, and using adjusted P values for appropriately chosen families of comparisons or contrasts.

    +
    +
    +
    + +
    +
    +P values, “significance”, and recommendations +
    +
    +
    +

    P values are often misinterpreted, and the term “statistical significance” can be misleading. Please refer to this link to read more about basic principles outlined by the American Statistical Association when considering p-values.

    +

    If you want to read more about emmeans, please refer to vignettes on this CRAN page.

    +
    +
    diff --git a/chapters/means-and-contrasts_files/figure-html/unnamed-chunk-22-1.png b/chapters/means-and-contrasts_files/figure-html/unnamed-chunk-22-1.png new file mode 100644 index 0000000..9c96a2f Binary files /dev/null and b/chapters/means-and-contrasts_files/figure-html/unnamed-chunk-22-1.png differ diff --git a/chapters/means-and-contrasts_files/figure-html/unnamed-chunk-22-2.png b/chapters/means-and-contrasts_files/figure-html/unnamed-chunk-22-2.png new file mode 100644 index 0000000..0835615 Binary files /dev/null and b/chapters/means-and-contrasts_files/figure-html/unnamed-chunk-22-2.png differ diff --git a/chapters/means-and-contrasts_files/figure-html/unnamed-chunk-23-1.png b/chapters/means-and-contrasts_files/figure-html/unnamed-chunk-23-1.png new file mode 100644 index 0000000..ebb4dd1 Binary files /dev/null and b/chapters/means-and-contrasts_files/figure-html/unnamed-chunk-23-1.png differ diff --git a/chapters/means-and-contrasts_files/figure-html/unnamed-chunk-24-1.png b/chapters/means-and-contrasts_files/figure-html/unnamed-chunk-24-1.png new file mode 100644 index 0000000..447c83c Binary files /dev/null and b/chapters/means-and-contrasts_files/figure-html/unnamed-chunk-24-1.png differ diff --git a/chapters/means-and-contrasts_files/figure-html/unnamed-chunk-4-1.png b/chapters/means-and-contrasts_files/figure-html/unnamed-chunk-4-1.png new file mode 100644 index 0000000..d554abd Binary files /dev/null and b/chapters/means-and-contrasts_files/figure-html/unnamed-chunk-4-1.png differ diff --git a/chapters/variance-components.html b/chapters/variance-components.html index dddba8b..de607ce 100644 --- a/chapters/variance-components.html +++ b/chapters/variance-components.html @@ -310,6 +310,7 @@

    13 
    library(nlme); library(emmeans); library(performance)
     library(lme4)
    diff --git a/search.json b/search.json index 2c12fc2..baed886 100644 --- a/search.json +++ b/search.json @@ -384,7 +384,17 @@ "href": "chapters/means-and-contrasts.html#analysis-examples", "title": "12  Marginal Means & Contrasts", "section": "12.2 Analysis Examples", - "text": "12.2 Analysis Examples\n\nlibrary(nlme); library(performance); library(emmeans)\nlibrary(dplyr); library(broom.mixed)\n\n\n12.2.1 Import data\nLet’s import oats data from the MASS package.\n\n\nTo read more about data and model fitting explanation please refer to Chapter 6.\n\ndata1 <- MASS::oats\n\n\n\n12.2.2 Model fitting\n\nmodel1 <- lme(Y ~ V + N + V:N ,\n random = ~1|B/V,\n data = data1, \n na.action = na.exclude)\ntidy(model1)\n\n\n\n12.2.3 Check Model Assumptions\n\ncheck_model(model1, check = c('normality', 'linearity'))\n\n\n\n12.2.4 Model Inference\n\nanova(model1, type = \"marginal\")\n\n\n\n12.2.5 Estimated Marginal Means\nNow that we have a good model, let’s use the emmeans() function to obtain EMMs.\nNow that we have fitted a linear mixed model (model1) and it meets the model assumption. 
Let’s use the emmeans() function to obtain estimated marginal means for main (variety and nitrogen) and interaction (variety x nitrogen) effects.\n\n12.2.5.1 Main effects\n\nm1 <- emmeans(model1, ~V, level = 0.95)\nm1\n\n\nm2 <- emmeans(model1, ~N)\nm2\n\n\n\n12.2.5.2 Interaction effects\n\nm3 <- emmeans(model1, ~V*N)\nm3\n\n\nm4 <- emmeans(model1, ~V|N)\nm4", + "text": "12.2 Analysis Examples\n\nlibrary(nlme); library(performance); library(emmeans)\nlibrary(dplyr); library(broom.mixed); library(multcompView)\nlibrary(multcomp); library(ggplot2)\n\n\n12.2.1 Import data\nLet’s import oats data from the MASS package.\n\n\nTo read more about data and model fitting explanation please refer to Chapter 6.\n\ndata1 <- MASS::oats\n\n\n\n12.2.2 Model fitting\n\nmodel1 <- lme(Y ~ V + N + V:N ,\n random = ~1|B/V,\n data = data1, \n na.action = na.exclude)\ntidy(model1)\n\nWarning in tidy.lme(model1): ran_pars not yet implemented for multiple levels\nof nesting\n\n\n# A tibble: 12 × 7\n effect term estimate std.error df statistic p.value\n <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>\n 1 fixed (Intercept) 80 9.11 45 8.78 2.56e-11\n 2 fixed VMarvellous 6.67 9.72 10 0.686 5.08e- 1\n 3 fixed VVictory -8.50 9.72 10 -0.875 4.02e- 1\n 4 fixed N0.2cwt 18.5 7.68 45 2.41 2.02e- 2\n 5 fixed N0.4cwt 34.7 7.68 45 4.51 4.58e- 5\n 6 fixed N0.6cwt 44.8 7.68 45 5.84 5.48e- 7\n 7 fixed VMarvellous:N0.2cwt 3.33 10.9 45 0.307 7.60e- 1\n 8 fixed VVictory:N0.2cwt -0.333 10.9 45 -0.0307 9.76e- 1\n 9 fixed VMarvellous:N0.4cwt -4.17 10.9 45 -0.383 7.03e- 1\n10 fixed VVictory:N0.4cwt 4.67 10.9 45 0.430 6.70e- 1\n11 fixed VMarvellous:N0.6cwt -4.67 10.9 45 -0.430 6.70e- 1\n12 fixed VVictory:N0.6cwt 2.17 10.9 45 0.199 8.43e- 1\n\n\n\n\n12.2.3 Check Model Assumptions\n\ncheck_model(model1, check = c('normality', 'linearity'))\n\n\n\n\n\n\n\n\n\n\n12.2.4 Model Inference\n\nanova(model1, type = \"marginal\")\n\n numDF denDF F-value p-value\n(Intercept) 1 45 77.16729 <.0001\nV 2 10 1.22454 0.3344\nN 3 45 
13.02273 <.0001\nV:N 6 45 0.30282 0.9322\n\n\n\n\n12.2.5 Estimated Marginal Means\nNow that we have fitted a linear mixed model (model1) and it meets the model assumption. Let’s use the emmeans() function to obtain estimated marginal means for main (variety and nitrogen) and interaction (variety x nitrogen) effects.\n\n12.2.5.1 Main effects\n\nm1 <- emmeans(model1, ~V, level = 0.95)\n\nNOTE: Results may be misleading due to involvement in interactions\n\nm1\n\n V emmean SE df lower.CL upper.CL\n Golden.rain 104.5 7.8 5 84.5 125\n Marvellous 109.8 7.8 5 89.7 130\n Victory 97.6 7.8 5 77.6 118\n\nResults are averaged over the levels of: N \nDegrees-of-freedom method: containment \nConfidence level used: 0.95 \n\n\n\nm2 <- emmeans(model1, ~N)\n\nNOTE: Results may be misleading due to involvement in interactions\n\nm2\n\n N emmean SE df lower.CL upper.CL\n 0.0cwt 79.4 7.17 5 60.9 97.8\n 0.2cwt 98.9 7.17 5 80.4 117.3\n 0.4cwt 114.2 7.17 5 95.8 132.7\n 0.6cwt 123.4 7.17 5 104.9 141.8\n\nResults are averaged over the levels of: V \nDegrees-of-freedom method: containment \nConfidence level used: 0.95 \n\n\nMake sure to read and interpret EMMs carefully. Here, when we calculated EMMs for main effects of V and N, these were averaged over the levels of other factor in experiment. For example, estimated means for each variety were averaged over it’s N treatments, respectively.\n\n\n12.2.5.2 Interaction effects\nNow let’s evaluate the interaction effect EMMs for V and N. 
These can be calculated either using V*N or V|N.\n\nm3 <- emmeans(model1, ~V*N)\nm3\n\n V N emmean SE df lower.CL upper.CL\n Golden.rain 0.0cwt 80.0 9.11 5 56.6 103.4\n Marvellous 0.0cwt 86.7 9.11 5 63.3 110.1\n Victory 0.0cwt 71.5 9.11 5 48.1 94.9\n Golden.rain 0.2cwt 98.5 9.11 5 75.1 121.9\n Marvellous 0.2cwt 108.5 9.11 5 85.1 131.9\n Victory 0.2cwt 89.7 9.11 5 66.3 113.1\n Golden.rain 0.4cwt 114.7 9.11 5 91.3 138.1\n Marvellous 0.4cwt 117.2 9.11 5 93.8 140.6\n Victory 0.4cwt 110.8 9.11 5 87.4 134.2\n Golden.rain 0.6cwt 124.8 9.11 5 101.4 148.2\n Marvellous 0.6cwt 126.8 9.11 5 103.4 150.2\n Victory 0.6cwt 118.5 9.11 5 95.1 141.9\n\nDegrees-of-freedom method: containment \nConfidence level used: 0.95 \n\n\n\nm4 <- emmeans(model1, ~V|N)\nm4\n\nN = 0.0cwt:\n V emmean SE df lower.CL upper.CL\n Golden.rain 80.0 9.11 5 56.6 103.4\n Marvellous 86.7 9.11 5 63.3 110.1\n Victory 71.5 9.11 5 48.1 94.9\n\nN = 0.2cwt:\n V emmean SE df lower.CL upper.CL\n Golden.rain 98.5 9.11 5 75.1 121.9\n Marvellous 108.5 9.11 5 85.1 131.9\n Victory 89.7 9.11 5 66.3 113.1\n\nN = 0.4cwt:\n V emmean SE df lower.CL upper.CL\n Golden.rain 114.7 9.11 5 91.3 138.1\n Marvellous 117.2 9.11 5 93.8 140.6\n Victory 110.8 9.11 5 87.4 134.2\n\nN = 0.6cwt:\n V emmean SE df lower.CL upper.CL\n Golden.rain 124.8 9.11 5 101.4 148.2\n Marvellous 126.8 9.11 5 103.4 150.2\n Victory 118.5 9.11 5 95.1 141.9\n\nDegrees-of-freedom method: containment \nConfidence level used: 0.95", + "crumbs": [ + "12  Marginal Means and Contrasts" + ] + }, + { + "objectID": "chapters/means-and-contrasts.html#contrasts-using-emmeans", + "href": "chapters/means-and-contrasts.html#contrasts-using-emmeans", + "title": "12  Marginal Means & Contrasts", + "section": "12.3 Contrasts using emmeans", + "text": "12.3 Contrasts using emmeans\nFirstly, the pairs() function from emmeans package can be used to evaluate the pairwise comparison among treatment objects. 
The emmean object (m1, m2) will be passed through pairs() function which will provide a p-value adjustment equivalent to the Tukey test.\n\npairs(m1, adjust = \"tukey\")\n\n contrast estimate SE df t.ratio p.value\n Golden.rain - Marvellous -5.29 7.08 10 -0.748 0.7419\n Golden.rain - Victory 6.88 7.08 10 0.971 0.6104\n Marvellous - Victory 12.17 7.08 10 1.719 0.2458\n\nResults are averaged over the levels of: N \nDegrees-of-freedom method: containment \nP value adjustment: tukey method for comparing a family of 3 estimates \n\n\n\npairs(m2)\n\n contrast estimate SE df t.ratio p.value\n 0.0cwt - 0.2cwt -19.50 4.44 45 -4.396 0.0004\n 0.0cwt - 0.4cwt -34.83 4.44 45 -7.853 <.0001\n 0.0cwt - 0.6cwt -44.00 4.44 45 -9.919 <.0001\n 0.2cwt - 0.4cwt -15.33 4.44 45 -3.457 0.0064\n 0.2cwt - 0.6cwt -24.50 4.44 45 -5.523 <.0001\n 0.4cwt - 0.6cwt -9.17 4.44 45 -2.067 0.1797\n\nResults are averaged over the levels of: V \nDegrees-of-freedom method: containment \nP value adjustment: tukey method for comparing a family of 4 estimates \n\n\nHere if we look at the results from code chunk above, it’s easy to interpret results from pairs() function in case of varietey comparison becuase there were only 3 groups. It’s bit confusing in case of Nitrogen treatments where we had 4 groups. We can further simplify it by using custom contrasts.\n\n\n\n\n\n\npairs()\n\n\n\nRemember!! 
The pairs() function can be used to calculate pairwise comparison when treatment groups are less than equal to 3.\n\n\n\n12.3.1 Custom contrasts\nFirstly, let’s run emmean object ‘m2’ for nitrogen treatment comparison.\n\nm2\n\n N emmean SE df lower.CL upper.CL\n 0.0cwt 79.4 7.17 5 60.9 97.8\n 0.2cwt 98.9 7.17 5 80.4 117.3\n 0.4cwt 114.2 7.17 5 95.8 132.7\n 0.6cwt 123.4 7.17 5 104.9 141.8\n\nResults are averaged over the levels of: V \nDegrees-of-freedom method: containment \nConfidence level used: 0.95 \n\n\nNow, lets a create a vector for each nitrogen treatment in the same order as presented in output from m2.\n\nA1 = c(1, 0, 0, 0)\nA2 = c(0, 1, 0, 0)\nA3 = c(0, 0, 1, 0)\nA4 = c(0, 0, 0, 1)\n\nThese vectors (A1, A2, A3, A4) represent each Nitrogen treatment in an order as presented in m2 emmeans object. A1, A2, and A3, A4 vectors represents 0.0cwt, 0.2cwt, 0.4cwt, and 0.6cwt treatments, respectively.\nLet’s create custom contrasts for comparing ‘0.0cwt’ (A1) treatment to ‘0.2cwt’ (A2), ‘0.4cwt’ (A3), and ‘0.6cwt’ (A4) treatments. 
This can be evaluated as shown below: Here the output shows the difference in mean yield between these two varieties\n\ncontrast(m2, method = list(A1 - A2) )\n\n contrast estimate SE df t.ratio p.value\n c(1, -1, 0, 0) -19.5 4.44 45 -4.396 0.0001\n\nResults are averaged over the levels of: V \nDegrees-of-freedom method: containment \n\ncontrast(m2, method = list(A1 - A3) )\n\n contrast estimate SE df t.ratio p.value\n c(1, 0, -1, 0) -34.8 4.44 45 -7.853 <.0001\n\nResults are averaged over the levels of: V \nDegrees-of-freedom method: containment \n\ncontrast(m2, method = list(A1 - A4) )\n\n contrast estimate SE df t.ratio p.value\n c(1, 0, 0, -1) -44 4.44 45 -9.919 <.0001\n\nResults are averaged over the levels of: V \nDegrees-of-freedom method: containment \n\n\n\n\nUsing custom contrasts is strongly recommended instead of pairs() when you are comparing multiple treatment groups (>5).", "crumbs": [ "12  Marginal Means and Contrasts" ] @@ -393,8 +403,8 @@ "objectID": "chapters/means-and-contrasts.html#compact-letter-displays", "href": "chapters/means-and-contrasts.html#compact-letter-displays", "title": "12  Marginal Means & Contrasts", - "section": "12.3 Compact letter displays", - "text": "12.3 Compact letter displays\nCompact letter displays (CLDs) are a popular way to display multiple comparisons when there are more than few group means to compare. However, they are problematic as they are more prone to misinterpretation. The R package multcompView (Graves et al., 2019) provides an implementation of CLDs creating a display where any two means associated with same symbol are not statistically different.\nThe cld() function from the multcomp package is used to implement CLDs in the form of symbols or letters. The emmeans package provides a emmGrid objects for cld() method.\nLet’s start evaluating CLDs for main effects. 
We will use emmean objects m1 and m2 for this.\n\ncld(m1, alpha=0.05, Letters=letters)\n\n\ncld(m2, alpha=0.05, Letters=letters)\n\nLet’s have a look at the CLDs for the interaction effect:\n\ncld(m3, alpha=0.05, Letters=letters)\n\nInterpreation of these letters is: Here we have a significant difference in grain yield with varieties “victory”, with N treatments of 0.0cwt, 0.2cwt, 0.4cwt, and 0.6wt. Grain yield for Golden.rain variety was significantly lower with 0.0cwt N treatment compared to the 0.2cwt, 0.4cwt, and 0.6wt treatments.", + "section": "12.4 Compact letter displays", + "text": "12.4 Compact letter displays\nCompact letter displays (CLDs) are a popular way to display multiple comparisons when there are more than few group means to compare. However, they are problematic as they are more prone to misinterpretation. The R package multcompView (Graves et al., 2019) provides an implementation of CLDs creating a display where any two means associated with same symbol are not statistically different.\nThe cld() function from the multcomp package is used to implement CLDs in the form of symbols or letters. The emmeans package provides a emmGrid objects for cld() method.\nLet’s start evaluating CLDs for main effects. We will use emmean objects m1 and m2 for this. In the output below, groups sharing a letter in the .group are not statistically different from each other.\n\ncld(m1, alpha=0.05, Letters=letters)\n\n V emmean SE df lower.CL upper.CL .group\n Victory 97.6 7.8 5 77.6 118 a \n Golden.rain 104.5 7.8 5 84.5 125 a \n Marvellous 109.8 7.8 5 89.7 130 a \n\nResults are averaged over the levels of: N \nDegrees-of-freedom method: containment \nConfidence level used: 0.95 \nP value adjustment: tukey method for comparing a family of 3 estimates \nsignificance level used: alpha = 0.05 \nNOTE: If two or more means share the same grouping symbol,\n then we cannot show them to be different.\n But we also did not show them to be the same. 
\n\n\n\ncld(m2, alpha=0.05, Letters=letters)\n\n N emmean SE df lower.CL upper.CL .group\n 0.0cwt 79.4 7.17 5 60.9 97.8 a \n 0.2cwt 98.9 7.17 5 80.4 117.3 b \n 0.4cwt 114.2 7.17 5 95.8 132.7 c \n 0.6cwt 123.4 7.17 5 104.9 141.8 c \n\nResults are averaged over the levels of: V \nDegrees-of-freedom method: containment \nConfidence level used: 0.95 \nP value adjustment: tukey method for comparing a family of 4 estimates \nsignificance level used: alpha = 0.05 \nNOTE: If two or more means share the same grouping symbol,\n then we cannot show them to be different.\n But we also did not show them to be the same. \n\n\nLet’s have a look at the CLDs for the interaction effect:\n\ncld3 <- cld(m3, alpha=0.05, Letters=letters)\ncld3\n\n V N emmean SE df lower.CL upper.CL .group \n Victory 0.0cwt 71.5 9.11 5 48.1 94.9 a \n Golden.rain 0.0cwt 80.0 9.11 5 56.6 103.4 abcde \n Marvellous 0.0cwt 86.7 9.11 5 63.3 110.1 abc fg \n Victory 0.2cwt 89.7 9.11 5 66.3 113.1 ab d f h \n Golden.rain 0.2cwt 98.5 9.11 5 75.1 121.9 abcdefghi\n Marvellous 0.2cwt 108.5 9.11 5 85.1 131.9 abcdefghi\n Victory 0.4cwt 110.8 9.11 5 87.4 134.2 bcdefghi\n Golden.rain 0.4cwt 114.7 9.11 5 91.3 138.1 fghi\n Marvellous 0.4cwt 117.2 9.11 5 93.8 140.6 de hi\n Victory 0.6cwt 118.5 9.11 5 95.1 141.9 c e g i\n Golden.rain 0.6cwt 124.8 9.11 5 101.4 148.2 fghi\n Marvellous 0.6cwt 126.8 9.11 5 103.4 150.2 hi\n\nDegrees-of-freedom method: containment \nConfidence level used: 0.95 \nP value adjustment: tukey method for comparing a family of 12 estimates \nsignificance level used: alpha = 0.05 \nNOTE: If two or more means share the same grouping symbol,\n then we cannot show them to be different.\n But we also did not show them to be the same. \n\n\nInterpretation of these letters is: Here we have a significant difference in grain yield with varieties “victory”, with N treatments of 0.0cwt, 0.2cwt, 0.4cwt, and 0.6wt. 
Grain yield for Golden.rain variety was significantly lower with 0.0cwt N treatment compared to the 0.2cwt, 0.4cwt, and 0.6wt treatments.\nIn the data set we used for demonstration here, we had equal number of observations in each group. However, this might not be a case every time as it is common to have missing values in the data set. In such cases, readers usually struggle to interpret significant differences among groups. For example, estimated means of two groups are substantially different but they are no statistically different. This normally happens when SE of one group is large due to its small sample size, so it’s hard for it to be statistically different from other groups. In such cases, we can use alternatives to CLDs as shown below.", "crumbs": [ "12  Marginal Means and Contrasts" ] @@ -403,18 +413,38 @@ "objectID": "chapters/means-and-contrasts.html#alternatives-to-cld", "href": "chapters/means-and-contrasts.html#alternatives-to-cld", "title": "12  Marginal Means & Contrasts", - "section": "12.4 Alternatives to CLD", - "text": "12.4 Alternatives to CLD\n\nOne-way estimated marginal means and plot\n\nlibrary(multcomp)\nlibrary(emmeans)\n\nmarginal = emmeans(model, ~ Location)\n\nCLD = cld(marginal,\n alpha=0.05,\n Letters=letters,\n adjust=\"tukey\")\n\nCLD\n\n\nLocation emmean SE df lower.CL upper.CL .group\n Olympia 8.333333 0.6718548 16 6.449596 10.21707 a \n Northampton 11.833333 0.6718548 16 9.949596 13.71707 b \n Ventura 13.333333 0.6718548 16 11.449596 15.21707 b \n Burlington 21.833333 0.6718548 16 19.949596 23.71707 c \n\n\n\n### Order the levels for printing\n\nCLD$Location = factor(CLD$Location,\n levels=c(\"Olympia\", \"Ventura\", \"Northampton\", \"Burlington\"))\n\n### Remove spaces in .group \n\nCLD$.group=gsub(\" \", \"\", CLD$.group)\n\n\n### Plot\n\nlibrary(ggplot2)\n\nggplot(CLD,\n aes(x = Location,\n y = emmean,\n label = .group)) +\n\n geom_point(shape = 15,\n size = 4) +\n\n geom_errorbar(aes(ymin = lower.CL,\n ymax = upper.CL),\n 
width = 0.2,\n size = 0.7) +\n\n theme_bw() +\n theme(axis.title = element_text(face = \"bold\"),\n axis.text = element_text(face = \"bold\"),\n plot.caption = element_text(hjust = 0)) +\n\n ylab(\"Estimated marginal mean\\nmidichlorian count\") +\n ggtitle (\"Midichlorian counts\",\n\n subtitle = \"In four U.S. cities\") +\n\n labs(caption = paste0(\"\\nMidichlorian counts for four locations. \",\n \"Boxes indicate the EM mean. \\n\",\n \"Error bars indicate the 95% \",\n \"confidence interval of the EM mean. \\n\",\n \"Means sharing a letter are not \",\n \"significantly different (Tukey-adjusted \\n\",\n \"comparisons).\"),\n hjust=0.5) +\n\n geom_text(nudge_x = c(0, 0, 0, 0),\n nudge_y = c(4, 4, 4, 4),\n color = \"black\")\n\n\n12.4.1 Interactions using Emmeans\n\nInteraction plot of estimated marginal means\n\nlibrary(multcomp)\nlibrary(emmeans)\n\nmarginal = emmeans(model,\n ~ Tribe:Location)\n\nCLD = cld(marginal,\n alpha=0.05,\n Letters=letters,\n adjust=\"tukey\")\n\nCLD\n\n\nTribe Location emmean SE df lower.CL upper.CL .group\n Sith Olympia 4.333333 0.9501462 16 1.354477 7.31219 a \n Jedi Northampton 8.666667 0.9501462 16 5.687810 11.64552 ab \n Sith Ventura 10.666667 0.9501462 16 7.687810 13.64552 bc \n Jedi Olympia 12.333333 0.9501462 16 9.354477 15.31219 bcd\n Sith Northampton 15.000000 0.9501462 16 12.021143 17.97886 cd\n Jedi Ventura 16.000000 0.9501462 16 13.021143 18.97886 d\n Jedi Burlington 20.666667 0.9501462 16 17.687810 23.64552 e\n Sith Burlington 23.000000 0.9501462 16 20.021143 25.97886 e\n\n\n\n### Order the levels for printing\n\nCLD$Location = factor(CLD$Location,\n levels=c(\"Olympia\", \"Ventura\", \"Northampton\", \"Burlington\"))\n\nCLD$Tribe = factor(CLD$Tribe,\n levels=c(\"Jedi\", \"Sith\"))\n\n### Remove spaces in .group \n\nCLD$.group=gsub(\" \", \"\", CLD$.group)\n\n\nCLD\n\n\n### Plot\n\nlibrary(ggplot2)\n\npd = position_dodge(0.4) ### How much to jitter the points on the plot\n\nggplot(CLD,\n aes(x = Location,\n y = emmean,\n 
color = Tribe,\n label = .group)) +\n\n geom_point(shape = 15,\n size = 4,\n position = pd) +\n\n geom_errorbar(aes(ymin = lower.CL,\n ymax = upper.CL),\n width = 0.2,\n size = 0.7,\n position = pd) +\n\n theme_bw() +\n theme(axis.title = element_text(face = \"bold\"),\n axis.text = element_text(face = \"bold\"),\n plot.caption = element_text(hjust = 0)) +\n\n ylab(\"Estimated marginal mean\\nmidichlorian count\") +\n ggtitle (\"Midichlorian counts for Jedi and Sith\",\n subtitle = \"In four U.S. cities\") +\n \n labs(caption = paste0(\"\\nMidichlorian counts for two tribes across \",\n \"four locations. Boxes indicate \\n\",\n \"the EM mean. \",\n \"Error bars indicate the 95% confidence \",\n \"interval \",\n \"of the EM \\n\",\n \"mean. Means sharing a letter are \",\n \"not significantly different \\n\",\n \"(Tukey-adjusted comparisons).\"),\n hjust=0.5) +\n \n geom_text(nudge_x = c(0.1, -0.1, 0.1, -0.1, 0.1, -0.1, -0.1, 0.1),\n nudge_y = c(4.5, 4.5, 4.5, 4.5, 4.5 , 4.5, 4.5, 4.5),\n color = \"black\") +\n \n scale_color_manual(values = c(\"blue\", \"red\"))\n\n\n\n12.4.2 Contrasts using Emmeans\n\n(warp.emm <- emmeans(warp.lm, ~ tension | wool))\n\ncontrast(warp.emm, \"poly\")\n\nCompact letter display\n\n#library(multcomp); library(multcompView)\n\n\n#cld(m1, Letters= letters)\n\nThe letters indicating significant differences can be generated using cld() function from the ‘multcomp’ package”. In the output below, groups sharing a letter in the .group are not statistically different from each other.", + "section": "12.5 Alternatives to CLD", + "text": "12.5 Alternatives to CLD\n\nEquivalence test\n\nLet’s assume based on subject matter considerations, if mean yield of two groups differ by less than 30 can be considered equivalent. 
Let’s try equivalence test on clds of nitrogen treatment emmeans (m2)\n\ncld(m2, delta = 30, adjust = \"none\")\n\n N emmean SE df lower.CL upper.CL .equiv.set\n 0.0cwt 79.4 7.17 5 60.9 97.8 1 \n 0.2cwt 98.9 7.17 5 80.4 117.3 12 \n 0.4cwt 114.2 7.17 5 95.8 132.7 23 \n 0.6cwt 123.4 7.17 5 104.9 141.8 3 \n\nResults are averaged over the levels of: V \nDegrees-of-freedom method: containment \nConfidence level used: 0.95 \nStatistics are tests of equivalence with a threshold of 30 \nP values are left-tailed \nsignificance level used: alpha = 0.05 \nEstimates sharing the same symbol test as equivalent \n\n\nHere, two treatment groups ‘0.0cwt’ and ‘0.2cwt’, ‘0.4cwt’ and ‘0.6cwt’ can be considered equivalent.\n\nSignificance Sets\n\nAnother alternative is to simply reverse all the boolean flags we used in constructing CLDs for m3 first time.\n\ncld(m2, signif = TRUE)\n\n N emmean SE df lower.CL upper.CL .signif.set\n 0.0cwt 79.4 7.17 5 60.9 97.8 12 \n 0.2cwt 98.9 7.17 5 80.4 117.3 12 \n 0.4cwt 114.2 7.17 5 95.8 132.7 1 \n 0.6cwt 123.4 7.17 5 104.9 141.8 2 \n\nResults are averaged over the levels of: V \nDegrees-of-freedom method: containment \nConfidence level used: 0.95 \nP value adjustment: tukey method for comparing a family of 4 estimates \nsignificance level used: alpha = 0.05 \nEstimates sharing the same symbol are significantly different \n\n\n\n\n\n\n\n\nCautionary Note about CLD\n\n\n\nIt’s important to note that we cannot conclude that treatment levels with the same letter are the same. 
We can only conclude that they are not different.\nThere is a separate branch of statistics, “equivalence testing” that is for ascertaining if things are sufficiently similar to conclude they are equivalent.\nSee Section 2.0.4 for additional warnings about problems with using compact letter display.", + "crumbs": [ + "12  Marginal Means and Contrasts" + ] + }, + { + "objectID": "chapters/means-and-contrasts.html#export-emmeans-to-excel-sheet", + "href": "chapters/means-and-contrasts.html#export-emmeans-to-excel-sheet", + "title": "12  Marginal Means & Contrasts", + "section": "12.6 Export emmeans to excel sheet", + "text": "12.6 Export emmeans to excel sheet\nThe outputs from emmeans() or cld() objects can exported by firstly converting outputs to a data frame and then using writexlsx() function from the ‘writexl’ package to export the outputs.\n\nresult_n <- as.data.frame(summary(m1))\n\n\nwritexl::write_xlsx(result_n)", + "crumbs": [ + "12  Marginal Means and Contrasts" + ] + }, + { + "objectID": "chapters/means-and-contrasts.html#graphical-display-of-emmeans", + "href": "chapters/means-and-contrasts.html#graphical-display-of-emmeans", + "title": "12  Marginal Means & Contrasts", + "section": "12.7 Graphical display of emmeans", + "text": "12.7 Graphical display of emmeans\nThe results of emmeans() object can be plotted in two different ways. Firstly, we can use base plot() function in R.\n\nplot(m1)\n\n\n\n\n\n\n\nplot(m4)\n\n\n\n\n\n\n\n\nOr we can use ‘ggplot2’ library. We can plot cld3 object in ggplot, with Variety on x-axis and estimated means of yield on y-axis. 
Different N treatments are presented in groups of different colors.\n\nggplot(cld3) +\n aes(x = V, y = emmean, color = N) +\n geom_point(position = position_dodge(width = 0.9)) +\n geom_errorbar(mapping = aes(ymin = lower.CL, ymax = upper.CL), \n position = position_dodge(width = 1),\n width = 0.1) +\n geom_text(mapping = aes(label = .group, y = upper.CL * 1.05), \n position = position_dodge(width = 0.8), \n show.legend = F)+\n theme_bw()+\n theme(axis.text= element_text(color = \"black\",\n size =12))\n\n\n\n\n\n\n\n\nRecall: groups that do not differ significantly from each other share the same letter.\nwe can also use emmip() built in emmeans package to look at the trend in interaction of variety and nitrogen factors.\n\nemmip(model1, N ~ V)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nMore details on emmeans\n\n\n\nIf you want to read more about emmeans, please refer to vignettes on this CRAN page.", "crumbs": [ "12  Marginal Means and Contrasts" ] }, { - "objectID": "chapters/means-and-contrasts.html#using-adjusted-p-values", - "href": "chapters/means-and-contrasts.html#using-adjusted-p-values", + "objectID": "chapters/means-and-contrasts.html#conclusion", + "href": "chapters/means-and-contrasts.html#conclusion", "title": "12  Marginal Means & Contrasts", - "section": "12.5 using adjusted P-values", - "text": "12.5 using adjusted P-values\nP values, “significance”, and recommendations : https://cran.r-project.org/web/packages/emmeans/vignettes/basics.html#emms\nSummary of main points EMMs are derived from a model. A different model for the same data may lead to different EMMs. EMMs are based on a reference grid consisting of all combinations of factor levels, with each covariate set to its average (by default). For purposes of defining the reference grid, dimensions of a multivariate response are treated as levels of a factor. EMMs are then predictions on this reference grid, or marginal averages thereof (equally weighted by default). 
Reference grids may be modified using at or other arguments for ref_grid() Reference grids and emmeans() results may be plotted via plot() (for parallel confidence intervals) or emmip() (for an interaction-style plot). Be cautious with the terms “significant” and “nonsignificant”, and don’t ever interpret a “nonsignificant” result as saying that there is no effect. Follow good statistical practices such as getting the model right first, and using adjusted P values for appropriately chosen families of comparisons or contrasts.\n\n\n\n\n\n\nCautionary Note about CLD\n\n\n\nIt’s important to note that we cannot conclude that treatment levels with the same letter are the same. We can only conclude that they are not different.\nThere is a separate branch of statistics, “equivalence testing” that is for ascertaining if things are sufficiently similar to conclude they are equivalent.\nSee Section 2.0.4 for additional warnings about problems with using compact letter display.\n\n\n\n\n\n\n\n\nMore details on emmeans\n\n\n\nIf you want to read more about emmeans, please refer to vignettes on this CRAN page.", + "section": "12.8 Conclusion", + "text": "12.8 Conclusion\nBe cautious with the terms “significant” and “nonsignificant”, and don’t ever interpret a “nonsignificant” result as saying that there is no effect. Follow good statistical practices such as getting the model right first, and using adjusted P values for appropriately chosen families of comparisons or contrasts.\n\n\n\n\n\n\nP values, “significance”, and recommendations\n\n\n\nP values are often misinterpreted, and the term “statistical significance” can be misleading. 
Please refer to this link to read more about basic principles outlined by the American Statistical Association when considering p-values.\nIf you want to read more about emmeans, please refer to vignettes on this CRAN page.", "crumbs": [ "12  Marginal Means and Contrasts" ] @@ -424,7 +454,7 @@ "href": "chapters/variance-components.html", "title": "13  Variance & Variance Components", "section": "", - "text": "13.1 Unequal Variance", + "text": "13.1 Unequal Variance\nMixed models provide the advantage of being able to estimate the variance of random variables. The decision of how to assign", "crumbs": [ "13  Variance and Variance Components" ]