From f335cd0c2103f13d2115349431fbdd51de58cd71 Mon Sep 17 00:00:00 2001 From: DavisVaughan Date: Wed, 26 Jun 2024 13:29:47 +0000 Subject: [PATCH] =?UTF-8?q?Deploying=20to=20gh-pages=20from=20@=20tidyvers?= =?UTF-8?q?e/dplyr@0005f6768fa765b3bba5148711967d58b6013037=20=F0=9F=9A=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dev/articles/base.html | 157 ++++++++++++++++------------- dev/articles/colwise.html | 70 +++++++------ dev/articles/dplyr.html | 102 ++++++++++--------- dev/articles/grouping.html | 87 +++++++++------- dev/articles/programming.html | 28 ++--- dev/articles/rowwise.html | 120 ++++++++++++---------- dev/articles/two-table.html | 60 ++++++----- dev/articles/window-functions.html | 56 +++++----- dev/pkgdown.yml | 2 +- dev/reference/compute.html | 4 +- dev/reference/filter_all.html | 4 +- dev/reference/funs.html | 8 +- dev/reference/src_dbi.html | 4 +- dev/search.json | 2 +- 14 files changed, 387 insertions(+), 317 deletions(-) diff --git a/dev/articles/base.html b/dev/articles/base.html index b4b3e3af74..f578243807 100644 --- a/dev/articles/base.html +++ b/dev/articles/base.html @@ -306,8 +306,9 @@

#> 2 5 #> 3 6 #> 4 4 -#> # ℹ 6 more rows -df %>% distinct(x, .keep_all = TRUE) # whole data frame +#> # ℹ 6 more rows +
+df %>% distinct(x, .keep_all = TRUE) # whole data frame
 #> # A tibble: 10 × 2
 #>       x     y
 #>   <int> <int>
@@ -318,7 +319,7 @@ 

#> # ℹ 6 more rows

There are two equivalents in base R, depending on whether you want the whole data frame, or just selected variables:

-
+
 unique(df["x"]) # selected columns
 #> # A tibble: 10 × 1
 #>       x
@@ -327,8 +328,9 @@ 

#> 2 5 #> 3 6 #> 4 4 -#> # ℹ 6 more rows -df[!duplicated(df$x), , drop = FALSE] # whole data frame +#> # ℹ 6 more rows

+
+df[!duplicated(df$x), , drop = FALSE] # whole data frame
 #> # A tibble: 10 × 2
 #>       x     y
 #>   <int> <int>
@@ -344,7 +346,7 @@ 

dplyr::filter() selects rows where an expression is TRUE:

-
+
 starwars %>% filter(species == "Human")
 #> # A tibble: 35 × 14
 #>   name       height  mass hair_color skin_color eye_color birth_year sex  
@@ -355,15 +357,17 @@ 

#> 4 Owen Lars 178 120 brown, gr… light blue 52 male #> # ℹ 31 more rows #> # ℹ 6 more variables: gender <chr>, homeworld <chr>, species <chr>, -#> # films <list>, vehicles <list>, starships <list> -starwars %>% filter(mass > 1000) +#> # films <list>, vehicles <list>, starships <list>

+
+starwars %>% filter(mass > 1000)
 #> # A tibble: 1 × 14
 #>   name       height  mass hair_color skin_color eye_color birth_year sex  
 #>   <chr>       <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr>
 #> 1 Jabba Des…    175  1358 NA         green-tan… orange           600 herm…
 #> # ℹ 6 more variables: gender <chr>, homeworld <chr>, species <chr>,
-#> #   films <list>, vehicles <list>, starships <list>
-starwars %>% filter(hair_color == "none" & eye_color == "black")
+#> #   films <list>, vehicles <list>, starships <list>
+
+starwars %>% filter(hair_color == "none" & eye_color == "black")
 #> # A tibble: 9 × 14
 #>   name      height  mass hair_color skin_color  eye_color birth_year sex  
 #>   <chr>      <int> <dbl> <chr>      <chr>       <chr>          <dbl> <chr>
@@ -376,7 +380,7 @@ 

#> # films <list>, vehicles <list>, starships <list>

The closest base equivalent (and the inspiration for filter()) is subset():

-
+
 subset(starwars, species == "Human")
 #> # A tibble: 35 × 14
 #>   name       height  mass hair_color skin_color eye_color birth_year sex  
@@ -387,15 +391,17 @@ 

#> 4 Owen Lars 178 120 brown, gr… light blue 52 male #> # ℹ 31 more rows #> # ℹ 6 more variables: gender <chr>, homeworld <chr>, species <chr>, -#> # films <list>, vehicles <list>, starships <list> -subset(starwars, mass > 1000) +#> # films <list>, vehicles <list>, starships <list>

+
+subset(starwars, mass > 1000)
 #> # A tibble: 1 × 14
 #>   name       height  mass hair_color skin_color eye_color birth_year sex  
 #>   <chr>       <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr>
 #> 1 Jabba Des…    175  1358 NA         green-tan… orange           600 herm…
 #> # ℹ 6 more variables: gender <chr>, homeworld <chr>, species <chr>,
-#> #   films <list>, vehicles <list>, starships <list>
-subset(starwars, hair_color == "none" & eye_color == "black")
+#> #   films <list>, vehicles <list>, starships <list>
+
+subset(starwars, hair_color == "none" & eye_color == "black")
 #> # A tibble: 9 × 14
 #>   name      height  mass hair_color skin_color  eye_color birth_year sex  
 #>   <chr>      <int> <dbl> <chr>      <chr>       <chr>          <dbl> <chr>
@@ -408,7 +414,7 @@ 

#> # films <list>, vehicles <list>, starships <list>

You can also use [ but this also requires the use of which() to remove NAs:

-
+
 starwars[which(starwars$species == "Human"), , drop = FALSE]
 #> # A tibble: 35 × 14
 #>   name       height  mass hair_color skin_color eye_color birth_year sex  
@@ -419,15 +425,17 @@ 

#> 4 Owen Lars 178 120 brown, gr… light blue 52 male #> # ℹ 31 more rows #> # ℹ 6 more variables: gender <chr>, homeworld <chr>, species <chr>, -#> # films <list>, vehicles <list>, starships <list> -starwars[which(starwars$mass > 1000), , drop = FALSE] +#> # films <list>, vehicles <list>, starships <list>

+
+starwars[which(starwars$mass > 1000), , drop = FALSE]
 #> # A tibble: 1 × 14
 #>   name       height  mass hair_color skin_color eye_color birth_year sex  
 #>   <chr>       <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr>
 #> 1 Jabba Des…    175  1358 NA         green-tan… orange           600 herm…
 #> # ℹ 6 more variables: gender <chr>, homeworld <chr>, species <chr>,
-#> #   films <list>, vehicles <list>, starships <list>
-starwars[which(starwars$hair_color == "none" & starwars$eye_color == "black"), , drop = FALSE]
+#> #   films <list>, vehicles <list>, starships <list>
+
+starwars[which(starwars$hair_color == "none" & starwars$eye_color == "black"), , drop = FALSE]
 #> # A tibble: 9 × 14
 #>   name      height  mass hair_color skin_color  eye_color birth_year sex  
 #>   <chr>      <int> <dbl> <chr>      <chr>       <chr>          <dbl> <chr>
@@ -445,7 +453,7 @@ 

dplyr::mutate() creates new variables from existing variables:

-
+
 df %>% mutate(z = x + y, z2 = z ^ 2)
 #> # A tibble: 100 × 4
 #>       x     y     z    z2
@@ -457,7 +465,7 @@ 

#> # ℹ 96 more rows

The closest base equivalent is transform(), but note that it cannot use freshly created variables:

-
+
 head(transform(df, z = x + y, z2 = (x + y) ^ 2))
 #>   x y  z  z2
 #> 1 7 4 11 121
@@ -467,12 +475,12 @@ 

#> 5 6 3 9 81 #> 6 9 3 12 144

Alternatively, you can use $<-:

-
+
 mtcars$cyl2 <- mtcars$cyl * 2
 mtcars$cyl4 <- mtcars$cyl2 * 2

When applied to a grouped data frame, dplyr::mutate() computes new variable once per group:

-
+
 gf <- tibble(g = c(1, 1, 2, 2), x = c(0.5, 1.5, 2.5, 3.5))
 gf %>% 
   group_by(g) %>% 
@@ -486,7 +494,7 @@ 

#> 3 2 2.5 3 1 #> 4 2 3.5 3 2

To replicate this in base R, you can use ave():

-
+
 transform(gf, 
   x_mean = ave(x, g, FUN = mean), 
   x_rank = ave(x, g, FUN = rank)
@@ -503,21 +511,23 @@ 

dplyr::pull() extracts a variable either by name or position:

-
+
 mtcars %>% pull(1)
 #>  [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2
 #> [15] 10.4 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4
-#> [29] 15.8 19.7 15.0 21.4
-mtcars %>% pull(cyl)
+#> [29] 15.8 19.7 15.0 21.4
+
+mtcars %>% pull(cyl)
 #>  [1] 6 6 4 6 8 6 8 4 4 6 6 8 8 8 8 8 8 4 4 4 4 8 8 8 8 4 4 4 8 6 8 4

This equivalent to [[ for positions and $ for names:

-
+
 mtcars[[1]]
 #>  [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2
 #> [15] 10.4 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4
-#> [29] 15.8 19.7 15.0 21.4
-mtcars$cyl
+#> [29] 15.8 19.7 15.0 21.4
+
+mtcars$cyl
 #>  [1] 6 6 4 6 8 6 8 4 4 6 6 8 8 8 8 8 8 4 4 4 4 8 8 8 8 4 4 4 8 6 8 4
@@ -526,7 +536,7 @@

dplyr::relocate() makes it easy to move a set of columns to a new position (by default, the front):

-
+
 # to front
 mtcars %>% relocate(gear, carb) 
 #> # A tibble: 32 × 13
@@ -537,8 +547,9 @@ 

#> 3 4 1 22.8 4 108 93 3.85 2.32 18.6 1 1 8 #> 4 3 1 21.4 6 258 110 3.08 3.22 19.4 1 0 12 #> # ℹ 28 more rows -#> # ℹ 1 more variable: cyl4 <dbl> - +#> # ℹ 1 more variable: cyl4 <dbl>

+
+
 # to back
 mtcars %>% relocate(mpg, cyl, .after = last_col()) 
 #> # A tibble: 32 × 13
@@ -551,7 +562,7 @@ 

#> # ℹ 28 more rows #> # ℹ 1 more variable: cyl <dbl>

We can replicate this in base R with a little set manipulation:

-
+
 mtcars[union(c("gear", "carb"), names(mtcars))]
 #> # A tibble: 32 × 13
 #>    gear  carb   mpg   cyl  disp    hp  drat    wt  qsec    vs    am  cyl2
@@ -561,8 +572,9 @@ 

#> 3 4 1 22.8 4 108 93 3.85 2.32 18.6 1 1 8 #> 4 3 1 21.4 6 258 110 3.08 3.22 19.4 1 0 12 #> # ℹ 28 more rows -#> # ℹ 1 more variable: cyl4 <dbl> - +#> # ℹ 1 more variable: cyl4 <dbl>

+
+
 to_back <- c("mpg", "cyl")
 mtcars[c(setdiff(names(mtcars), to_back), to_back)]
 #> # A tibble: 32 × 13
@@ -583,7 +595,7 @@ 

dplyr::rename() allows you to rename variables by name or position:

-
+
 iris %>% rename(sepal_length = Sepal.Length, sepal_width = 2)
 #> # A tibble: 150 × 5
 #>   sepal_length sepal_width Petal.Length Petal.Width Species
@@ -594,11 +606,11 @@ 

#> 4 4.6 3.1 1.5 0.2 setosa #> # ℹ 146 more rows

Renaming variables by position is straight forward in base R:

-
+
 iris2 <- iris
 names(iris2)[2] <- "sepal_width"

Renaming variables by name requires a bit more work:

-
+
 names(iris2)[names(iris2) == "Sepal.Length"] <- "sepal_length"
@@ -607,7 +619,7 @@

dplyr::rename_with() transform column names with a function:

-
+
 iris %>% rename_with(toupper)
 #> # A tibble: 150 × 5
 #>   SEPAL.LENGTH SEPAL.WIDTH PETAL.LENGTH PETAL.WIDTH SPECIES
@@ -619,7 +631,7 @@ 

#> # ℹ 146 more rows

A similar effect can be achieved with setNames() in base R:

-
+
 setNames(iris, toupper(names(iris)))
 #> # A tibble: 150 × 5
 #>   SEPAL.LENGTH SEPAL.WIDTH PETAL.LENGTH PETAL.WIDTH SPECIES
@@ -636,7 +648,7 @@ 

dplyr::select() subsets columns by position, name, function of name, or other property:

-
+
 iris %>% select(1:3)
 #> # A tibble: 150 × 3
 #>   Sepal.Length Sepal.Width Petal.Length
@@ -645,8 +657,9 @@ 

#> 2 4.9 3 1.4 #> 3 4.7 3.2 1.3 #> 4 4.6 3.1 1.5 -#> # ℹ 146 more rows -iris %>% select(Species, Sepal.Length) +#> # ℹ 146 more rows

+
+iris %>% select(Species, Sepal.Length)
 #> # A tibble: 150 × 2
 #>   Species Sepal.Length
 #>   <fct>          <dbl>
@@ -654,8 +667,9 @@ 

#> 2 setosa 4.9 #> 3 setosa 4.7 #> 4 setosa 4.6 -#> # ℹ 146 more rows -iris %>% select(starts_with("Petal")) +#> # ℹ 146 more rows

+
+iris %>% select(starts_with("Petal"))
 #> # A tibble: 150 × 2
 #>   Petal.Length Petal.Width
 #>          <dbl>       <dbl>
@@ -663,8 +677,9 @@ 

#> 2 1.4 0.2 #> 3 1.3 0.2 #> 4 1.5 0.2 -#> # ℹ 146 more rows -iris %>% select(where(is.factor)) +#> # ℹ 146 more rows

+
+iris %>% select(where(is.factor))
 #> # A tibble: 150 × 1
 #>   Species
 #>   <fct>  
@@ -674,7 +689,7 @@ 

#> 4 setosa #> # ℹ 146 more rows

Subsetting variables by position is straightforward in base R:

-
+
 iris[1:3] # single argument selects columns; never drops
 #> # A tibble: 150 × 3
 #>   Sepal.Length Sepal.Width Petal.Length
@@ -683,8 +698,9 @@ 

#> 2 4.9 3 1.4 #> 3 4.7 3.2 1.3 #> 4 4.6 3.1 1.5 -#> # ℹ 146 more rows -iris[1:3, , drop = FALSE] +#> # ℹ 146 more rows

+
+iris[1:3, , drop = FALSE]
 #> # A tibble: 3 × 5
 #>   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
 #>          <dbl>       <dbl>        <dbl>       <dbl> <fct>  
@@ -692,7 +708,7 @@ 

#> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa

You have two options to subset by name:

-
+
 iris[c("Species", "Sepal.Length")]
 #> # A tibble: 150 × 2
 #>   Species Sepal.Length
@@ -701,8 +717,9 @@ 

#> 2 setosa 4.9 #> 3 setosa 4.7 #> 4 setosa 4.6 -#> # ℹ 146 more rows -subset(iris, select = c(Species, Sepal.Length)) +#> # ℹ 146 more rows

+
+subset(iris, select = c(Species, Sepal.Length))
 #> # A tibble: 150 × 2
 #>   Species Sepal.Length
 #>   <fct>          <dbl>
@@ -713,7 +730,7 @@ 

#> # ℹ 146 more rows

Subsetting by function of name requires a bit of work with grep():

-
+
 iris[grep("^Petal", names(iris))]
 #> # A tibble: 150 × 2
 #>   Petal.Length Petal.Width
@@ -724,7 +741,7 @@ 

#> 4 1.5 0.2 #> # ℹ 146 more rows

And you can use Filter() to subset by type:

-
+
 Filter(is.factor, iris)
 #> # A tibble: 150 × 1
 #>   Species
@@ -742,7 +759,7 @@ 

dplyr::summarise() computes one or more summaries for each group:

-
+
 mtcars %>% 
   group_by(cyl) %>% 
   summarise(mean = mean(disp), n = n())
@@ -756,7 +773,7 @@ 

Unfortunately by() returns a list of data frames, but you can combine them back together again with do.call() and rbind():

-
+
 mtcars_by <- by(mtcars, mtcars$cyl, function(df) {
   with(df, data.frame(cyl = cyl[[1]], mean = mean(disp), n = nrow(df)))
 })
@@ -767,7 +784,7 @@ 

#> 8 8 353.1000 14

aggregate() comes very close to providing an elegant answer:

-
+
 agg <- aggregate(disp ~ cyl, mtcars, function(x) c(mean = mean(x), n = length(x)))
 agg
 #>   cyl disp.mean   disp.n
@@ -777,7 +794,7 @@ 

But unfortunately while it looks like there are disp.mean and disp.n columns, it’s actually a single matrix column:

-
+
 str(agg)
 #> 'data.frame':    3 obs. of  2 variables:
 #>  $ cyl : num  4 6 8
@@ -792,7 +809,7 @@ 

slice(): Choose rows by position

slice() selects rows with their location:

-
+
 slice(mtcars, 25:n())
 #> # A tibble: 8 × 13
 #>     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb  cyl2
@@ -804,7 +821,7 @@ 

#> # ℹ 4 more rows #> # ℹ 1 more variable: cyl4 <dbl>

This is straightforward to replicate with [:

-
+
 mtcars[25:nrow(mtcars), , drop = FALSE]
 #> # A tibble: 8 × 13
 #>     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb  cyl2
@@ -879,15 +896,16 @@ 

Filtering joins

dplyr’s semi_join() and anti_join() affect only the rows, not the columns:

-
+
 band_members %>% semi_join(band_instruments)
 #> Joining with `by = join_by(name)`
 #> # A tibble: 2 × 2
 #>   name  band   
 #>   <chr> <chr>  
 #> 1 John  Beatles
-#> 2 Paul  Beatles
-band_members %>% anti_join(band_instruments)
+#> 2 Paul  Beatles
+
+band_members %>% anti_join(band_instruments)
 #> Joining with `by = join_by(name)`
 #> # A tibble: 1 × 2
 #>   name  band  
@@ -895,14 +913,15 @@ 

Filtering joins#> 1 Mick Stones

They can be replicated in base R with [ and %in%:

-
+
 band_members[band_members$name %in% band_instruments$name, , drop = FALSE]
 #> # A tibble: 2 × 2
 #>   name  band   
 #>   <chr> <chr>  
 #> 1 John  Beatles
-#> 2 Paul  Beatles
-band_members[!band_members$name %in% band_instruments$name, , drop = FALSE]
+#> 2 Paul  Beatles
+
+band_members[!band_members$name %in% band_instruments$name, , drop = FALSE]
 #> # A tibble: 1 × 2
 #>   name  band  
 #>   <chr> <chr> 
diff --git a/dev/articles/colwise.html b/dev/articles/colwise.html
index 6567bc701d..539ee813b8 100644
--- a/dev/articles/colwise.html
+++ b/dev/articles/colwise.html
@@ -164,8 +164,9 @@ 

Basic usage#> # A tibble: 1 × 8 #> name hair_color skin_color eye_color sex gender homeworld species #> <int> <int> <int> <int> <int> <int> <int> <int> -#> 1 87 12 31 15 5 3 49 38 - +#> 1 87 12 31 15 5 3 49 38

+ +
+
 starwars %>% 
   group_by(homeworld) %>% 
   filter(n() > 1) %>% 
@@ -194,7 +196,7 @@ 

Basic usageacross() is usually used in combination with summarise() and mutate(), it doesn’t select grouping variables in order to avoid accidentally modifying them:

-
+
 df <- data.frame(g = c(1, 1, 2), x = c(-1, 1, 3), y = c(-1, -4, -9))
 df %>% 
   group_by(g) %>% 
@@ -210,7 +212,7 @@ 

Multiple functions
+
 min_max <- list(
   min = ~min(.x, na.rm = TRUE), 
   max = ~max(.x, na.rm = TRUE)
@@ -219,8 +221,9 @@ 

Multiple functions#> # A tibble: 1 × 6 #> height_min height_max mass_min mass_max birth_year_min birth_year_max #> <int> <int> <dbl> <dbl> <dbl> <dbl> -#> 1 66 264 15 1358 8 896 -starwars %>% summarise(across(c(height, mass, birth_year), min_max)) +#> 1 66 264 15 1358 8 896

+
+starwars %>% summarise(across(c(height, mass, birth_year), min_max))
 #> # A tibble: 1 × 6
 #>   height_min height_max mass_min mass_max birth_year_min birth_year_max
 #>        <int>      <int>    <dbl>    <dbl>          <dbl>          <dbl>
@@ -228,20 +231,21 @@ 

Multiple functionsglue spec:

-
+
 starwars %>% summarise(across(where(is.numeric), min_max, .names = "{.fn}.{.col}"))
 #> # A tibble: 1 × 6
 #>   min.height max.height min.mass max.mass min.birth_year max.birth_year
 #>        <int>      <int>    <dbl>    <dbl>          <dbl>          <dbl>
-#> 1         66        264       15     1358              8            896
-starwars %>% summarise(across(c(height, mass, birth_year), min_max, .names = "{.fn}.{.col}"))
+#> 1         66        264       15     1358              8            896
+
+starwars %>% summarise(across(c(height, mass, birth_year), min_max, .names = "{.fn}.{.col}"))
 #> # A tibble: 1 × 6
 #>   min.height max.height min.mass max.mass min.birth_year max.birth_year
 #>        <int>      <int>    <dbl>    <dbl>          <dbl>          <dbl>
 #> 1         66        264       15     1358              8            896

If you’d prefer all summaries with the same function to be grouped together, you’ll have to expand the calls yourself:

-
+
 starwars %>% summarise(
   across(c(height, mass, birth_year), ~min(.x, na.rm = TRUE), .names = "min_{.col}"),
   across(c(height, mass, birth_year), ~max(.x, na.rm = TRUE), .names = "max_{.col}")
@@ -259,7 +263,7 @@ 

Multiple functionsacross() into a single expression that returns a tibble:

-
+
 starwars %>% summarise(
   tibble(
     across(where(is.numeric), ~min(.x, na.rm = TRUE), .names = "min_{.col}"),
@@ -272,7 +276,7 @@ 

Multiple functions#> 1 66 15 8 264 1358 896

Alternatively we could reorganize results with relocate():

-
+
 starwars %>% 
   summarise(across(where(is.numeric), min_max, .names = "{.fn}.{.col}")) %>% 
   relocate(starts_with("min"))
@@ -288,7 +292,7 @@ 

Current columncur_column(). This can be useful if you want to perform some sort of context dependent transformation that’s already encoded in a vector:

-
+
 df <- tibble(x = 1:3, y = 3:5, z = 5:7)
 mult <- list(x = 1, y = 10, z = 100)
 
@@ -305,7 +309,7 @@ 

Gotchas

Be careful when combining numeric summaries with where(is.numeric):

-
+
 df <- data.frame(x = c(1, 2, 3), y = c(1, 4, 9))
 
 df %>% 
@@ -317,21 +321,21 @@ 

GotchasNA. You probably want to compute n() last to avoid this problem:

-
+
 df %>% 
   summarise(across(where(is.numeric), sd), n = n())
 #>   x        y n
 #> 1 1 4.041452 3

Alternatively, you could explicitly exclude n from the columns to operate on:

-
+
 df %>% 
   summarise(n = n(), across(where(is.numeric) & !n, sd))
 #>   n x        y
 #> 1 3 1 4.041452

Another approach is to combine both the call to n() and across() in a single expression that returns a tibble:

-
+
 df %>% 
   summarise(
     tibble(n = n(), across(where(is.numeric), sd))
@@ -348,7 +352,7 @@ 

Other verbs
+
 rescale01 <- function(x) {
   rng <- range(x, na.rm = TRUE)
   (x - rng[1]) / (rng[2] - rng[1])
@@ -374,7 +378,7 @@ 

Other verbs
+
 starwars %>% distinct(pick(contains("color")))
 #> # A tibble: 67 × 3
 #>   hair_color skin_color  eye_color
@@ -387,7 +391,7 @@ 

Other verbs
+
 starwars %>% count(pick(contains("color")), sort = TRUE)
 #> # A tibble: 67 × 4
 #>   hair_color skin_color eye_color     n
@@ -415,7 +419,7 @@ 

filter()if_any() keeps the rows where the predicate is true for at least one selected column: -
+
 starwars %>% 
   filter(if_any(everything(), ~ !is.na(.x)))
 #> # A tibble: 87 × 14
@@ -433,7 +437,7 @@ 

filter()if_all() keeps the rows where the predicate is true for all selected columns: -
+
 starwars %>% 
   filter(if_all(everything(), ~ !is.na(.x)))
 #> # A tibble: 29 × 14
@@ -467,7 +471,7 @@ 

Why do we like across()?across() makes it possible to express useful summaries that were previously impossible:

-
+
 df %>%
   group_by(g1, g2) %>% 
   summarise(
@@ -534,7 +538,7 @@ 

How do you convert existing code?

For example:

-
+
 df %>% mutate_if(is.numeric, ~mean(.x, na.rm = TRUE))
 # ->
 df %>% mutate(across(where(is.numeric), ~mean(.x, na.rm = TRUE)))
@@ -559,7 +563,7 @@ 

How do you convert existing code?if_any() and if_all() can be used inside filter() to keep rows for which the predicate is true for at least one, or all selected columns:

-
+
 df <- tibble(x = c("a", "b"), y = c(1, 1), z = c(-1, 1))
 
 # Find all rows where EVERY numeric variable is greater than zero
@@ -567,8 +571,9 @@ 

How do you convert existing code?#> # A tibble: 1 × 3 #> x y z #> <chr> <dbl> <dbl> -#> 1 b 1 1 - +#> 1 b 1 1

+
+
 # Find all rows where ANY numeric variable is greater than zero
 df %>% filter(if_any(where(is.numeric), ~ .x > 0))
 #> # A tibble: 2 × 3
@@ -584,14 +589,15 @@ 

How do you convert existing code?mutate_at(), and mutate_all(), which apply the transformations one at a time. We expect that you’ll generally find the new behaviour less surprising:

-
+
 df <- tibble(x = 2, y = 4, z = 8)
 df %>% mutate_all(~ .x / y)
 #> # A tibble: 1 × 3
 #>       x     y     z
 #>   <dbl> <dbl> <dbl>
-#> 1   0.5     1     8
-
+#> 1   0.5     1     8
+
+
 df %>% mutate(across(everything(), ~ .x / y))
 #> # A tibble: 1 × 3
 #>       x     y     z
diff --git a/dev/articles/dplyr.html b/dev/articles/dplyr.html
index 28aae514da..fd679b25f0 100644
--- a/dev/articles/dplyr.html
+++ b/dev/articles/dplyr.html
@@ -145,8 +145,9 @@ 

Data: starwars?starwars

 dim(starwars)
-#> [1] 87 14
-starwars
+#> [1] 87 14
+
+starwars
 #> # A tibble: 87 × 14
 #>   name       height  mass hair_color skin_color eye_color birth_year sex  
 #>   <chr>       <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr>
@@ -222,7 +223,7 @@ 

Filter rows with filter()TRUE.

For example, we can select all character with light skin color and brown eyes with:

-
+
 starwars %>% filter(skin_color == "light", eye_color == "brown")
 #> # A tibble: 7 × 14
 #>   name       height  mass hair_color skin_color eye_color birth_year sex  
@@ -235,7 +236,7 @@ 

Filter rows with filter()#> # ℹ 6 more variables: gender <chr>, homeworld <chr>, species <chr>, #> # films <list>, vehicles <list>, starships <list>

This is roughly equivalent to this base R code:

-
+
 starwars[starwars$skin_color == "light" & starwars$eye_color == "brown", ]
@@ -247,7 +248,7 @@

Arrange rows with arrange() -
+
 starwars %>% arrange(height, mass)
 #> # A tibble: 87 × 14
 #>   name       height  mass hair_color skin_color eye_color birth_year sex  
@@ -260,7 +261,7 @@ 

Arrange rows with arrange()#> # ℹ 6 more variables: gender <chr>, homeworld <chr>, species <chr>, #> # films <list>, vehicles <list>, starships <list>

Use desc() to order a column in descending order:

-
+
 starwars %>% arrange(desc(height))
 #> # A tibble: 87 × 14
 #>   name       height  mass hair_color skin_color eye_color birth_year sex  
@@ -279,7 +280,7 @@ 

Choose rows using their pos

slice() lets you index rows by their (integer) locations. It allows you to select, remove, and duplicate rows.

We can get characters from row numbers 5 through 10.

-
+
 starwars %>% slice(5:10)
 #> # A tibble: 6 × 14
 #>   name       height  mass hair_color skin_color eye_color birth_year sex  
@@ -297,7 +298,7 @@ 

Choose rows using their pos slice_head() and slice_tail() select the first or last rows. -
+
 starwars %>% slice_head(n = 3)
 #> # A tibble: 3 × 14
 #>   name       height  mass hair_color skin_color eye_color birth_year sex  
@@ -312,7 +313,7 @@ 

Choose rows using their pos slice_sample() randomly selects rows. Use the option prop to choose a certain proportion of the cases. -
+
 starwars %>% slice_sample(n = 5)
 #> # A tibble: 5 × 14
 #>   name       height  mass hair_color skin_color eye_color birth_year sex  
@@ -323,8 +324,9 @@ 

Choose rows using their pos #> 4 Luminara … 170 56.2 black yellow blue 58 fema… #> # ℹ 1 more row #> # ℹ 6 more variables: gender <chr>, homeworld <chr>, species <chr>, -#> # films <list>, vehicles <list>, starships <list> -starwars %>% slice_sample(prop = 0.1) +#> # films <list>, vehicles <list>, starships <list>

+
+starwars %>% slice_sample(prop = 0.1)
 #> # A tibble: 8 × 14
 #>   name       height  mass hair_color skin_color eye_color birth_year sex  
 #>   <chr>       <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr>
@@ -344,7 +346,7 @@ 

Choose rows using their pos with highest or lowest values of a variable. Note that we first must choose only the values which are not NA. -
+
 starwars %>%
   filter(!is.na(height)) %>%
   slice_max(height, n = 3)
@@ -364,7 +366,7 @@ 

Select columns with select()select() allows you to rapidly zoom in on a useful subset using operations that usually only work on numeric variable positions:

-
+
 # Select columns by name
 starwars %>% select(hair_color, skin_color, eye_color)
 #> # A tibble: 87 × 3
@@ -374,8 +376,9 @@ 

Select columns with select()#> 2 NA gold yellow #> 3 NA white, blue red #> 4 none white yellow -#> # ℹ 83 more rows -# Select all columns between hair_color and eye_color (inclusive) +#> # ℹ 83 more rows

+ + +
+# Select all columns ending with color
 starwars %>% select(ends_with("color"))
 #> # A tibble: 87 × 3
 #>   hair_color skin_color  eye_color
@@ -414,7 +419,7 @@ 

Select columns with select()

You can rename variables with select() by using named arguments:

-
+
 starwars %>% select(home_world = homeworld)
 #> # A tibble: 87 × 1
 #>   home_world
@@ -427,7 +432,7 @@ 

Select columns with select()But because select() drops all the variables not explicitly mentioned, it’s not that useful. Instead, use rename():

-
+
 starwars %>% rename(home_world = homeworld)
 #> # A tibble: 87 × 14
 #>   name       height  mass hair_color skin_color eye_color birth_year sex  
@@ -446,7 +451,7 @@ 

Add new columns with mutate()<

Besides selecting sets of existing columns, it’s often useful to add new columns that are functions of existing columns. This is the job of mutate():

-
+
 starwars %>% mutate(height_m = height / 100)
 #> # A tibble: 87 × 15
 #>   name       height  mass hair_color skin_color eye_color birth_year sex  
@@ -460,7 +465,7 @@ 

Add new columns with mutate()< #> # films <list>, vehicles <list>, starships <list>, height_m <dbl>

We can’t see the height in meters we just calculated, but we can fix that using a select command.

-
+
 starwars %>%
   mutate(height_m = height / 100) %>%
   select(height_m, height, everything())
@@ -477,7 +482,7 @@ 

Add new columns with mutate()<

dplyr::mutate() is similar to the base transform(), but allows you to refer to columns that you’ve just created:

-
+
 starwars %>%
   mutate(
     height_m = height / 100,
@@ -497,7 +502,7 @@ 

Add new columns with mutate()< #> # height_m <dbl>

If you only want to keep the new variables, use .keep = "none":

-
+
 starwars %>%
   mutate(
     height_m = height / 100,
@@ -518,7 +523,7 @@ 

Change column order with reloca

Use a similar syntax as select() to move blocks of columns at once

-
+
 starwars %>% relocate(sex:homeworld, .before = height)
 #> # A tibble: 87 × 14
 #>   name           sex   gender homeworld height  mass hair_color skin_color
@@ -536,7 +541,7 @@ 

Summarise values with summarise()

The last verb is summarise(). It collapses a data frame to a single row.

-
+
 starwars %>% summarise(height = mean(height, na.rm = TRUE))
 #> # A tibble: 1 × 1
 #>   height
@@ -575,7 +580,7 @@ 

Combining functions with %>% -
+
 a1 <- group_by(starwars, species, sex)
 a2 <- select(a1, height, mass)
 a3 <- summarise(a2,
@@ -584,7 +589,7 @@ 

Combining functions with %>%)

Or if you don’t want to name the intermediate results, you need to wrap the function calls inside each other:

-
+
 summarise(
   select(
     group_by(starwars, species, sex),
@@ -612,7 +617,7 @@ 

Combining functions with %>%f(x, y) so you can use it to rewrite multiple operations that you can read left-to-right, top-to-bottom (reading the pipe operator as “then”):

-
+
 starwars %>%
   group_by(species, sex) %>%
   select(height, mass) %>%
@@ -641,7 +646,7 @@ 

Selecting operationsselect() with bare variable names, they actually represent their own positions in the tibble. The following calls are completely equivalent from dplyr’s point of view:

-
+
 # `name` represents the integer 1
 select(starwars, name)
 #> # A tibble: 87 × 1
@@ -651,8 +656,9 @@ 

Selecting operations#> 2 C-3PO #> 3 R2-D2 #> 4 Darth Vader -#> # ℹ 83 more rows -select(starwars, 1) +#> # ℹ 83 more rows

+
+select(starwars, 1)
 #> # A tibble: 87 × 1
 #>   name          
 #>   <chr>         
@@ -665,7 +671,7 @@ 

Selecting operations
+
 height <- 5
 select(starwars, height)
 #> # A tibble: 87 × 1
@@ -681,7 +687,7 @@ 

Selecting operations
+
 name <- "color"
 select(starwars, ends_with(name))
 #> # A tibble: 87 × 3
@@ -694,7 +700,7 @@ 

Selecting operations#> # ℹ 83 more rows

These semantics are usually intuitive. But note the subtle difference:

-
+
 name <- 5
 select(starwars, name, identity(name))
 #> # A tibble: 87 × 2
@@ -712,7 +718,7 @@ 

Selecting operationsselect():

-
+
 vars <- c("name", "height")
 select(starwars, all_of(vars), "mass")
 #> # A tibble: 87 × 3
@@ -731,14 +737,14 @@ 

Mutating operationsselect() expects column names or positions, mutate() expects column vectors. We will set up a smaller tibble to use for our examples.

-
+
 df <- starwars %>% select(name, height, mass)

When we use select(), the bare column names stand for their own positions in the tibble. For mutate() on the other hand, column symbols represent the actual column vectors stored in the tibble. Consider what happens if we give a string or a number to mutate():

-
+
 mutate(df, "height", 2)
 #> # A tibble: 87 × 5
 #>   name           height  mass `"height"`   `2`
@@ -753,7 +759,7 @@ 

Mutating operations to mutate(). This amounts to adding 10 to a string! The correct expression is:

-
+
 mutate(df, height + 10)
 #> # A tibble: 87 × 4
 #>   name           height  mass `height + 10`
@@ -768,7 +774,7 @@ 

Mutating operations
+
 var <- seq(1, nrow(df))
 mutate(df, new = var)
 #> # A tibble: 87 × 4
@@ -782,7 +788,7 @@ 

Mutating operationsgroup_by(). While you might think it has select semantics, it actually has mutate semantics. This is quite handy as it allows to group by a modified column:

-
+
 group_by(starwars, sex)
 #> # A tibble: 87 × 14
 #> # Groups:   sex [5]
@@ -794,8 +800,9 @@ 

Mutating operations#> 4 Darth Vad… 202 136 none white yellow 41.9 male #> # ℹ 83 more rows #> # ℹ 6 more variables: gender <chr>, homeworld <chr>, species <chr>, -#> # films <list>, vehicles <list>, starships <list> -group_by(starwars, sex = as.factor(sex)) +#> # films <list>, vehicles <list>, starships <list>

+
+group_by(starwars, sex = as.factor(sex))
 #> # A tibble: 87 × 14
 #> # Groups:   sex [5]
 #>   name       height  mass hair_color skin_color eye_color birth_year sex  
@@ -806,8 +813,9 @@ 

Mutating operations#> 4 Darth Vad… 202 136 none white yellow 41.9 male #> # ℹ 83 more rows #> # ℹ 6 more variables: gender <chr>, homeworld <chr>, species <chr>, -#> # films <list>, vehicles <list>, starships <list> -group_by(starwars, height_binned = cut(height, 3)) +#> # films <list>, vehicles <list>, starships <list>

+
+group_by(starwars, height_binned = cut(height, 3))
 #> # A tibble: 87 × 15
 #> # Groups:   height_binned [4]
 #>   name       height  mass hair_color skin_color eye_color birth_year sex  
@@ -822,7 +830,7 @@ 

Mutating operationsgroup_by(). This amounts to creating a new column containing the string recycled to the number of rows:

-
+
 group_by(df, "month")
 #> # A tibble: 87 × 4
 #> # Groups:   "month" [1]
diff --git a/dev/articles/grouping.html b/dev/articles/grouping.html
index ff00cab03e..c5b98f2118 100644
--- a/dev/articles/grouping.html
+++ b/dev/articles/grouping.html
@@ -152,8 +152,9 @@ 

#> 4 Darth Vad… 202 136 none white yellow 41.9 male #> # ℹ 83 more rows #> # ℹ 6 more variables: gender <chr>, homeworld <chr>, species <chr>, -#> # films <list>, vehicles <list>, starships <list> -by_sex_gender +#> # films <list>, vehicles <list>, starships <list>

+
+by_sex_gender
 #> # A tibble: 87 × 14
 #> # Groups:   sex, gender [6]
 #>   name       height  mass hair_color skin_color eye_color birth_year sex  
@@ -168,7 +169,7 @@ 

Or use tally() to count the number of rows in each group. The sort argument is useful if you want to see the largest groups up front.

-
+
 by_species %>% tally()
 #> # A tibble: 38 × 2
 #>   species      n
@@ -177,8 +178,9 @@ 

#> 2 Besalisk 1 #> 3 Cerean 1 #> 4 Chagrian 1 -#> # ℹ 34 more rows - +#> # ℹ 34 more rows

+
+
 by_sex_gender %>% tally(sort = TRUE)
 #> # A tibble: 6 × 3
 #> # Groups:   sex [5]
@@ -193,7 +195,7 @@ 

function of existing variables. This is equivalent to performing a mutate() before the group_by():

-
+
 bmi_breaks <- c(0, 18.5, 25, 30, Inf)
 
 starwars %>%
@@ -214,7 +216,7 @@ 

Group metadataYou can see underlying group data with group_keys(). It has one row for each group and one column for each grouping variable:

-
+
 by_species %>% group_keys()
 #> # A tibble: 38 × 1
 #>   species 
@@ -223,8 +225,9 @@ 

Group metadata#> 2 Besalisk #> 3 Cerean #> 4 Chagrian -#> # ℹ 34 more rows - +#> # ℹ 34 more rows

+
+
 by_sex_gender %>% group_keys()
 #> # A tibble: 6 × 2
 #>   sex            gender   
@@ -236,7 +239,7 @@ 

Group metadata#> # ℹ 2 more rows

You can see which group each row belongs to with group_indices():

-
+
 by_species %>% group_indices()
 #>  [1] 11  6  6 11 11 11 11  6 11 11 11 11 34 11 24 12 11 38 36 11 11  6 31
 #> [24] 11 11 18 11 11  8 26 11 21 11 11 10 10 10 11 30  7 11 11 37 32 32  1
@@ -244,7 +247,7 @@ 

Group metadata#> [70] 2 15 15 11 6 25 19 28 14 34 11 38 22 11 11 11 6 11

And which rows each group contains with group_rows():

-
+
 by_species %>% group_rows() %>% head()
 #> <list_of<integer>[6]>
 #> [[1]]
@@ -266,10 +269,11 @@ 

Group metadata#> [1] 2 3 8 22 74 86

Use group_vars() if you just want the names of the grouping variables:

-
+
 by_species %>% group_vars()
-#> [1] "species"
-by_sex_gender %>% group_vars()
+#> [1] "species"
+
+by_sex_gender %>% group_vars()
 #> [1] "sex"    "gender"

Changing and adding to grouping variables @@ -278,7 +282,7 @@

Changing and adding to groupi will overwrite the existing grouping variables. For example, the following code groups by homeworld instead of species:

-
+
 by_species %>%
   group_by(homeworld) %>%
   tally()
@@ -294,7 +298,7 @@ 

Changing and adding to groupi .add = TRUE1. For example, the following code groups by species and homeworld:

-
+
 by_species %>%
   group_by(homeworld, .add = TRUE) %>%
   tally()
@@ -312,7 +316,7 @@ 

Changing and adding to groupi

Removing grouping variables

To remove all grouping variables, use ungroup():

-
+
 by_species %>%
   ungroup() %>%
   tally()
@@ -322,7 +326,7 @@ 

Removing grouping variables#> 1 87

You can also choose to selectively ungroup by listing the variables you want to remove:

-
+
 by_sex_gender %>% 
   ungroup(sex) %>% 
   tally()
@@ -346,7 +350,7 @@ 

summarise() computes a summary for each group. This means that it starts from group_keys(), adding summary variables to the right hand side:

-
+
 by_species %>%
   summarise(
     n = n(),
@@ -365,14 +369,15 @@ 

grouping variable corresponds to .groups = "drop_last" without a message or .groups = NULL with a message (the default).

-
+
 by_sex_gender %>% 
   summarise(n = n()) %>% 
   group_vars()
 #> `summarise()` has grouped output by 'sex'. You can override using the
 #> `.groups` argument.
-#> [1] "sex"
-
+#> [1] "sex"
+
+
 by_sex_gender %>% 
   summarise(n = n(), .groups = "drop_last") %>% 
   group_vars()
@@ -380,12 +385,13 @@ 

Since version 1.0.0 the groups may also be kept (.groups = "keep") or dropped (.groups = "drop").

-
+
 by_sex_gender %>% 
   summarise(n = n(), .groups = "keep") %>% 
   group_vars()
-#> [1] "sex"    "gender"
-
+#> [1] "sex"    "gender"
+
+
 by_sex_gender %>% 
   summarise(n = n(), .groups = "drop") %>% 
   group_vars()
@@ -403,7 +409,7 @@ 

position of existing columns. Grouped select() is almost identical to ungrouped select, except that it always includes the grouping variables:

-
+
 by_species %>% select(mass)
 #> Adding missing grouping variables: `species`
 #> # A tibble: 87 × 2
@@ -426,7 +432,7 @@ 

Grouped arrange() is the same as ungrouped arrange(), unless you set .by_group = TRUE, in which case it will order first by the grouping variables.

-
+
 by_species %>%
   arrange(desc(mass)) %>%
   relocate(species, mass)
@@ -440,8 +446,9 @@ 

#> 4 Human 136 Darth V… 202 none white yellow 41.9 #> # ℹ 83 more rows #> # ℹ 6 more variables: sex <chr>, gender <chr>, homeworld <chr>, -#> # films <list>, vehicles <list>, starships <list> - +#> # films <list>, vehicles <list>, starships <list>

+
+
 by_species %>%
   arrange(desc(mass), .by_group = TRUE) %>%
   relocate(species, mass)
@@ -467,7 +474,7 @@ 

In simple cases with vectorised functions, grouped and ungrouped mutate() give the same results. They differ when used with summary functions:

-
+
 # Subtract off global mean
 starwars %>% 
   select(name, homeworld, mass) %>% 
@@ -479,8 +486,9 @@ 

#> 2 C-3PO Tatooine 75 -22.3 #> 3 R2-D2 Naboo 32 -65.3 #> 4 Darth Vader Tatooine 136 38.7 -#> # ℹ 83 more rows - +#> # ℹ 83 more rows

+
+
 # Subtract off homeworld mean
 starwars %>% 
   select(name, homeworld, mass) %>% 
@@ -496,7 +504,7 @@ 

#> 4 Darth Vader Tatooine 136 50.6 #> # ℹ 83 more rows

Or with window functions like min_rank():

-
+
 # Overall rank
 starwars %>% 
   select(name, homeworld, height) %>% 
@@ -508,8 +516,9 @@ 

#> 2 C-3PO Tatooine 167 20 #> 3 R2-D2 Naboo 96 5 #> 4 Darth Vader Tatooine 202 72 -#> # ℹ 83 more rows - +#> # ℹ 83 more rows

+
+
 # Rank per homeworld
 starwars %>% 
   select(name, homeworld, height) %>% 
@@ -534,7 +543,7 @@ 

keeps the rows where the variable is TRUE. This means that grouped filters can be used with summary functions. For example, we can find the tallest character of each species:

-
+
 by_species %>%
   select(name, species, height) %>% 
   filter(height == max(height))
@@ -550,7 +559,7 @@ 

You can also use filter() to remove entire groups. For example, the following code eliminates all groups that only have a single member:

-
+
 by_species %>%
   filter(n() != 1) %>% 
   tally()
@@ -572,7 +581,7 @@ 

slice_min() and slice_max()) select rows within a group. For example, we can select the first observation within each species:

-
+
 by_species %>%
   relocate(species) %>% 
   slice(1)
@@ -589,7 +598,7 @@ 

#> # films <list>, vehicles <list>, starships <list>

Similarly, we can use slice_min() to select the smallest n values of a variable:

-
+
 by_species %>%
   filter(!is.na(height)) %>% 
   slice_min(height, n = 2)
diff --git a/dev/articles/programming.html b/dev/articles/programming.html
index 9c1c22ea99..a2b33098b4 100644
--- a/dev/articles/programming.html
+++ b/dev/articles/programming.html
@@ -445,8 +445,9 @@ 

Any number of user-supplied exp #> 2 Aleen Minor 15 79 #> 3 Bespin 79 175 #> 4 Bestine IV 110 180 -#> # ℹ 45 more rows -starwars %>% my_summarise(sex, gender) +#> # ℹ 45 more rows

+
+starwars %>% my_summarise(sex, gender)
 #> `summarise()` has grouped output by 'sex'. You can override using the
 #> `.groups` argument.
 #> # A tibble: 6 × 4
@@ -468,7 +469,7 @@ 

Creating multiple columns

Sometimes it can be useful for a single expression to return multiple columns. You can do this by returning an unnamed data frame:

-
+
 quantile_df <- function(x, probs = c(0.25, 0.5, 0.75)) {
   tibble(
     val = quantile(x, probs),
@@ -487,7 +488,7 @@ 

Creating multiple columnsThis sort of function is useful inside summarise() and mutate() which allow you to add multiple columns by returning a data frame:

-
+
 df <- tibble(
   grp = rep(1:3, each = 10),
   x = runif(30),
@@ -502,8 +503,9 @@ 

Creating multiple columns#> <int> <dbl> <dbl> #> 1 1 0.361 0.5 #> 2 2 0.541 0.5 -#> 3 3 0.456 0.5 - +#> 3 3 0.456 0.5

+
+
 df %>%
   group_by(grp) %>%
   summarise(across(x:y, ~ quantile_df(.x, probs = .5), .unpack = TRUE))
@@ -525,7 +527,7 @@ 

Creating multiple columnsreframe(). summarise() is restricted to returning 1 row summaries per group, but reframe() lifts this restriction:

-
+
 df %>%
   group_by(grp) %>%
   reframe(across(x:y, quantile_df, .unpack = TRUE))
@@ -543,7 +545,7 @@ 

Transforming user-supplied variabl

If you want the user to provide a set of data-variables that are then transformed, use across() and pick():

-
+
 my_summarise <- function(data, summary_vars) {
   data %>%
     summarise(across({{ summary_vars }}, ~ mean(., na.rm = TRUE)))
@@ -561,7 +563,7 @@ 

Transforming user-supplied variabl #> # ℹ 34 more rows

You can use this same idea for multiple sets of input data-variables:

-
+
 my_summarise <- function(data, group_var, summarise_var) {
   data %>%
     group_by(pick({{ group_var }})) %>% 
@@ -569,7 +571,7 @@ 

Transforming user-supplied variabl }

Use the .names argument to across() to control the names of the output.

-
+
 my_summarise <- function(data, group_var, summarise_var) {
   data %>%
     group_by(pick({{ group_var }})) %>% 
@@ -582,13 +584,13 @@ 

Loop over multiple variables
+
 for (var in names(mtcars)) {
   mtcars %>% count(.data[[var]]) %>% print()
 }

This same technique works with for loop alternatives like the base R apply() family and the purrr map() family:

-
+
 mtcars %>% 
   names() %>% 
   purrr::map(~ count(mtcars, .data[[.x]]))
@@ -600,7 +602,7 @@

Use a variable from a Shiny input

Many Shiny input controls return character vectors, so you can use the same approach as above: .data[[input$var]].

-
+
 library(shiny)
 ui <- fluidPage(
   selectInput("var", "Variable", choices = names(diamonds)),
diff --git a/dev/articles/rowwise.html b/dev/articles/rowwise.html
index 2d052bd00a..b028bbcdea 100644
--- a/dev/articles/rowwise.html
+++ b/dev/articles/rowwise.html
@@ -160,8 +160,9 @@ 

Creating#> x y z m #> <int> <int> <int> <dbl> #> 1 1 3 5 3.5 -#> 2 2 4 6 3.5 -df %>% rowwise() %>% mutate(m = mean(c(x, y, z))) +#> 2 2 4 6 3.5

+
+df %>% rowwise() %>% mutate(m = mean(c(x, y, z)))
 #> # A tibble: 2 × 4
 #> # Rowwise: 
 #>       x     y     z     m
@@ -176,7 +177,7 @@ 

Creatingrowwise(). These variables are preserved when you call summarise(), so they behave somewhat similarly to the grouping variables passed to group_by():

-
+
 df <- tibble(name = c("Mara", "Hadley"), x = 1:2, y = 3:4, z = 5:6)
 
 df %>% 
@@ -186,8 +187,9 @@ 

Creating#> m #> <dbl> #> 1 3 -#> 2 4 - +#> 2 4

+
+
 df %>% 
   rowwise(name) %>% 
   summarise(m = mean(c(x, y, z)))
@@ -211,7 +213,7 @@ 

Per row summary statisticsrowwise() it also makes it easy to summarise values across columns within one row. To see how, we’ll start by making a little dataset:

-
+
 df <- tibble(id = 1:6, w = 10:15, x = 20:25, y = 30:35, z = 40:45)
 df
 #> # A tibble: 6 × 5
@@ -225,11 +227,11 @@ 

Per row summary statisticsLet’s say we want to compute the sum of w, x, y, and z for each row. We start by making a row-wise data frame:

-
+
 

We can then use mutate() to add a new column to each row, or summarise() to return just that one summary:

-
+
 rf %>% mutate(total = sum(c(w, x, y, z)))
 #> # A tibble: 6 × 6
 #> # Rowwise:  id
@@ -239,8 +241,9 @@ 

Per row summary statistics#> 2 2 11 21 31 41 104 #> 3 3 12 22 32 42 108 #> 4 4 13 23 33 43 112 -#> # ℹ 2 more rows -rf %>% summarise(total = sum(c(w, x, y, z))) +#> # ℹ 2 more rows

+
+rf %>% summarise(total = sum(c(w, x, y, z)))
 #> `summarise()` has grouped output by 'id'. You can override using the
 #> `.groups` argument.
 #> # A tibble: 6 × 2
@@ -256,7 +259,7 @@ 

Per row summary statisticsc_across() which uses tidy selection syntax so you can succinctly select many variables:

-
+
 rf %>% mutate(total = sum(c_across(w:z)))
 #> # A tibble: 6 × 6
 #> # Rowwise:  id
@@ -266,8 +269,9 @@ 

Per row summary statistics#> 2 2 11 21 31 41 104 #> 3 3 12 22 32 42 108 #> 4 4 13 23 33 43 112 -#> # ℹ 2 more rows -rf %>% mutate(total = sum(c_across(where(is.numeric)))) +#> # ℹ 2 more rows

+
+rf %>% mutate(total = sum(c_across(where(is.numeric))))
 #> # A tibble: 6 × 6
 #> # Rowwise:  id
 #>      id     w     x     y     z total
@@ -280,7 +284,7 @@ 

Per row summary statisticsYou could combine this with column-wise operations (see vignette("colwise") for more details) to compute the proportion of the total for each column:

-
+
 rf %>% 
   mutate(total = sum(c_across(w:z))) %>% 
   ungroup() %>% 
@@ -302,7 +306,7 @@ 

Row-wise summary functions -
+
 df %>% mutate(total = rowSums(pick(where(is.numeric), -id)))
 #> # A tibble: 6 × 6
 #>      id     w     x     y     z total
@@ -311,8 +315,9 @@ 

Row-wise summary functions#> 2 2 11 21 31 41 104 #> 3 3 12 22 32 42 108 #> 4 4 13 23 33 43 112 -#> # ℹ 2 more rows -df %>% mutate(mean = rowMeans(pick(where(is.numeric), -id))) +#> # ℹ 2 more rows

+
+df %>% mutate(mean = rowMeans(pick(where(is.numeric), -id)))
 #> # A tibble: 6 × 6
 #>      id     w     x     y     z  mean
 #>   <int> <int> <int> <int> <int> <dbl>
@@ -342,12 +347,12 @@ 

Motivation
+
 df <- tibble(
   x = list(1, 2:3, 4:6)
 )

You might try calling length():

-
+
 df %>% mutate(l = length(x))
 #> # A tibble: 3 × 2
 #>   x             l
@@ -358,7 +363,7 @@ 

Motivation
+
 df %>% mutate(l = lengths(x))
 #> # A tibble: 3 × 2
 #>   x             l
@@ -370,15 +375,16 @@ 

Motivationsapply(), vapply(), or one of the purrr map() functions:

-
+
 df %>% mutate(l = sapply(x, length))
 #> # A tibble: 3 × 2
 #>   x             l
 #>   <list>    <int>
 #> 1 <dbl [1]>     1
 #> 2 <int [2]>     2
-#> 3 <int [3]>     3
-df %>% mutate(l = purrr::map_int(x, length))
+#> 3 <int [3]>     3
+
+df %>% mutate(l = purrr::map_int(x, length))
 #> # A tibble: 3 × 2
 #>   x             l
 #>   <list>    <int>
@@ -390,7 +396,7 @@ 

Motivation
+
 df %>% 
   rowwise() %>% 
   mutate(l = length(x))
@@ -412,21 +418,22 @@ 

Subsetting
+
 df <- tibble(g = 1:2, y = list(1:3, "a"))
 gf <- df %>% group_by(g)
 rf <- df %>% rowwise(g)

If we compute some properties of y, you’ll notice the results look different:

-
+
 gf %>% mutate(type = typeof(y), length = length(y))
 #> # A tibble: 2 × 4
 #> # Groups:   g [2]
 #>       g y         type  length
 #>   <int> <list>    <chr>  <int>
 #> 1     1 <int [3]> list       1
-#> 2     2 <chr [1]> list       1
-rf %>% mutate(type = typeof(y), length = length(y))
+#> 2     2 <chr [1]> list       1
+
+rf %>% mutate(type = typeof(y), length = length(y))
 #> # A tibble: 2 × 4
 #> # Rowwise:  g
 #>       g y         type      length
@@ -438,15 +445,16 @@ 

Subsetting
+
 # grouped
 out1 <- integer(2)
 for (i in 1:2) {
   out1[[i]] <- length(df$y[i])
 }
 out1
-#> [1] 1 1
-
+#> [1] 1 1
+
+
 # rowwise
 out2 <- integer(2)
 for (i in 1:2) {
@@ -458,22 +466,24 @@ 

Subsetting
+
 gf %>% mutate(y2 = y)
 #> # A tibble: 2 × 3
 #> # Groups:   g [2]
 #>       g y         y2       
 #>   <int> <list>    <list>   
 #> 1     1 <int [3]> <int [3]>
-#> 2     2 <chr [1]> <chr [1]>
-rf %>% mutate(y2 = y)
+#> 2     2 <chr [1]> <chr [1]>
+
+rf %>% mutate(y2 = y)
 #> Error in `mutate()`:
 #>  In argument: `y2 = y`.
 #>  In row 1.
 #> Caused by error:
 #> ! `y2` must be size 1, not 3.
-#>  Did you mean: `y2 = list(y)` ?
-rf %>% mutate(y2 = list(y))
+#>  Did you mean: `y2 = list(y)` ?
+
+rf %>% mutate(y2 = list(y))
 #> # A tibble: 2 × 3
 #> # Rowwise:  g
 #>       g y         y2       
@@ -487,7 +497,7 @@ 

Modellingrowwise() data frames allow you to solve a variety of modelling problems in what I think is a particularly elegant way. We’ll start by creating a nested data frame:

-
+
 by_cyl <- mtcars %>% nest_by(cyl)
 by_cyl
 #> # A tibble: 3 × 2
@@ -505,7 +515,7 @@ 

Modelling
+
 mods <- by_cyl %>% mutate(mod = list(lm(mpg ~ wt, data = data)))
 mods
 #> # A tibble: 3 × 3
@@ -516,7 +526,7 @@ 

Modelling#> 2 6 <tibble [7 × 10]> <lm> #> 3 8 <tibble [14 × 10]> <lm>

And supplement that with one set of predictions per row:

-
+
 mods <- mods %>% mutate(pred = list(predict(mod, data)))
 mods
 #> # A tibble: 3 × 4
@@ -527,7 +537,7 @@ 

Modelling#> 2 6 <tibble [7 × 10]> <lm> <dbl [7]> #> 3 8 <tibble [14 × 10]> <lm> <dbl [14]>

You could then summarise the model in a variety of ways:

-
+
 mods %>% summarise(rmse = sqrt(mean((pred - data$mpg) ^ 2)))
 #> `summarise()` has grouped output by 'cyl'. You can override using the
 #> `.groups` argument.
@@ -537,8 +547,9 @@ 

Modelling#> <dbl> <dbl> #> 1 4 3.01 #> 2 6 0.985 -#> 3 8 1.87 -mods %>% summarise(rsq = summary(mod)$r.squared) +#> 3 8 1.87

+
+mods %>% summarise(rsq = summary(mod)$r.squared)
 #> `summarise()` has grouped output by 'cyl'. You can override using the
 #> `.groups` argument.
 #> # A tibble: 3 × 2
@@ -547,8 +558,9 @@ 

Modelling#> <dbl> <dbl> #> 1 4 0.509 #> 2 6 0.465 -#> 3 8 0.423 -mods %>% summarise(broom::glance(mod)) +#> 3 8 0.423

+
+mods %>% summarise(broom::glance(mod))
 #> `summarise()` has grouped output by 'cyl'. You can override using the
 #> `.groups` argument.
 #> # A tibble: 3 × 13
@@ -561,7 +573,7 @@ 

Modelling#> # ℹ 4 more variables: BIC <dbl>, deviance <dbl>, df.residual <int>, #> # nobs <int>

Or easily access the parameters of each model:

-
+
 mods %>% reframe(broom::tidy(mod))
 #> # A tibble: 6 × 6
 #>     cyl term        estimate std.error statistic    p.value
@@ -590,7 +602,7 @@ 

Simulations
+
 df <- tribble(
   ~ n, ~ min, ~ max,
     1,     0,     1,
@@ -599,7 +611,7 @@ 

Simulations)

You can supply these parameters to runif() by using rowwise() and mutate():

-
+
 df %>% 
   rowwise() %>% 
   mutate(data = list(runif(n, min, max)))
@@ -615,7 +627,7 @@ 

Simulationslist() means that we’ll get a list column where each row is a list containing multiple values. If you forget to use list(), dplyr will give you a hint:

-
+
 df %>% 
   rowwise() %>% 
   mutate(data = runif(n, min, max))
@@ -633,7 +645,7 @@ 

Multiple combinationsexpand.grid() (or tidyr::expand_grid()) to generate the data frame and then repeat the same pattern as above:

-
+
 df <- expand.grid(mean = c(-1, 0, 1), sd = c(1, 10, 100))
 
 df %>% 
@@ -657,7 +669,7 @@ 

Varying functionsdo.call():

-
+
 df <- tribble(
    ~rng,     ~params,
    "runif",  list(n = 10), 
@@ -715,7 +727,7 @@ 

Without argument names: you could call functions that input and output data frames using . to refer to the “current” group. For example, the following code gets the first row of each group:

-
+
 mtcars %>% 
   group_by(cyl) %>% 
   do(head(., 1))
@@ -729,7 +741,7 @@ 

This has been superseded by pick() plus reframe(), a variant of summarise() that can create multiple rows and columns per group.

-
+
 mtcars %>% 
   group_by(cyl) %>% 
   reframe(head(pick(everything()), 1))
@@ -743,7 +755,7 @@ 

  • With arguments: it worked like mutate() but automatically wrapped every element in a list:

    -
    +
     mtcars %>% 
       group_by(cyl) %>% 
       do(nrows = nrow(.))
    @@ -757,7 +769,7 @@ 

    I now believe that behaviour is both too magical and not very useful, and it can be replaced by summarise() and pick().

    -
    +
     mtcars %>% 
       group_by(cyl) %>% 
       summarise(nrows = nrow(pick(everything())))
    diff --git a/dev/articles/two-table.html b/dev/articles/two-table.html
    index 36e04babce..5d00ab04e4 100644
    --- a/dev/articles/two-table.html
    +++ b/dev/articles/two-table.html
    @@ -229,8 +229,9 @@ 

    Controlling how the tables are m #> 4 2013 1 1 5 JFK BQN N804JB B6 NA NA NA #> 5 2013 1 1 6 LGA ATL N668DN DL Hartsf… 33.6 -84.4 #> # ℹ 336,771 more rows -#> # ℹ 4 more variables: alt <dbl>, tz <dbl>, dst <chr>, tzone <chr> -flights2 %>% left_join(airports, c("origin" = "faa")) +#> # ℹ 4 more variables: alt <dbl>, tz <dbl>, dst <chr>, tzone <chr>

    +
    +flights2 %>% left_join(airports, c("origin" = "faa"))
     #> # A tibble: 336,776 × 15
     #>    year month   day  hour origin dest  tailnum carrier name      lat   lon
     #>   <int> <int> <int> <dbl> <chr>  <chr> <chr>   <chr>   <chr>   <dbl> <dbl>
    @@ -250,14 +251,14 @@ 

    Types of join
    +
     df1 <- tibble(x = c(1, 2), y = 2:1)
     df2 <- tibble(x = c(3, 1), a = 10, b = "a")
    • inner_join(x, y) only includes observations that match in both x and y.

      -
      +
       df1 %>% inner_join(df2) %>% knitr::kable()
       #> Joining with `by = join_by(x)`
      @@ -280,7 +281,7 @@

      Types of join
      +
       df1 %>% left_join(df2)
       #> Joining with `by = join_by(x)`
       #> # A tibble: 2 × 4
      @@ -293,15 +294,16 @@ 

      Types of join
      +
       df1 %>% right_join(df2)
       #> Joining with `by = join_by(x)`
       #> # A tibble: 2 × 4
       #>       x     y     a b    
       #>   <dbl> <int> <dbl> <chr>
       #> 1     1     2    10 a    
      -#> 2     3    NA    10 a
      -df2 %>% left_join(df1)
      +#> 2     3    NA    10 a
      +
      +df2 %>% left_join(df1)
       #> Joining with `by = join_by(x)`
       #> # A tibble: 2 × 4
       #>       x     a b         y
      @@ -312,7 +314,7 @@ 

      Types of joinfull_join() includes all observations from x and y.

      -
      +
       df1 %>% full_join(df2)
       #> Joining with `by = join_by(x)`
       #> # A tibble: 3 × 4
      @@ -334,7 +336,7 @@ 

      Observations
      +
       df1 <- tibble(x = c(1, 1, 2), y = 1:3)
       df2 <- tibble(x = c(1, 1, 2), z = c("a", "b", "a"))
       
      @@ -371,7 +373,7 @@ 

      Filtering joinsThese are most useful for diagnosing join mismatches. For example, there are many flights in the nycflights13 dataset that don’t have a matching tail number in the planes table:

      -
      +
       library("nycflights13")
       flights %>% 
         anti_join(planes, by = "tailnum") %>% 
      @@ -389,22 +391,24 @@ 

      Filtering joinssemi_join() or anti_join(). semi_join() and anti_join() never duplicate; they only ever remove observations.

      -
      +
       df1 <- tibble(x = c(1, 1, 3, 4), y = 1:4)
       df2 <- tibble(x = c(1, 1, 2), z = c("a", "b", "a"))
       
       # Four rows to start with:
       df1 %>% nrow()
      -#> [1] 4
      -# And we get four rows after the join
      +#> [1] 4
      +
      +# And we get four rows after the join
       df1 %>% inner_join(df2, by = "x") %>% nrow()
       #> Warning in inner_join(., df2, by = "x"): Detected an unexpected many-to-many relationship between `x` and `y`.
       #>  Row 1 of `x` matches multiple rows in `y`.
       #>  Row 1 of `y` matches multiple rows in `x`.
       #>  If a many-to-many relationship is expected, set `relationship =
       #>   "many-to-many"` to silence this warning.
      -#> [1] 4
      -# But only two rows actually match
      +#> [1] 4
      +
      +# But only two rows actually match
       df1 %>% semi_join(df2, by = "x") %>% nrow()
       #> [1] 2
      @@ -428,40 +432,44 @@

      Set operationsy.

      Given this simple data:

      -
      +
       (df1 <- tibble(x = 1:2, y = c(1L, 1L)))
       #> # A tibble: 2 × 2
       #>       x     y
       #>   <int> <int>
       #> 1     1     1
      -#> 2     2     1
      -(df2 <- tibble(x = 1:2, y = 1:2))
      +#> 2     2     1
      +
      +(df2 <- tibble(x = 1:2, y = 1:2))
       #> # A tibble: 2 × 2
       #>       x     y
       #>   <int> <int>
       #> 1     1     1
       #> 2     2     2

      The four possibilities are:

      -
      +
       intersect(df1, df2)
       #> # A tibble: 1 × 2
       #>       x     y
       #>   <int> <int>
      -#> 1     1     1
      -# Note that we get 3 rows, not 4
      +#> 1     1     1
      +
      +# Note that we get 3 rows, not 4
       union(df1, df2)
       #> # A tibble: 3 × 2
       #>       x     y
       #>   <int> <int>
       #> 1     1     1
       #> 2     2     1
      -#> 3     2     2
      -setdiff(df1, df2)
      +#> 3     2     2
      +
      +setdiff(df1, df2)
       #> # A tibble: 1 × 2
       #>       x     y
       #>   <int> <int>
      -#> 1     2     1
      -setdiff(df2, df1)
      +#> 1     2     1
      +
      +setdiff(df2, df1)
       #> # A tibble: 1 × 2
       #>       x     y
       #>   <int> <int>
      diff --git a/dev/articles/window-functions.html b/dev/articles/window-functions.html
      index 8dae83944b..1f4c11e809 100644
      --- a/dev/articles/window-functions.html
      +++ b/dev/articles/window-functions.html
      @@ -202,10 +202,12 @@ 

      Ranking functionsx <- c(1, 1, 2, 2, 2) row_number(x) -#> [1] 1 2 3 4 5 -min_rank(x) -#> [1] 1 1 3 3 3 -dense_rank(x) +#> [1] 1 2 3 4 5

      +
      +min_rank(x)
      +#> [1] 1 1 3 3 3
      +
      +dense_rank(x)
       #> [1] 1 1 2 2 2

      If you’re familiar with R, you may recognise that row_number() and min_rank() can be computed @@ -216,14 +218,15 @@

      Ranking functionspercent_rank() gives the percentage of the rank; cume_dist() gives the proportion of values less than or equal to the current value.

      -
      +
       cume_dist(x)
      -#> [1] 0.4 0.4 1.0 1.0 1.0
      -percent_rank(x)
      +#> [1] 0.4 0.4 1.0 1.0 1.0
      +
      +percent_rank(x)
       #> [1] 0.0 0.0 0.5 0.5 0.5

      These are useful if you want to select (for example) the top 10% of records within each group. For example:

      -
      +
       filter(players, cume_dist(desc(G)) < 0.1)
       #> # A tibble: 1,090 × 7
       #> # Groups:   playerID [995]
      @@ -240,12 +243,13 @@ 

      Ranking functionsntile() to divide the players within a team into four ranked groups, and calculate the average number of games within each group.

      -
      +
       by_team_player <- group_by(batting, teamID, playerID)
       by_team <- summarise(by_team_player, G = sum(G))
       #> `summarise()` has grouped output by 'teamID'. You can override using the
      -#> `.groups` argument.
      -by_team_quartile <- group_by(by_team, quartile = ntile(G, 4))
      +#> `.groups` argument.
      +
      +by_team_quartile <- group_by(by_team, quartile = ntile(G, 4))
       summarise(by_team_quartile, mean(G))
       #> # A tibble: 4 × 2
       #>   quartile `mean(G)`
      @@ -264,17 +268,18 @@ 

      Lead and laglead() and lag() produce offset versions of an input vector that is either ahead of or behind the original vector.

      -
      +
       x <- 1:5
       lead(x)
      -#> [1]  2  3  4  5 NA
      -lag(x)
      +#> [1]  2  3  4  5 NA
      +
      +lag(x)
       #> [1] NA  1  2  3  4

      You can use them to:

      • Compute differences or percent changes.

        -
        +
         # Compute the relative change in games played
         mutate(players, G_delta = G - lag(G))

        Using lag() is more convenient than diff() @@ -283,7 +288,7 @@

        Lead and lag
        +
         

      • @@ -295,7 +300,7 @@

        Lead and lag
        +
         df <- data.frame(year = 2000:2005, value = (0:5) ^ 2)
         scrambled <- df[sample(nrow(df)), ]
         
        @@ -307,8 +312,9 @@ 

        Lead and lag#> 3 2002 4 9 #> 4 2003 9 16 #> 5 2004 16 NA -#> 6 2005 25 1 - +#> 6 2005 25 1

        +
        +
         right <- mutate(scrambled, prev_value = lag(value, order_by = year))
         arrange(right, year)
         #>   year value prev_value
        @@ -335,14 +341,14 @@ 

        Cumulative aggregatescumany() to find all records for a player after they played a year with 150 games:

        -
        +
         filter(players, cumany(G > 150))

        Like lead and lag, you may want to control the order in which the accumulation occurs. None of the built in functions have an order_by argument so dplyr provides a helper: order_by(). You give it the variable you want to order by, and then the call to the window function:

        -
        +
         x <- 1:10
         y <- 10:1
         order_by(y, cumsum(x))
        @@ -359,7 +365,7 @@ 

        Recycled aggregates
        +
         filter(players, G > mean(G))
         filter(players, G < median(G))

        While most SQL databases don’t have an equivalent of @@ -369,7 +375,7 @@

        Recycled aggregates
        +
         filter(players, ntile(G, 2) == 2)

        You can also use this idea to select the records with the highest (x == max(x)) or lowest value (x == min(x)) @@ -379,7 +385,7 @@

        Recycled aggregatesmutate(). For example, with the batting data, we could compute the “career year”, the number of years a player has played since they entered the league:

        -
        +
         mutate(players, career_year = yearID - min(yearID) + 1)
         #> # A tibble: 20,874 × 8
         #> # Groups:   playerID [1,436]
        @@ -391,7 +397,7 @@ 

        Recycled aggregates#> 4 aaronha01 1957 ML1 151 615 118 198 4 #> # ℹ 20,870 more rows

        Or, as in the introductory example, we could compute a z-score:

        -
        +
         mutate(players, G_z = (G - mean(G)) / sd(G))
         #> # A tibble: 20,874 × 8
         #> # Groups:   playerID [1,436]
        diff --git a/dev/pkgdown.yml b/dev/pkgdown.yml
        index fd914c78b2..336024ef54 100644
        --- a/dev/pkgdown.yml
        +++ b/dev/pkgdown.yml
        @@ -11,7 +11,7 @@ articles:
           rowwise: rowwise.html
           two-table: two-table.html
           window-functions: window-functions.html
        -last_built: 2024-05-15T12:46Z
        +last_built: 2024-06-26T13:27Z
         urls:
           reference: https://dplyr.tidyverse.org/reference
           article: https://dplyr.tidyverse.org/articles
        diff --git a/dev/reference/compute.html b/dev/reference/compute.html
        index cc2e7b86db..1e3e895dde 100644
        --- a/dev/reference/compute.html
        +++ b/dev/reference/compute.html
        @@ -144,7 +144,7 @@ 

        Examples# Compute query and save in remote table compute(remote) #> # Source: table<`dbplyr_rUoBF7QxMz`> [?? x 5] -#> # Database: sqlite 3.45.2 [:memory:] +#> # Database: sqlite 3.46.0 [:memory:] #> mpg cyl disp hp drat #> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 18.7 8 360 175 3.15 @@ -182,7 +182,7 @@

        Examples# Creates a fresh query based on the generated SQL collapse(remote) #> # Source: SQL [?? x 5] -#> # Database: sqlite 3.45.2 [:memory:] +#> # Database: sqlite 3.46.0 [:memory:] #> mpg cyl disp hp drat #> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 18.7 8 360 175 3.15 diff --git a/dev/reference/filter_all.html b/dev/reference/filter_all.html index 05cb779dbc..bef3535f73 100644 --- a/dev/reference/filter_all.html +++ b/dev/reference/filter_all.html @@ -161,12 +161,12 @@

        Examples#> <predicate intersection> #> <quosure> #> expr: ^is.na(.) -#> env: 0x558a995fa990 +#> env: 0x5596aadf3920 any_vars(is.na(.)) #> <predicate union> #> <quosure> #> expr: ^is.na(.) -#> env: 0x558a995fa990 +#> env: 0x5596aadf3920 # You can take the intersection of the replicated expressions: diff --git a/dev/reference/funs.html b/dev/reference/funs.html index 7ac47ef706..e3d079db50 100644 --- a/dev/reference/funs.html +++ b/dev/reference/funs.html @@ -136,12 +136,12 @@

        Examples#> $mean #> function (x, ...) #> UseMethod("mean") -#> <bytecode: 0x558a8da7f498> +#> <bytecode: 0x55969f40e0b8> #> <environment: namespace:base> #> #> $mean #> ~mean(.x, na.rm = TRUE) -#> <environment: 0x558a8feceaf0> +#> <environment: 0x5596a276ac58> #> funs(m1 = mean, m2 = "mean", m3 = mean(., na.rm = TRUE)) @@ -162,7 +162,7 @@

        Examples#> $m1 #> function (x, ...) #> UseMethod("mean") -#> <bytecode: 0x558a8da7f498> +#> <bytecode: 0x55969f40e0b8> #> <environment: namespace:base> #> #> $m2 @@ -170,7 +170,7 @@

        Examples#> #> $m3 #> ~mean(.x, na.rm = TRUE) -#> <environment: 0x558a8feceaf0> +#> <environment: 0x5596a276ac58> #>

        diff --git a/dev/reference/src_dbi.html b/dev/reference/src_dbi.html index d286f4886e..4cc6e38d20 100644 --- a/dev/reference/src_dbi.html +++ b/dev/reference/src_dbi.html @@ -164,7 +164,7 @@

        Examplesmtcars <- con %>% tbl("mtcars") mtcars #> # Source: table<`mtcars`> [?? x 11] -#> # Database: sqlite 3.45.2 [:memory:] +#> # Database: sqlite 3.46.0 [:memory:] #> mpg cyl disp hp drat wt qsec vs am gear carb #> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 @@ -182,7 +182,7 @@

        Examples# You can also use pass raw SQL if you want a more sophisticated query con %>% tbl(sql("SELECT * FROM mtcars WHERE cyl == 8")) #> # Source: SQL [?? x 11] -#> # Database: sqlite 3.45.2 [:memory:] +#> # Database: sqlite 3.46.0 [:memory:] #> mpg cyl disp hp drat wt qsec vs am gear carb #> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 diff --git a/dev/search.json b/dev/search.json index db7bf69b7c..fb3c501671 100644 --- a/dev/search.json +++ b/dev/search.json @@ -1 +1 @@ -[{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/CODE_OF_CONDUCT.html","id":"our-pledge","dir":"","previous_headings":"","what":"Our Pledge","title":"Contributor Covenant Code of Conduct","text":"members, contributors, leaders pledge make participation community harassment-free experience everyone, regardless age, body size, visible invisible disability, ethnicity, sex characteristics, gender identity expression, level experience, education, socio-economic status, nationality, personal appearance, race, caste, color, religion, sexual identity orientation. 
pledge act interact ways contribute open, welcoming, diverse, inclusive, healthy community.","code":""},{"path":"https://dplyr.tidyverse.org/dev/CODE_OF_CONDUCT.html","id":"our-standards","dir":"","previous_headings":"","what":"Our Standards","title":"Contributor Covenant Code of Conduct","text":"Examples behavior contributes positive environment community include: Demonstrating empathy kindness toward people respectful differing opinions, viewpoints, experiences Giving gracefully accepting constructive feedback Accepting responsibility apologizing affected mistakes, learning experience Focusing best just us individuals, overall community Examples unacceptable behavior include: use sexualized language imagery, sexual attention advances kind Trolling, insulting derogatory comments, personal political attacks Public private harassment Publishing others’ private information, physical email address, without explicit permission conduct reasonably considered inappropriate professional setting","code":""},{"path":"https://dplyr.tidyverse.org/dev/CODE_OF_CONDUCT.html","id":"enforcement-responsibilities","dir":"","previous_headings":"","what":"Enforcement Responsibilities","title":"Contributor Covenant Code of Conduct","text":"Community leaders responsible clarifying enforcing standards acceptable behavior take appropriate fair corrective action response behavior deem inappropriate, threatening, offensive, harmful. Community leaders right responsibility remove, edit, reject comments, commits, code, wiki edits, issues, contributions aligned Code Conduct, communicate reasons moderation decisions appropriate.","code":""},{"path":"https://dplyr.tidyverse.org/dev/CODE_OF_CONDUCT.html","id":"scope","dir":"","previous_headings":"","what":"Scope","title":"Contributor Covenant Code of Conduct","text":"Code Conduct applies within community spaces, also applies individual officially representing community public spaces. 
Examples representing community include using official e-mail address, posting via official social media account, acting appointed representative online offline event.","code":""},{"path":"https://dplyr.tidyverse.org/dev/CODE_OF_CONDUCT.html","id":"enforcement","dir":"","previous_headings":"","what":"Enforcement","title":"Contributor Covenant Code of Conduct","text":"Instances abusive, harassing, otherwise unacceptable behavior may reported community leaders responsible enforcement codeofconduct@posit.co. complaints reviewed investigated promptly fairly. community leaders obligated respect privacy security reporter incident.","code":""},{"path":"https://dplyr.tidyverse.org/dev/CODE_OF_CONDUCT.html","id":"enforcement-guidelines","dir":"","previous_headings":"","what":"Enforcement Guidelines","title":"Contributor Covenant Code of Conduct","text":"Community leaders follow Community Impact Guidelines determining consequences action deem violation Code Conduct:","code":""},{"path":"https://dplyr.tidyverse.org/dev/CODE_OF_CONDUCT.html","id":"id_1-correction","dir":"","previous_headings":"Enforcement Guidelines","what":"1. Correction","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Use inappropriate language behavior deemed unprofessional unwelcome community. Consequence: private, written warning community leaders, providing clarity around nature violation explanation behavior inappropriate. public apology may requested.","code":""},{"path":"https://dplyr.tidyverse.org/dev/CODE_OF_CONDUCT.html","id":"id_2-warning","dir":"","previous_headings":"Enforcement Guidelines","what":"2. Warning","title":"Contributor Covenant Code of Conduct","text":"Community Impact: violation single incident series actions. Consequence: warning consequences continued behavior. interaction people involved, including unsolicited interaction enforcing Code Conduct, specified period time. 
includes avoiding interactions community spaces well external channels like social media. Violating terms may lead temporary permanent ban.","code":""},{"path":"https://dplyr.tidyverse.org/dev/CODE_OF_CONDUCT.html","id":"id_3-temporary-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"3. Temporary Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: serious violation community standards, including sustained inappropriate behavior. Consequence: temporary ban sort interaction public communication community specified period time. public private interaction people involved, including unsolicited interaction enforcing Code Conduct, allowed period. Violating terms may lead permanent ban.","code":""},{"path":"https://dplyr.tidyverse.org/dev/CODE_OF_CONDUCT.html","id":"id_4-permanent-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"4. Permanent Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Demonstrating pattern violation community standards, including sustained inappropriate behavior, harassment individual, aggression toward disparagement classes individuals. Consequence: permanent ban sort public interaction within community.","code":""},{"path":"https://dplyr.tidyverse.org/dev/CODE_OF_CONDUCT.html","id":"attribution","dir":"","previous_headings":"","what":"Attribution","title":"Contributor Covenant Code of Conduct","text":"Code Conduct adapted Contributor Covenant, version 2.1, available https://www.contributor-covenant.org/version/2/1/code_of_conduct.html. Community Impact Guidelines inspired [Mozilla’s code conduct enforcement ladder][https://github.com/mozilla/inclusion]. answers common questions code conduct, see FAQ https://www.contributor-covenant.org/faq. 
Translations available https://www.contributor-covenant.org/translations.","code":""},{"path":"https://dplyr.tidyverse.org/dev/CONTRIBUTING.html","id":null,"dir":"","previous_headings":"","what":"Contributing to dplyr","title":"Contributing to dplyr","text":"outlines propose change dplyr. detailed info contributing , tidyverse packages, please see development contributing guide.","code":""},{"path":"https://dplyr.tidyverse.org/dev/CONTRIBUTING.html","id":"fixing-typos","dir":"","previous_headings":"","what":"Fixing typos","title":"Contributing to dplyr","text":"Small typos grammatical errors documentation may edited directly using GitHub web interface, long changes made source file. YES: edit roxygen comment .R file R/. : edit .Rd file man/.","code":""},{"path":"https://dplyr.tidyverse.org/dev/CONTRIBUTING.html","id":"prerequisites","dir":"","previous_headings":"","what":"Prerequisites","title":"Contributing to dplyr","text":"make substantial pull request, always file issue make sure someone team agrees ’s problem. ’ve found bug, create associated issue illustrate bug minimal reprex.","code":""},{"path":"https://dplyr.tidyverse.org/dev/CONTRIBUTING.html","id":"pull-request-process","dir":"","previous_headings":"","what":"Pull request process","title":"Contributing to dplyr","text":"recommend create Git branch pull request (PR). Look Travis AppVeyor build status making changes. README contain badges continuous integration services used package. New code follow tidyverse style guide. can use styler package apply styles, please don’t restyle code nothing PR. use roxygen2, Markdown syntax, documentation. use testthat. Contributions test cases included easier accept. 
user-facing changes, add bullet top NEWS.md current development version header describing changes made followed GitHub username, links relevant issue(s)/PR(s).","code":""},{"path":"https://dplyr.tidyverse.org/dev/CONTRIBUTING.html","id":"code-of-conduct","dir":"","previous_headings":"","what":"Code of Conduct","title":"Contributing to dplyr","text":"Please note project released Contributor Code Conduct. participating project agree abide terms.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2023 dplyr authors Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"https://dplyr.tidyverse.org/dev/SUPPORT.html","id":null,"dir":"","previous_headings":"","what":"Getting help with dplyr","title":"Getting help with dplyr","text":"Thanks using dplyr. filing issue, places explore pieces put together make process smooth possible. Start making minimal reproducible example using reprex package. haven’t heard used reprex , ’re treat! Seriously, reprex make R-question-asking endeavors easier (pretty insane ROI five ten minutes ’ll take learn ’s ). additional reprex pointers, check Get help! section tidyverse site. Armed reprex, next step figure ask. 
’s question: start community.rstudio.com, /StackOverflow. people answer questions. ’s bug: ’re right place, file issue. ’re sure: let community help figure ! problem bug feature request, can easily return report . opening new issue, sure search issues pull requests make sure bug hasn’t reported /already fixed development version. default, search pre-populated :issue :open. can edit qualifiers (e.g. :pr, :closed) needed. example, ’d simply remove :open search issues repo, open closed. right place, need file issue, please review “File issues” paragraph tidyverse contributing guidelines. Thanks help!","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/base.html","id":"overview","dir":"Articles","previous_headings":"","what":"Overview","title":"dplyr <-> base R","text":"code dplyr verbs input output data frames. contrasts base R functions frequently work individual vectors. dplyr relies heavily “non-standard evaluation” don’t need use $ refer columns “current” data frame. behaviour inspired base functions subset() transform(). dplyr solutions tend use variety single purpose verbs, base R solutions typically tend use [ variety ways, depending task hand. Multiple dplyr verbs often strung together pipeline %>%. base R, ’ll typically save intermediate results variable either discard, repeatedly overwrite. dplyr verbs handle “grouped” data frames code perform computation per-group looks similar code works whole data frame. base R, per-group operations tend varied forms.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/base.html","id":"one-table-verbs","dir":"Articles","previous_headings":"","what":"One table verbs","title":"dplyr <-> base R","text":"following table shows condensed translation dplyr verbs base R equivalents. following sections describe operation detail. ’ll learn dplyr verbs documentation vignette(\"dplyr\"). 
begin, ’ll load dplyr convert mtcars iris tibbles can easily show abbreviated output operation.","code":"library(dplyr) mtcars <- as_tibble(mtcars) iris <- as_tibble(iris)"},{"path":"https://dplyr.tidyverse.org/dev/articles/base.html","id":"arrange-arrange-rows-by-variables","dir":"Articles","previous_headings":"One table verbs","what":"arrange(): Arrange rows by variables","title":"dplyr <-> base R","text":"dplyr::arrange() orders rows data frame values one columns: desc() helper allows order selected variables descending order: can replicate base R using [ order(): Note use drop = FALSE. forget , input data frame single column, output vector, data frame. source subtle bugs. Base R provide convenient general way sort individual variables descending order, two options: numeric variables, can use -x. can request order() sort variables descending order.","code":"mtcars %>% arrange(cyl, disp) #> # A tibble: 32 × 11 #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 33.9 4 71.1 65 4.22 1.84 19.9 1 1 4 1 #> 2 30.4 4 75.7 52 4.93 1.62 18.5 1 1 4 2 #> 3 32.4 4 78.7 66 4.08 2.2 19.5 1 1 4 1 #> 4 27.3 4 79 66 4.08 1.94 18.9 1 1 4 1 #> # ℹ 28 more rows mtcars %>% arrange(desc(cyl), desc(disp)) #> # A tibble: 32 × 11 #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 10.4 8 472 205 2.93 5.25 18.0 0 0 3 4 #> 2 10.4 8 460 215 3 5.42 17.8 0 0 3 4 #> 3 14.7 8 440 230 3.23 5.34 17.4 0 0 3 4 #> 4 19.2 8 400 175 3.08 3.84 17.0 0 0 3 2 #> # ℹ 28 more rows mtcars[order(mtcars$cyl, mtcars$disp), , drop = FALSE] #> # A tibble: 32 × 11 #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 33.9 4 71.1 65 4.22 1.84 19.9 1 1 4 1 #> 2 30.4 4 75.7 52 4.93 1.62 18.5 1 1 4 2 #> 3 32.4 4 78.7 66 4.08 2.2 19.5 1 1 4 1 #> 4 27.3 4 79 66 4.08 1.94 18.9 1 1 4 1 #> # ℹ 28 more rows mtcars[order(mtcars$cyl, mtcars$disp, decreasing = TRUE), , drop = FALSE] mtcars[order(-mtcars$cyl, -mtcars$disp), , drop = 
FALSE]"},{"path":"https://dplyr.tidyverse.org/dev/articles/base.html","id":"distinct-select-distinctunique-rows","dir":"Articles","previous_headings":"One table verbs","what":"distinct(): Select distinct/unique rows","title":"dplyr <-> base R","text":"dplyr::distinct() selects unique rows: two equivalents base R, depending whether want whole data frame, just selected variables:","code":"df <- tibble( x = sample(10, 100, rep = TRUE), y = sample(10, 100, rep = TRUE) ) df %>% distinct(x) # selected columns #> # A tibble: 10 × 1 #> x #> #> 1 7 #> 2 5 #> 3 6 #> 4 4 #> # ℹ 6 more rows df %>% distinct(x, .keep_all = TRUE) # whole data frame #> # A tibble: 10 × 2 #> x y #> #> 1 7 4 #> 2 5 2 #> 3 6 9 #> 4 4 2 #> # ℹ 6 more rows unique(df[\"x\"]) # selected columns #> # A tibble: 10 × 1 #> x #> #> 1 7 #> 2 5 #> 3 6 #> 4 4 #> # ℹ 6 more rows df[!duplicated(df$x), , drop = FALSE] # whole data frame #> # A tibble: 10 × 2 #> x y #> #> 1 7 4 #> 2 5 2 #> 3 6 9 #> 4 4 2 #> # ℹ 6 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/base.html","id":"filter-return-rows-with-matching-conditions","dir":"Articles","previous_headings":"One table verbs","what":"filter(): Return rows with matching conditions","title":"dplyr <-> base R","text":"dplyr::filter() selects rows expression TRUE: closest base equivalent (inspiration filter()) subset(): can also use [ also requires use () remove NAs:","code":"starwars %>% filter(species == \"Human\") #> # A tibble: 35 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Skyw… 172 77 blond fair blue 19 male #> 2 Darth Vad… 202 136 none white yellow 41.9 male #> 3 Leia Orga… 150 49 brown light brown 19 fema… #> 4 Owen Lars 178 120 brown, gr… light blue 52 male #> # ℹ 31 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships starwars %>% filter(mass > 1000) #> # A tibble: 1 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Jabba Des… 175 
1358 NA green-tan… orange 600 herm… #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships starwars %>% filter(hair_color == \"none\" & eye_color == \"black\") #> # A tibble: 9 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Nien Nunb 160 68 none grey black NA male #> 2 Gasgano 122 NA none white, blue black NA male #> 3 Kit Fisto 196 87 none green black NA male #> 4 Plo Koon 188 80 none orange black 22 male #> # ℹ 5 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships subset(starwars, species == \"Human\") #> # A tibble: 35 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Skyw… 172 77 blond fair blue 19 male #> 2 Darth Vad… 202 136 none white yellow 41.9 male #> 3 Leia Orga… 150 49 brown light brown 19 fema… #> 4 Owen Lars 178 120 brown, gr… light blue 52 male #> # ℹ 31 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships subset(starwars, mass > 1000) #> # A tibble: 1 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Jabba Des… 175 1358 NA green-tan… orange 600 herm… #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships subset(starwars, hair_color == \"none\" & eye_color == \"black\") #> # A tibble: 9 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Nien Nunb 160 68 none grey black NA male #> 2 Gasgano 122 NA none white, blue black NA male #> 3 Kit Fisto 196 87 none green black NA male #> 4 Plo Koon 188 80 none orange black 22 male #> # ℹ 5 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships starwars[which(starwars$species == \"Human\"), , drop = FALSE] #> # A tibble: 35 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Skyw… 172 77 blond fair blue 19 male #> 2 Darth Vad… 202 136 none white 
yellow 41.9 male #> 3 Leia Orga… 150 49 brown light brown 19 fema… #> 4 Owen Lars 178 120 brown, gr… light blue 52 male #> # ℹ 31 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships starwars[which(starwars$mass > 1000), , drop = FALSE] #> # A tibble: 1 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Jabba Des… 175 1358 NA green-tan… orange 600 herm… #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships starwars[which(starwars$hair_color == \"none\" & starwars$eye_color == \"black\"), , drop = FALSE] #> # A tibble: 9 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Nien Nunb 160 68 none grey black NA male #> 2 Gasgano 122 NA none white, blue black NA male #> 3 Kit Fisto 196 87 none green black NA male #> 4 Plo Koon 188 80 none orange black 22 male #> # ℹ 5 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships "},{"path":"https://dplyr.tidyverse.org/dev/articles/base.html","id":"mutate-create-or-transform-variables","dir":"Articles","previous_headings":"One table verbs","what":"mutate(): Create or transform variables","title":"dplyr <-> base R","text":"dplyr::mutate() creates new variables existing variables: closest base equivalent transform(), note use freshly created variables: Alternatively, can use $<-: applied grouped data frame, dplyr::mutate() computes new variable per group: replicate base R, can use ave():","code":"df %>% mutate(z = x + y, z2 = z ^ 2) #> # A tibble: 100 × 4 #> x y z z2 #> #> 1 7 4 11 121 #> 2 5 2 7 49 #> 3 6 9 15 225 #> 4 4 2 6 36 #> # ℹ 96 more rows head(transform(df, z = x + y, z2 = (x + y) ^ 2)) #> x y z z2 #> 1 7 4 11 121 #> 2 5 2 7 49 #> 3 6 9 15 225 #> 4 4 2 6 36 #> 5 6 3 9 81 #> 6 9 3 12 144 mtcars$cyl2 <- mtcars$cyl * 2 mtcars$cyl4 <- mtcars$cyl2 * 2 gf <- tibble(g = c(1, 1, 2, 2), x = c(0.5, 1.5, 2.5, 3.5)) gf %>% group_by(g) %>% 
mutate(x_mean = mean(x), x_rank = rank(x)) #> # A tibble: 4 × 4 #> # Groups: g [2] #> g x x_mean x_rank #> #> 1 1 0.5 1 1 #> 2 1 1.5 1 2 #> 3 2 2.5 3 1 #> 4 2 3.5 3 2 transform(gf, x_mean = ave(x, g, FUN = mean), x_rank = ave(x, g, FUN = rank) ) #> g x x_mean x_rank #> 1 1 0.5 1 1 #> 2 1 1.5 1 2 #> 3 2 2.5 3 1 #> 4 2 3.5 3 2"},{"path":"https://dplyr.tidyverse.org/dev/articles/base.html","id":"pull-pull-out-a-single-variable","dir":"Articles","previous_headings":"One table verbs","what":"pull(): Pull out a single variable","title":"dplyr <-> base R","text":"dplyr::pull() extracts variable either name position: equivalent [[ positions $ names:","code":"mtcars %>% pull(1) #> [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 #> [15] 10.4 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4 #> [29] 15.8 19.7 15.0 21.4 mtcars %>% pull(cyl) #> [1] 6 6 4 6 8 6 8 4 4 6 6 8 8 8 8 8 8 4 4 4 4 8 8 8 8 4 4 4 8 6 8 4 mtcars[[1]] #> [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 #> [15] 10.4 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4 #> [29] 15.8 19.7 15.0 21.4 mtcars$cyl #> [1] 6 6 4 6 8 6 8 4 4 6 6 8 8 8 8 8 8 4 4 4 4 8 8 8 8 4 4 4 8 6 8 4"},{"path":"https://dplyr.tidyverse.org/dev/articles/base.html","id":"relocate-change-column-order","dir":"Articles","previous_headings":"One table verbs","what":"relocate(): Change column order","title":"dplyr <-> base R","text":"dplyr::relocate() makes easy move set columns new position (default, front): can replicate base R little set manipulation: Moving columns somewhere middle requires little set twiddling.","code":"# to front mtcars %>% relocate(gear, carb) #> # A tibble: 32 × 13 #> gear carb mpg cyl disp hp drat wt qsec vs am cyl2 #> #> 1 4 4 21 6 160 110 3.9 2.62 16.5 0 1 12 #> 2 4 4 21 6 160 110 3.9 2.88 17.0 0 1 12 #> 3 4 1 22.8 4 108 93 3.85 2.32 18.6 1 1 8 #> 4 3 1 21.4 6 258 110 3.08 3.22 19.4 1 0 12 #> # ℹ 28 more rows #> # ℹ 1 more variable: 
cyl4 # to back mtcars %>% relocate(mpg, cyl, .after = last_col()) #> # A tibble: 32 × 13 #> disp hp drat wt qsec vs am gear carb cyl2 cyl4 mpg #> #> 1 160 110 3.9 2.62 16.5 0 1 4 4 12 24 21 #> 2 160 110 3.9 2.88 17.0 0 1 4 4 12 24 21 #> 3 108 93 3.85 2.32 18.6 1 1 4 1 8 16 22.8 #> 4 258 110 3.08 3.22 19.4 1 0 3 1 12 24 21.4 #> # ℹ 28 more rows #> # ℹ 1 more variable: cyl mtcars[union(c(\"gear\", \"carb\"), names(mtcars))] #> # A tibble: 32 × 13 #> gear carb mpg cyl disp hp drat wt qsec vs am cyl2 #> #> 1 4 4 21 6 160 110 3.9 2.62 16.5 0 1 12 #> 2 4 4 21 6 160 110 3.9 2.88 17.0 0 1 12 #> 3 4 1 22.8 4 108 93 3.85 2.32 18.6 1 1 8 #> 4 3 1 21.4 6 258 110 3.08 3.22 19.4 1 0 12 #> # ℹ 28 more rows #> # ℹ 1 more variable: cyl4 to_back <- c(\"mpg\", \"cyl\") mtcars[c(setdiff(names(mtcars), to_back), to_back)] #> # A tibble: 32 × 13 #> disp hp drat wt qsec vs am gear carb cyl2 cyl4 mpg #> #> 1 160 110 3.9 2.62 16.5 0 1 4 4 12 24 21 #> 2 160 110 3.9 2.88 17.0 0 1 4 4 12 24 21 #> 3 108 93 3.85 2.32 18.6 1 1 4 1 8 16 22.8 #> 4 258 110 3.08 3.22 19.4 1 0 3 1 12 24 21.4 #> # ℹ 28 more rows #> # ℹ 1 more variable: cyl "},{"path":"https://dplyr.tidyverse.org/dev/articles/base.html","id":"rename-rename-variables-by-name","dir":"Articles","previous_headings":"One table verbs","what":"rename(): Rename variables by name","title":"dplyr <-> base R","text":"dplyr::rename() allows rename variables name position: Renaming variables position straight forward base R: Renaming variables name requires bit work:","code":"iris %>% rename(sepal_length = Sepal.Length, sepal_width = 2) #> # A tibble: 150 × 5 #> sepal_length sepal_width Petal.Length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> # ℹ 146 more rows iris2 <- iris names(iris2)[2] <- \"sepal_width\" names(iris2)[names(iris2) == \"Sepal.Length\"] <- 
\"sepal_length\""},{"path":"https://dplyr.tidyverse.org/dev/articles/base.html","id":"rename_with-rename-variables-with-a-function","dir":"Articles","previous_headings":"One table verbs","what":"rename_with(): Rename variables with a function","title":"dplyr <-> base R","text":"dplyr::rename_with() transform column names function: similar effect can achieved setNames() base R:","code":"iris %>% rename_with(toupper) #> # A tibble: 150 × 5 #> SEPAL.LENGTH SEPAL.WIDTH PETAL.LENGTH PETAL.WIDTH SPECIES #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> # ℹ 146 more rows setNames(iris, toupper(names(iris))) #> # A tibble: 150 × 5 #> SEPAL.LENGTH SEPAL.WIDTH PETAL.LENGTH PETAL.WIDTH SPECIES #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> # ℹ 146 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/base.html","id":"select-select-variables-by-name","dir":"Articles","previous_headings":"One table verbs","what":"select(): Select variables by name","title":"dplyr <-> base R","text":"dplyr::select() subsets columns position, name, function name, property: Subsetting variables position straightforward base R: two options subset name: Subsetting function name requires bit work grep(): can use Filter() subset type:","code":"iris %>% select(1:3) #> # A tibble: 150 × 3 #> Sepal.Length Sepal.Width Petal.Length #> #> 1 5.1 3.5 1.4 #> 2 4.9 3 1.4 #> 3 4.7 3.2 1.3 #> 4 4.6 3.1 1.5 #> # ℹ 146 more rows iris %>% select(Species, Sepal.Length) #> # A tibble: 150 × 2 #> Species Sepal.Length #> #> 1 setosa 5.1 #> 2 setosa 4.9 #> 3 setosa 4.7 #> 4 setosa 4.6 #> # ℹ 146 more rows iris %>% select(starts_with(\"Petal\")) #> # A tibble: 150 × 2 #> Petal.Length Petal.Width #> #> 1 1.4 0.2 #> 2 1.4 0.2 #> 3 1.3 0.2 #> 4 1.5 0.2 #> # ℹ 146 more rows iris %>% select(where(is.factor)) #> # A tibble: 150 × 1 #> Species #> #> 1 setosa #> 2 setosa #> 3 setosa #> 4 
setosa #> # ℹ 146 more rows iris[1:3] # single argument selects columns; never drops #> # A tibble: 150 × 3 #> Sepal.Length Sepal.Width Petal.Length #> #> 1 5.1 3.5 1.4 #> 2 4.9 3 1.4 #> 3 4.7 3.2 1.3 #> 4 4.6 3.1 1.5 #> # ℹ 146 more rows iris[1:3, , drop = FALSE] #> # A tibble: 3 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa iris[c(\"Species\", \"Sepal.Length\")] #> # A tibble: 150 × 2 #> Species Sepal.Length #> #> 1 setosa 5.1 #> 2 setosa 4.9 #> 3 setosa 4.7 #> 4 setosa 4.6 #> # ℹ 146 more rows subset(iris, select = c(Species, Sepal.Length)) #> # A tibble: 150 × 2 #> Species Sepal.Length #> #> 1 setosa 5.1 #> 2 setosa 4.9 #> 3 setosa 4.7 #> 4 setosa 4.6 #> # ℹ 146 more rows iris[grep(\"^Petal\", names(iris))] #> # A tibble: 150 × 2 #> Petal.Length Petal.Width #> #> 1 1.4 0.2 #> 2 1.4 0.2 #> 3 1.3 0.2 #> 4 1.5 0.2 #> # ℹ 146 more rows Filter(is.factor, iris) #> # A tibble: 150 × 1 #> Species #> #> 1 setosa #> 2 setosa #> 3 setosa #> 4 setosa #> # ℹ 146 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/base.html","id":"summarise-reduce-multiple-values-down-to-a-single-value","dir":"Articles","previous_headings":"One table verbs","what":"summarise(): Reduce multiple values down to a single value","title":"dplyr <-> base R","text":"dplyr::summarise() computes one summaries group: think closest base R equivalent uses (). Unfortunately () returns list data frames, can combine back together .call() rbind(): aggregate() comes close providing elegant answer: unfortunately looks like disp.mean disp.n columns, ’s actually single matrix column: can see variety options https://gist.github.com/hadley/c430501804349d382ce90754936ab8ec.","code":"mtcars %>% group_by(cyl) %>% summarise(mean = mean(disp), n = n()) #> # A tibble: 3 × 3 #> cyl mean n #> #> 1 4 105. 11 #> 2 6 183. 7 #> 3 8 353. 
14 mtcars_by <- by(mtcars, mtcars$cyl, function(df) { with(df, data.frame(cyl = cyl[[1]], mean = mean(disp), n = nrow(df))) }) do.call(rbind, mtcars_by) #> cyl mean n #> 4 4 105.1364 11 #> 6 6 183.3143 7 #> 8 8 353.1000 14 agg <- aggregate(disp ~ cyl, mtcars, function(x) c(mean = mean(x), n = length(x))) agg #> cyl disp.mean disp.n #> 1 4 105.1364 11.0000 #> 2 6 183.3143 7.0000 #> 3 8 353.1000 14.0000 str(agg) #> 'data.frame': 3 obs. of 2 variables: #> $ cyl : num 4 6 8 #> $ disp: num [1:3, 1:2] 105 183 353 11 7 ... #> ..- attr(*, \"dimnames\")=List of 2 #> .. ..$ : NULL #> .. ..$ : chr [1:2] \"mean\" \"n\""},{"path":"https://dplyr.tidyverse.org/dev/articles/base.html","id":"slice-choose-rows-by-position","dir":"Articles","previous_headings":"One table verbs","what":"slice(): Choose rows by position","title":"dplyr <-> base R","text":"slice() selects rows location: straightforward replicate [:","code":"slice(mtcars, 25:n()) #> # A tibble: 8 × 13 #> mpg cyl disp hp drat wt qsec vs am gear carb cyl2 #> #> 1 19.2 8 400 175 3.08 3.84 17.0 0 0 3 2 16 #> 2 27.3 4 79 66 4.08 1.94 18.9 1 1 4 1 8 #> 3 26 4 120. 91 4.43 2.14 16.7 0 1 5 2 8 #> 4 30.4 4 95.1 113 3.77 1.51 16.9 1 1 5 2 8 #> # ℹ 4 more rows #> # ℹ 1 more variable: cyl4 mtcars[25:nrow(mtcars), , drop = FALSE] #> # A tibble: 8 × 13 #> mpg cyl disp hp drat wt qsec vs am gear carb cyl2 #> #> 1 19.2 8 400 175 3.08 3.84 17.0 0 0 3 2 16 #> 2 27.3 4 79 66 4.08 1.94 18.9 1 1 4 1 8 #> 3 26 4 120. 91 4.43 2.14 16.7 0 1 5 2 8 #> 4 30.4 4 95.1 113 3.77 1.51 16.9 1 1 5 2 8 #> # ℹ 4 more rows #> # ℹ 1 more variable: cyl4 "},{"path":"https://dplyr.tidyverse.org/dev/articles/base.html","id":"two-table-verbs","dir":"Articles","previous_headings":"","what":"Two-table verbs","title":"dplyr <-> base R","text":"want merge two data frames, x y), variety different ways bring together. Various base R merge() calls replaced variety dplyr join() functions. 
information two-table verbs, see vignette(\"two-table\").","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/base.html","id":"mutating-joins","dir":"Articles","previous_headings":"Two-table verbs","what":"Mutating joins","title":"dplyr <-> base R","text":"dplyr’s inner_join(), left_join(), right_join(), full_join() add new columns y x, matching rows based set “keys”, differ missing matches handled. equivalent calls merge() various settings , .x, .y arguments. main difference order rows: dplyr preserves order x data frame. merge() sorts key columns.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/base.html","id":"filtering-joins","dir":"Articles","previous_headings":"Two-table verbs","what":"Filtering joins","title":"dplyr <-> base R","text":"dplyr’s semi_join() anti_join() affect rows, columns: can replicated base R [ %%: Semi anti joins multiple key variables considerably challenging implement.","code":"band_members %>% semi_join(band_instruments) #> Joining with `by = join_by(name)` #> # A tibble: 2 × 2 #> name band #> #> 1 John Beatles #> 2 Paul Beatles band_members %>% anti_join(band_instruments) #> Joining with `by = join_by(name)` #> # A tibble: 1 × 2 #> name band #> #> 1 Mick Stones band_members[band_members$name %in% band_instruments$name, , drop = FALSE] #> # A tibble: 2 × 2 #> name band #> #> 1 John Beatles #> 2 Paul Beatles band_members[!band_members$name %in% band_instruments$name, , drop = FALSE] #> # A tibble: 1 × 2 #> name band #> #> 1 Mick Stones"},{"path":"https://dplyr.tidyverse.org/dev/articles/colwise.html","id":"basic-usage","dir":"Articles","previous_headings":"","what":"Basic usage","title":"Column-wise operations","text":"across() two primary arguments: first argument, .cols, selects columns want operate . uses tidy selection (like select()) can pick variables position, name, type. second argument, .fns, function list functions apply column. can also purrr style formula (list formulas) like ~ .x / 2. 
(argument optional, can omit just want get underlying data; ’ll see technique used vignette(\"rowwise\").) couple examples across() conjunction favourite verb, summarise(). can use across() dplyr verb, ’ll see little later. across() usually used combination summarise() mutate(), doesn’t select grouping variables order avoid accidentally modifying :","code":"starwars %>% summarise(across(where(is.character), n_distinct)) #> # A tibble: 1 × 8 #> name hair_color skin_color eye_color sex gender homeworld species #> #> 1 87 12 31 15 5 3 49 38 starwars %>% group_by(species) %>% filter(n() > 1) %>% summarise(across(c(sex, gender, homeworld), n_distinct)) #> # A tibble: 9 × 4 #> species sex gender homeworld #> #> 1 Droid 1 2 3 #> 2 Gungan 1 1 1 #> 3 Human 2 2 15 #> 4 Kaminoan 2 2 1 #> # ℹ 5 more rows starwars %>% group_by(homeworld) %>% filter(n() > 1) %>% summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE))) #> # A tibble: 10 × 4 #> homeworld height mass birth_year #> #> 1 Alderaan 176. 64 43 #> 2 Corellia 175 78.5 25 #> 3 Coruscant 174. 50 91 #> 4 Kamino 208. 83.1 31.5 #> # ℹ 6 more rows df <- data.frame(g = c(1, 1, 2), x = c(-1, 1, 3), y = c(-1, -4, -9)) df %>% group_by(g) %>% summarise(across(where(is.numeric), sum)) #> # A tibble: 2 × 3 #> g x y #> #> 1 1 0 -5 #> 2 2 3 -9"},{"path":"https://dplyr.tidyverse.org/dev/articles/colwise.html","id":"multiple-functions","dir":"Articles","previous_headings":"Basic usage","what":"Multiple functions","title":"Column-wise operations","text":"can transform variable one function supplying named list functions lambda functions second argument: Control names created .names argument takes glue spec: ’d prefer summaries function grouped together, ’ll expand calls : (One day might become argument across() ’re yet sure work.) however use (.numeric) last case second across() pick variables newly created (“min_height”, “min_mass” “min_birth_year”). 
can work around combining calls across() single expression returns tibble: Alternatively reorganize results relocate():","code":"min_max <- list( min = ~min(.x, na.rm = TRUE), max = ~max(.x, na.rm = TRUE) ) starwars %>% summarise(across(where(is.numeric), min_max)) #> # A tibble: 1 × 6 #> height_min height_max mass_min mass_max birth_year_min birth_year_max #> #> 1 66 264 15 1358 8 896 starwars %>% summarise(across(c(height, mass, birth_year), min_max)) #> # A tibble: 1 × 6 #> height_min height_max mass_min mass_max birth_year_min birth_year_max #> #> 1 66 264 15 1358 8 896 starwars %>% summarise(across(where(is.numeric), min_max, .names = \"{.fn}.{.col}\")) #> # A tibble: 1 × 6 #> min.height max.height min.mass max.mass min.birth_year max.birth_year #> #> 1 66 264 15 1358 8 896 starwars %>% summarise(across(c(height, mass, birth_year), min_max, .names = \"{.fn}.{.col}\")) #> # A tibble: 1 × 6 #> min.height max.height min.mass max.mass min.birth_year max.birth_year #> #> 1 66 264 15 1358 8 896 starwars %>% summarise( across(c(height, mass, birth_year), ~min(.x, na.rm = TRUE), .names = \"min_{.col}\"), across(c(height, mass, birth_year), ~max(.x, na.rm = TRUE), .names = \"max_{.col}\") ) #> # A tibble: 1 × 6 #> min_height min_mass min_birth_year max_height max_mass max_birth_year #> #> 1 66 15 8 264 1358 896 starwars %>% summarise( tibble( across(where(is.numeric), ~min(.x, na.rm = TRUE), .names = \"min_{.col}\"), across(where(is.numeric), ~max(.x, na.rm = TRUE), .names = \"max_{.col}\") ) ) #> # A tibble: 1 × 6 #> min_height min_mass min_birth_year max_height max_mass max_birth_year #> #> 1 66 15 8 264 1358 896 starwars %>% summarise(across(where(is.numeric), min_max, .names = \"{.fn}.{.col}\")) %>% relocate(starts_with(\"min\")) #> # A tibble: 1 × 6 #> min.height min.mass min.birth_year max.height max.mass max.birth_year #> #> 1 66 15 8 264 1358 
896"},{"path":"https://dplyr.tidyverse.org/dev/articles/colwise.html","id":"current-column","dir":"Articles","previous_headings":"Basic usage","what":"Current column","title":"Column-wise operations","text":"need , can access name “current” column inside calling cur_column(). can useful want perform sort context dependent transformation ’s already encoded vector:","code":"df <- tibble(x = 1:3, y = 3:5, z = 5:7) mult <- list(x = 1, y = 10, z = 100) df %>% mutate(across(all_of(names(mult)), ~ .x * mult[[cur_column()]])) #> # A tibble: 3 × 3 #> x y z #> #> 1 1 30 500 #> 2 2 40 600 #> 3 3 50 700"},{"path":"https://dplyr.tidyverse.org/dev/articles/colwise.html","id":"gotchas","dir":"Articles","previous_headings":"Basic usage","what":"Gotchas","title":"Column-wise operations","text":"careful combining numeric summaries (.numeric): n becomes NA n numeric, across() computes standard deviation, standard deviation 3 (constant) NA. probably want compute n() last avoid problem: Alternatively, explicitly exclude n columns operate : Another approach combine call n() across() single expression returns tibble:","code":"df <- data.frame(x = c(1, 2, 3), y = c(1, 4, 9)) df %>% summarise(n = n(), across(where(is.numeric), sd)) #> n x y #> 1 NA 1 4.041452 df %>% summarise(across(where(is.numeric), sd), n = n()) #> x y n #> 1 1 4.041452 3 df %>% summarise(n = n(), across(where(is.numeric) & !n, sd)) #> n x y #> 1 3 1 4.041452 df %>% summarise( tibble(n = n(), across(where(is.numeric), sd)) ) #> n x y #> 1 3 1 4.041452"},{"path":"https://dplyr.tidyverse.org/dev/articles/colwise.html","id":"other-verbs","dir":"Articles","previous_headings":"Basic usage","what":"Other verbs","title":"Column-wise operations","text":"far ’ve focused use across() summarise(), works dplyr verb uses data masking: Rescale numeric variables range 0-1: verbs, like group_by(), count() distinct(), don’t need supply summary function, can useful use tidy-selection dynamically select set columns. 
cases, recommend using complement across(), pick(), works like across() doesn’t apply functions instead returns data frame containing selected columns. Find distinct Count combinations variables given pattern: across() doesn’t work select() rename() already use tidy select syntax; want transform column names function, can use rename_with().","code":"rescale01 <- function(x) { rng <- range(x, na.rm = TRUE) (x - rng[1]) / (rng[2] - rng[1]) } df <- tibble(x = 1:4, y = rnorm(4)) df %>% mutate(across(where(is.numeric), rescale01)) #> # A tibble: 4 × 2 #> x y #> #> 1 0 0.385 #> 2 0.333 1 #> 3 0.667 0 #> 4 1 0.903 starwars %>% distinct(pick(contains(\"color\"))) #> # A tibble: 67 × 3 #> hair_color skin_color eye_color #> #> 1 blond fair blue #> 2 NA gold yellow #> 3 NA white, blue red #> 4 none white yellow #> # ℹ 63 more rows starwars %>% count(pick(contains(\"color\")), sort = TRUE) #> # A tibble: 67 × 4 #> hair_color skin_color eye_color n #> #> 1 brown light brown 6 #> 2 brown fair blue 4 #> 3 none grey black 4 #> 4 black dark brown 3 #> # ℹ 63 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/colwise.html","id":"filter","dir":"Articles","previous_headings":"Basic usage","what":"filter()","title":"Column-wise operations","text":"directly use across() filter() need extra step combine results. 
end, filter() two special purpose companion functions: if_any() keeps rows predicate true least one selected column: if_all() keeps rows predicate true selected columns:","code":"starwars %>% filter(if_any(everything(), ~ !is.na(.x))) #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Skyw… 172 77 blond fair blue 19 male #> 2 C-3PO 167 75 NA gold yellow 112 none #> 3 R2-D2 96 32 NA white, bl… red 33 none #> 4 Darth Vad… 202 136 none white yellow 41.9 male #> # ℹ 83 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships starwars %>% filter(if_all(everything(), ~ !is.na(.x))) #> # A tibble: 29 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Skyw… 172 77 blond fair blue 19 male #> 2 Darth Vad… 202 136 none white yellow 41.9 male #> 3 Leia Orga… 150 49 brown light brown 19 fema… #> 4 Owen Lars 178 120 brown, gr… light blue 52 male #> # ℹ 25 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships "},{"path":"https://dplyr.tidyverse.org/dev/articles/colwise.html","id":"if-_at-_all","dir":"Articles","previous_headings":"","what":"_if, _at, _all","title":"Column-wise operations","text":"Prior versions dplyr allowed apply function multiple columns different way: using functions _if, _at, _all() suffixes. functions solved pressing need used many people, now superseded. means ’ll stay around, won’t receive new features get critical bug fixes.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/colwise.html","id":"why-do-we-like-across","dir":"Articles","previous_headings":"_if, _at, _all","what":"Why do we like across()?","title":"Column-wise operations","text":"decide move away functions favour across()? across() makes possible express useful summaries previously impossible: across() reduces number functions dplyr needs provide. 
makes dplyr easier use (fewer functions remember) easier us implement new verbs (since need implement one function, four). across() unifies _if _at semantics can select position, name, type, can now create compound selections previously impossible. example, can now transform numeric columns whose name begins “x”: across((.numeric) & starts_with(\"x\")). across() doesn’t need use vars(). _at() functions place dplyr manually quote variable names, makes little weird hence harder remember.","code":"df %>% group_by(g1, g2) %>% summarise( across(where(is.numeric), mean), across(where(is.factor), nlevels), n = n(), )"},{"path":"https://dplyr.tidyverse.org/dev/articles/colwise.html","id":"why-did-it-take-so-long-to-discover-across","dir":"Articles","previous_headings":"_if, _at, _all","what":"Why did it take so long to discover across()?","title":"Column-wise operations","text":"’s disappointing didn’t discover across() earlier, instead worked several false starts (first realising common problem, _each() functions, recently _if()/_at()/_all() functions). across() couldn’t work without three recent discoveries: can column data frame data frame. something provided base R, ’s well documented, took see useful, just theoretical curiosity. can use data frames allow summary functions return multiple columns. can use absence outer name convention want unpack data frame column individual columns.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/colwise.html","id":"how-do-you-convert-existing-code","dir":"Articles","previous_headings":"_if, _at, _all","what":"How do you convert existing code?","title":"Column-wise operations","text":"Fortunately, ’s generally straightforward translate existing code use across(): Strip _if(), _at() _all() suffix function. Call across(). first argument : _if(), old second argument wrapped (). _at(), old second argument, call vars() removed. _all(), everything(). subsequent arguments can copied . 
example: exceptions rule: rename_*() select_*() follow different pattern. already select semantics, generally used different way doesn’t direct equivalent across(); use new rename_with() instead. Previously, filter_*() paired all_vars() any_vars() helpers. new helpers if_any() if_all() can used inside filter() keep rows predicate true least one, selected columns: used mutate(), transformations performed across() applied . different behaviour mutate_if(), mutate_at(), mutate_all(), apply transformations one time. expect ’ll generally find new behaviour less surprising:","code":"df %>% mutate_if(is.numeric, ~mean(.x, na.rm = TRUE)) # -> df %>% mutate(across(where(is.numeric), ~mean(.x, na.rm = TRUE))) df %>% mutate_at(vars(c(x, starts_with(\"y\"))), mean) # -> df %>% mutate(across(c(x, starts_with(\"y\")), mean)) df %>% mutate_all(mean) # -> df %>% mutate(across(everything(), mean)) df <- tibble(x = c(\"a\", \"b\"), y = c(1, 1), z = c(-1, 1)) # Find all rows where EVERY numeric variable is greater than zero df %>% filter(if_all(where(is.numeric), ~ .x > 0)) #> # A tibble: 1 × 3 #> x y z #> #> 1 b 1 1 # Find all rows where ANY numeric variable is greater than zero df %>% filter(if_any(where(is.numeric), ~ .x > 0)) #> # A tibble: 2 × 3 #> x y z #> #> 1 a 1 -1 #> 2 b 1 1 df <- tibble(x = 2, y = 4, z = 8) df %>% mutate_all(~ .x / y) #> # A tibble: 1 × 3 #> x y z #> #> 1 0.5 1 8 df %>% mutate(across(everything(), ~ .x / y)) #> # A tibble: 1 × 3 #> x y z #> #> 1 0.5 1 2"},{"path":"https://dplyr.tidyverse.org/dev/articles/dplyr.html","id":"data-starwars","dir":"Articles","previous_headings":"","what":"Data: starwars","title":"Introduction to dplyr","text":"explore basic data manipulation verbs dplyr, ’ll use dataset starwars. dataset contains 87 characters comes Star Wars API, documented ?starwars Note starwars tibble, modern reimagining data frame. ’s particularly useful large datasets prints first rows. 
can learn tibbles https://tibble.tidyverse.org; particular can convert data frames tibbles as_tibble().","code":"dim(starwars) #> [1] 87 14 starwars #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Skyw… 172 77 blond fair blue 19 male #> 2 C-3PO 167 75 NA gold yellow 112 none #> 3 R2-D2 96 32 NA white, bl… red 33 none #> 4 Darth Vad… 202 136 none white yellow 41.9 male #> # ℹ 83 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships "},{"path":"https://dplyr.tidyverse.org/dev/articles/dplyr.html","id":"single-table-verbs","dir":"Articles","previous_headings":"","what":"Single table verbs","title":"Introduction to dplyr","text":"dplyr aims provide function basic verb data manipulation. verbs can organised three categories based component dataset work : filter() chooses rows based column values. slice() chooses rows based location. arrange() changes order rows. select() changes whether column included. rename() changes name columns. mutate() changes values columns creates new columns. relocate() changes order columns. summarise() collapses group single row.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/dplyr.html","id":"the-pipe","dir":"Articles","previous_headings":"Single table verbs","what":"The pipe","title":"Introduction to dplyr","text":"dplyr functions take data frame (tibble) first argument. Rather forcing user either save intermediate objects nest functions, dplyr provides %>% operator magrittr. x %>% f(y) turns f(x, y) result one step “piped” next step. can use pipe rewrite multiple operations can read left--right, top--bottom (reading pipe operator “”).","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/dplyr.html","id":"filter-rows-with-filter","dir":"Articles","previous_headings":"Single table verbs","what":"Filter rows with filter()","title":"Introduction to dplyr","text":"filter() allows select subset rows data frame. 
Like single verbs, first argument tibble (data frame). second subsequent arguments refer variables within data frame, selecting rows expression TRUE. example, can select character light skin color brown eyes : roughly equivalent base R code:","code":"starwars %>% filter(skin_color == \"light\", eye_color == \"brown\") #> # A tibble: 7 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Leia Orga… 150 49 brown light brown 19 fema… #> 2 Biggs Dar… 183 84 black light brown 24 male #> 3 Padmé Ami… 185 45 brown light brown 46 fema… #> 4 Cordé 157 NA brown light brown NA NA #> # ℹ 3 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships starwars[starwars$skin_color == \"light\" & starwars$eye_color == \"brown\", ]"},{"path":"https://dplyr.tidyverse.org/dev/articles/dplyr.html","id":"arrange-rows-with-arrange","dir":"Articles","previous_headings":"Single table verbs","what":"Arrange rows with arrange()","title":"Introduction to dplyr","text":"arrange() works similarly filter() except instead filtering selecting rows, reorders . takes data frame, set column names (complicated expressions) order . 
provide one column name, additional column used break ties values preceding columns: Use desc() order column descending order:","code":"starwars %>% arrange(height, mass) #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Yoda 66 17 white green brown 896 male #> 2 Ratts Tye… 79 15 none grey, blue unknown NA male #> 3 Wicket Sy… 88 20 brown brown brown 8 male #> 4 Dud Bolt 94 45 none blue, grey yellow NA male #> # ℹ 83 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships starwars %>% arrange(desc(height)) #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Yarael Po… 264 NA none white yellow NA male #> 2 Tarfful 234 136 brown brown blue NA male #> 3 Lama Su 229 88 none grey black NA male #> 4 Chewbacca 228 112 brown unknown blue 200 male #> # ℹ 83 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships "},{"path":"https://dplyr.tidyverse.org/dev/articles/dplyr.html","id":"choose-rows-using-their-position-with-slice","dir":"Articles","previous_headings":"Single table verbs","what":"Choose rows using their position with slice()","title":"Introduction to dplyr","text":"slice() lets index rows (integer) locations. allows select, remove, duplicate rows. can get characters row numbers 5 10. accompanied number helpers common use cases: slice_head() slice_tail() select first last rows. slice_sample() randomly selects rows. Use option prop choose certain proportion cases. Use replace = TRUE perform bootstrap sample. needed, can weight sample weight argument. slice_min() slice_max() select rows highest lowest values variable. 
Note first must choose values NA.","code":"starwars %>% slice(5:10) #> # A tibble: 6 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Leia Orga… 150 49 brown light brown 19 fema… #> 2 Owen Lars 178 120 brown, gr… light blue 52 male #> 3 Beru Whit… 165 75 brown light blue 47 fema… #> 4 R5-D4 97 32 NA white, red red NA none #> # ℹ 2 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships starwars %>% slice_head(n = 3) #> # A tibble: 3 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Skyw… 172 77 blond fair blue 19 male #> 2 C-3PO 167 75 NA gold yellow 112 none #> 3 R2-D2 96 32 NA white, bl… red 33 none #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships starwars %>% slice_sample(n = 5) #> # A tibble: 5 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Ayla Secu… 178 55 none blue hazel 48 fema… #> 2 Bossk 190 113 none green red 53 male #> 3 San Hill 191 NA none grey gold NA male #> 4 Luminara … 170 56.2 black yellow blue 58 fema… #> # ℹ 1 more row #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships starwars %>% slice_sample(prop = 0.1) #> # A tibble: 8 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Qui-Gon J… 193 89 brown fair blue 92 male #> 2 Jango Fett 183 79 black tan brown 66 male #> 3 Jocasta Nu 167 NA white fair blue NA fema… #> 4 Zam Wesell 168 55 blonde fair, gre… yellow NA fema… #> # ℹ 4 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships starwars %>% filter(!is.na(height)) %>% slice_max(height, n = 3) #> # A tibble: 3 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Yarael Po… 264 NA none white yellow NA male #> 2 Tarfful 234 136 brown brown blue NA male #> 3 Lama Su 229 88 none grey black NA male #> # ℹ 6 more variables: gender , 
homeworld , species , #> # films , vehicles , starships "},{"path":"https://dplyr.tidyverse.org/dev/articles/dplyr.html","id":"select-columns-with-select","dir":"Articles","previous_headings":"Single table verbs","what":"Select columns with select()","title":"Introduction to dplyr","text":"Often work large datasets many columns actually interest . select() allows rapidly zoom useful subset using operations usually work numeric variable positions: number helper functions can use within select(), like starts_with(), ends_with(), matches() contains(). let quickly match larger blocks variables meet criterion. See ?select details. can rename variables select() using named arguments: select() drops variables explicitly mentioned, ’s useful. Instead, use rename():","code":"# Select columns by name starwars %>% select(hair_color, skin_color, eye_color) #> # A tibble: 87 × 3 #> hair_color skin_color eye_color #> #> 1 blond fair blue #> 2 NA gold yellow #> 3 NA white, blue red #> 4 none white yellow #> # ℹ 83 more rows # Select all columns between hair_color and eye_color (inclusive) starwars %>% select(hair_color:eye_color) #> # A tibble: 87 × 3 #> hair_color skin_color eye_color #> #> 1 blond fair blue #> 2 NA gold yellow #> 3 NA white, blue red #> 4 none white yellow #> # ℹ 83 more rows # Select all columns except those from hair_color to eye_color (inclusive) starwars %>% select(!(hair_color:eye_color)) #> # A tibble: 87 × 11 #> name height mass birth_year sex gender homeworld species films #> #> 1 Luke Skywa… 172 77 19 male mascu… Tatooine Human #> 2 C-3PO 167 75 112 none mascu… Tatooine Droid #> 3 R2-D2 96 32 33 none mascu… Naboo Droid #> 4 Darth Vader 202 136 41.9 male mascu… Tatooine Human #> # ℹ 83 more rows #> # ℹ 2 more variables: vehicles , starships # Select all columns ending with color starwars %>% select(ends_with(\"color\")) #> # A tibble: 87 × 3 #> hair_color skin_color eye_color #> #> 1 blond fair blue #> 2 NA gold yellow #> 3 NA white, blue red #> 4 none 
white yellow #> # ℹ 83 more rows starwars %>% select(home_world = homeworld) #> # A tibble: 87 × 1 #> home_world #> #> 1 Tatooine #> 2 Tatooine #> 3 Naboo #> 4 Tatooine #> # ℹ 83 more rows starwars %>% rename(home_world = homeworld) #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Skyw… 172 77 blond fair blue 19 male #> 2 C-3PO 167 75 NA gold yellow 112 none #> 3 R2-D2 96 32 NA white, bl… red 33 none #> 4 Darth Vad… 202 136 none white yellow 41.9 male #> # ℹ 83 more rows #> # ℹ 6 more variables: gender , home_world , species , #> # films , vehicles , starships "},{"path":"https://dplyr.tidyverse.org/dev/articles/dplyr.html","id":"add-new-columns-with-mutate","dir":"Articles","previous_headings":"Single table verbs","what":"Add new columns with mutate()","title":"Introduction to dplyr","text":"Besides selecting sets existing columns, ’s often useful add new columns functions existing columns. job mutate(): can’t see height meters just calculated, can fix using select command. 
dplyr::mutate() similar base transform(), allows refer columns ’ve just created: want keep new variables, use .keep = \"none\":","code":"starwars %>% mutate(height_m = height / 100) #> # A tibble: 87 × 15 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Skyw… 172 77 blond fair blue 19 male #> 2 C-3PO 167 75 NA gold yellow 112 none #> 3 R2-D2 96 32 NA white, bl… red 33 none #> 4 Darth Vad… 202 136 none white yellow 41.9 male #> # ℹ 83 more rows #> # ℹ 7 more variables: gender , homeworld , species , #> # films , vehicles , starships , height_m starwars %>% mutate(height_m = height / 100) %>% select(height_m, height, everything()) #> # A tibble: 87 × 15 #> height_m height name mass hair_color skin_color eye_color birth_year #> #> 1 1.72 172 Luke S… 77 blond fair blue 19 #> 2 1.67 167 C-3PO 75 NA gold yellow 112 #> 3 0.96 96 R2-D2 32 NA white, bl… red 33 #> 4 2.02 202 Darth … 136 none white yellow 41.9 #> # ℹ 83 more rows #> # ℹ 7 more variables: sex , gender , homeworld , #> # species , films , vehicles , starships starwars %>% mutate( height_m = height / 100, BMI = mass / (height_m^2) ) %>% select(BMI, everything()) #> # A tibble: 87 × 16 #> BMI name height mass hair_color skin_color eye_color birth_year #> #> 1 26.0 Luke Skyw… 172 77 blond fair blue 19 #> 2 26.9 C-3PO 167 75 NA gold yellow 112 #> 3 34.7 R2-D2 96 32 NA white, bl… red 33 #> 4 33.3 Darth Vad… 202 136 none white yellow 41.9 #> # ℹ 83 more rows #> # ℹ 8 more variables: sex , gender , homeworld , #> # species , films , vehicles , starships , #> # height_m starwars %>% mutate( height_m = height / 100, BMI = mass / (height_m^2), .keep = \"none\" ) #> # A tibble: 87 × 2 #> height_m BMI #> #> 1 1.72 26.0 #> 2 1.67 26.9 #> 3 0.96 34.7 #> 4 2.02 33.3 #> # ℹ 83 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/dplyr.html","id":"change-column-order-with-relocate","dir":"Articles","previous_headings":"Single table verbs","what":"Change column order with 
relocate()","title":"Introduction to dplyr","text":"Use similar syntax select() move blocks columns ","code":"starwars %>% relocate(sex:homeworld, .before = height) #> # A tibble: 87 × 14 #> name sex gender homeworld height mass hair_color skin_color #> #> 1 Luke Skywalker male mascu… Tatooine 172 77 blond fair #> 2 C-3PO none mascu… Tatooine 167 75 NA gold #> 3 R2-D2 none mascu… Naboo 96 32 NA white, bl… #> 4 Darth Vader male mascu… Tatooine 202 136 none white #> # ℹ 83 more rows #> # ℹ 6 more variables: eye_color , birth_year , species , #> # films , vehicles , starships "},{"path":"https://dplyr.tidyverse.org/dev/articles/dplyr.html","id":"summarise-values-with-summarise","dir":"Articles","previous_headings":"Single table verbs","what":"Summarise values with summarise()","title":"Introduction to dplyr","text":"last verb summarise(). collapses data frame single row. ’s useful learn group_by() verb .","code":"starwars %>% summarise(height = mean(height, na.rm = TRUE)) #> # A tibble: 1 × 1 #> height #> #> 1 175."},{"path":"https://dplyr.tidyverse.org/dev/articles/dplyr.html","id":"commonalities","dir":"Articles","previous_headings":"Single table verbs","what":"Commonalities","title":"Introduction to dplyr","text":"may noticed syntax function verbs similar: first argument data frame. subsequent arguments describe data frame. can refer columns data frame directly without using $. result new data frame Together properties make easy chain together multiple simple steps achieve complex result. five functions provide basis language data manipulation. 
basic level, can alter tidy data frame five useful ways: can reorder rows (arrange()), pick observations variables interest (filter() select()), add new variables functions existing variables (mutate()), collapse many values summary (summarise()).","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/dplyr.html","id":"combining-functions-with","dir":"Articles","previous_headings":"","what":"Combining functions with %>%","title":"Introduction to dplyr","text":"dplyr API functional sense function calls don’t side-effects. must always save results. doesn’t lead particularly elegant code, especially want many operations . either step--step: don’t want name intermediate results, need wrap function calls inside : difficult read order operations inside . Thus, arguments long way away function. get around problem, dplyr provides %>% operator magrittr. x %>% f(y) turns f(x, y) can use rewrite multiple operations can read left--right, top--bottom (reading pipe operator “”):","code":"a1 <- group_by(starwars, species, sex) a2 <- select(a1, height, mass) a3 <- summarise(a2, height = mean(height, na.rm = TRUE), mass = mean(mass, na.rm = TRUE) ) summarise( select( group_by(starwars, species, sex), height, mass ), height = mean(height, na.rm = TRUE), mass = mean(mass, na.rm = TRUE) ) #> Adding missing grouping variables: `species`, `sex` #> `summarise()` has grouped output by 'species'. You can override using the #> `.groups` argument. 
#> # A tibble: 41 × 4 #> # Groups: species [38] #> species sex height mass #> #> 1 Aleena male 79 15 #> 2 Besalisk male 198 102 #> 3 Cerean male 198 82 #> 4 Chagrian male 196 NaN #> # ℹ 37 more rows starwars %>% group_by(species, sex) %>% select(height, mass) %>% summarise( height = mean(height, na.rm = TRUE), mass = mean(mass, na.rm = TRUE) )"},{"path":"https://dplyr.tidyverse.org/dev/articles/dplyr.html","id":"patterns-of-operations","dir":"Articles","previous_headings":"","what":"Patterns of operations","title":"Introduction to dplyr","text":"dplyr verbs can classified type operations accomplish (sometimes speak semantics, .e., meaning). ’s helpful good grasp difference select mutate operations.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/dplyr.html","id":"selecting-operations","dir":"Articles","previous_headings":"Patterns of operations","what":"Selecting operations","title":"Introduction to dplyr","text":"One appealing features dplyr can refer columns tibble regular variables. However, syntactic uniformity referring bare column names hides semantical differences across verbs. column symbol supplied select() meaning symbol supplied mutate(). Selecting operations expect column names positions. Hence, call select() bare variable names, actually represent positions tibble. following calls completely equivalent dplyr’s point view: token, means refer variables surrounding context name one columns. following example, height still represents 2, 5: One useful subtlety applies bare names selecting calls like c(height, mass) height:mass. cases, columns data frame put scope. allows refer contextual variables selection helpers: semantics usually intuitive. note subtle difference: first argument, name represents position 1. second argument, name evaluated surrounding context represents fifth column. long time, select() used understand column positions. Counting dplyr 0.6, now understands column names well. 
makes bit easier program select():","code":"# `name` represents the integer 1 select(starwars, name) #> # A tibble: 87 × 1 #> name #> #> 1 Luke Skywalker #> 2 C-3PO #> 3 R2-D2 #> 4 Darth Vader #> # ℹ 83 more rows select(starwars, 1) #> # A tibble: 87 × 1 #> name #> #> 1 Luke Skywalker #> 2 C-3PO #> 3 R2-D2 #> 4 Darth Vader #> # ℹ 83 more rows height <- 5 select(starwars, height) #> # A tibble: 87 × 1 #> height #> #> 1 172 #> 2 167 #> 3 96 #> 4 202 #> # ℹ 83 more rows name <- \"color\" select(starwars, ends_with(name)) #> # A tibble: 87 × 3 #> hair_color skin_color eye_color #> #> 1 blond fair blue #> 2 NA gold yellow #> 3 NA white, blue red #> 4 none white yellow #> # ℹ 83 more rows name <- 5 select(starwars, name, identity(name)) #> # A tibble: 87 × 2 #> name skin_color #> #> 1 Luke Skywalker fair #> 2 C-3PO gold #> 3 R2-D2 white, blue #> 4 Darth Vader white #> # ℹ 83 more rows vars <- c(\"name\", \"height\") select(starwars, all_of(vars), \"mass\") #> # A tibble: 87 × 3 #> name height mass #> #> 1 Luke Skywalker 172 77 #> 2 C-3PO 167 75 #> 3 R2-D2 96 32 #> 4 Darth Vader 202 136 #> # ℹ 83 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/dplyr.html","id":"mutating-operations","dir":"Articles","previous_headings":"Patterns of operations","what":"Mutating operations","title":"Introduction to dplyr","text":"Mutate semantics quite different selection semantics. Whereas select() expects column names positions, mutate() expects column vectors. set smaller tibble use examples. use select(), bare column names stand positions tibble. mutate() hand, column symbols represent actual column vectors stored tibble. Consider happens give string number mutate(): mutate() gets length-1 vectors interprets new columns data frame. vectors recycled match number rows. ’s doesn’t make sense supply expressions like \"height\" + 10 mutate(). amounts adding 10 string! correct expression : way, can unquote values context values represent valid column. 
must either length 1 (get recycled) length number rows. following example create new vector add data frame: case point group_by(). might think select semantics, actually mutate semantics. quite handy allows group modified column: can’t supply column name group_by(). amounts creating new column containing string recycled number rows:","code":"df <- starwars %>% select(name, height, mass) mutate(df, \"height\", 2) #> # A tibble: 87 × 5 #> name height mass `\"height\"` `2` #> #> 1 Luke Skywalker 172 77 height 2 #> 2 C-3PO 167 75 height 2 #> 3 R2-D2 96 32 height 2 #> 4 Darth Vader 202 136 height 2 #> # ℹ 83 more rows mutate(df, height + 10) #> # A tibble: 87 × 4 #> name height mass `height + 10` #> #> 1 Luke Skywalker 172 77 182 #> 2 C-3PO 167 75 177 #> 3 R2-D2 96 32 106 #> 4 Darth Vader 202 136 212 #> # ℹ 83 more rows var <- seq(1, nrow(df)) mutate(df, new = var) #> # A tibble: 87 × 4 #> name height mass new #> #> 1 Luke Skywalker 172 77 1 #> 2 C-3PO 167 75 2 #> 3 R2-D2 96 32 3 #> 4 Darth Vader 202 136 4 #> # ℹ 83 more rows group_by(starwars, sex) #> # A tibble: 87 × 14 #> # Groups: sex [5] #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Skyw… 172 77 blond fair blue 19 male #> 2 C-3PO 167 75 NA gold yellow 112 none #> 3 R2-D2 96 32 NA white, bl… red 33 none #> 4 Darth Vad… 202 136 none white yellow 41.9 male #> # ℹ 83 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships group_by(starwars, sex = as.factor(sex)) #> # A tibble: 87 × 14 #> # Groups: sex [5] #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Skyw… 172 77 blond fair blue 19 male #> 2 C-3PO 167 75 NA gold yellow 112 none #> 3 R2-D2 96 32 NA white, bl… red 33 none #> 4 Darth Vad… 202 136 none white yellow 41.9 male #> # ℹ 83 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships group_by(starwars, height_binned = cut(height, 3)) #> # A tibble: 87 × 15 #> # 
Groups: height_binned [4] #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Skyw… 172 77 blond fair blue 19 male #> 2 C-3PO 167 75 NA gold yellow 112 none #> 3 R2-D2 96 32 NA white, bl… red 33 none #> 4 Darth Vad… 202 136 none white yellow 41.9 male #> # ℹ 83 more rows #> # ℹ 7 more variables: gender , homeworld , species , #> # films , vehicles , starships , height_binned group_by(df, \"month\") #> # A tibble: 87 × 4 #> # Groups: \"month\" [1] #> name height mass `\"month\"` #> #> 1 Luke Skywalker 172 77 month #> 2 C-3PO 167 75 month #> 3 R2-D2 96 32 month #> 4 Darth Vader 202 136 month #> # ℹ 83 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/grouping.html","id":"group_by","dir":"Articles","previous_headings":"","what":"group_by()","title":"Grouped data","text":"important grouping verb group_by(): takes data frame one variables group : can see grouping print data: use tally() count number rows group. sort argument useful want see largest groups front. well grouping existing variables, can group function existing variables. 
equivalent performing mutate() group_by():","code":"by_species <- starwars %>% group_by(species) by_sex_gender <- starwars %>% group_by(sex, gender) by_species #> # A tibble: 87 × 14 #> # Groups: species [38] #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Skyw… 172 77 blond fair blue 19 male #> 2 C-3PO 167 75 NA gold yellow 112 none #> 3 R2-D2 96 32 NA white, bl… red 33 none #> 4 Darth Vad… 202 136 none white yellow 41.9 male #> # ℹ 83 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships by_sex_gender #> # A tibble: 87 × 14 #> # Groups: sex, gender [6] #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Skyw… 172 77 blond fair blue 19 male #> 2 C-3PO 167 75 NA gold yellow 112 none #> 3 R2-D2 96 32 NA white, bl… red 33 none #> 4 Darth Vad… 202 136 none white yellow 41.9 male #> # ℹ 83 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships by_species %>% tally() #> # A tibble: 38 × 2 #> species n #> #> 1 Aleena 1 #> 2 Besalisk 1 #> 3 Cerean 1 #> 4 Chagrian 1 #> # ℹ 34 more rows by_sex_gender %>% tally(sort = TRUE) #> # A tibble: 6 × 3 #> # Groups: sex [5] #> sex gender n #> #> 1 male masculine 60 #> 2 female feminine 16 #> 3 none masculine 5 #> 4 NA NA 4 #> # ℹ 2 more rows bmi_breaks <- c(0, 18.5, 25, 30, Inf) starwars %>% group_by(bmi_cat = cut(mass/(height/100)^2, breaks=bmi_breaks)) %>% tally() #> # A tibble: 5 × 2 #> bmi_cat n #> #> 1 (0,18.5] 10 #> 2 (18.5,25] 24 #> 3 (25,30] 13 #> 4 (30,Inf] 12 #> # ℹ 1 more row"},{"path":"https://dplyr.tidyverse.org/dev/articles/grouping.html","id":"group-metadata","dir":"Articles","previous_headings":"","what":"Group metadata","title":"Grouped data","text":"can see underlying group data group_keys(). 
one row group one column grouping variable: can see group row belongs group_indices(): rows group contains group_rows(): Use group_vars() just want names grouping variables:","code":"by_species %>% group_keys() #> # A tibble: 38 × 1 #> species #> #> 1 Aleena #> 2 Besalisk #> 3 Cerean #> 4 Chagrian #> # ℹ 34 more rows by_sex_gender %>% group_keys() #> # A tibble: 6 × 2 #> sex gender #> #> 1 female feminine #> 2 hermaphroditic masculine #> 3 male masculine #> 4 none feminine #> # ℹ 2 more rows by_species %>% group_indices() #> [1] 11 6 6 11 11 11 11 6 11 11 11 11 34 11 24 12 11 38 36 11 11 6 31 #> [24] 11 11 18 11 11 8 26 11 21 11 11 10 10 10 11 30 7 11 11 37 32 32 1 #> [47] 33 35 29 11 3 20 37 27 13 23 16 4 38 38 11 9 17 17 11 11 11 11 5 #> [70] 2 15 15 11 6 25 19 28 14 34 11 38 22 11 11 11 6 11 by_species %>% group_rows() %>% head() #> [6]> #> [[1]] #> [1] 46 #> #> [[2]] #> [1] 70 #> #> [[3]] #> [1] 51 #> #> [[4]] #> [1] 58 #> #> [[5]] #> [1] 69 #> #> [[6]] #> [1] 2 3 8 22 74 86 by_species %>% group_vars() #> [1] \"species\" by_sex_gender %>% group_vars() #> [1] \"sex\" \"gender\""},{"path":"https://dplyr.tidyverse.org/dev/articles/grouping.html","id":"changing-and-adding-to-grouping-variables","dir":"Articles","previous_headings":"Group metadata","what":"Changing and adding to grouping variables","title":"Grouped data","text":"apply group_by() already grouped dataset, overwrite existing grouping variables. example, following code groups homeworld instead species: augment grouping, using .add = TRUE1. 
example, following code groups species homeworld:","code":"by_species %>% group_by(homeworld) %>% tally() #> # A tibble: 49 × 2 #> homeworld n #> #> 1 Alderaan 3 #> 2 Aleen Minor 1 #> 3 Bespin 1 #> 4 Bestine IV 1 #> # ℹ 45 more rows by_species %>% group_by(homeworld, .add = TRUE) %>% tally() #> # A tibble: 57 × 3 #> # Groups: species [38] #> species homeworld n #> #> 1 Aleena Aleen Minor 1 #> 2 Besalisk Ojom 1 #> 3 Cerean Cerea 1 #> 4 Chagrian Champala 1 #> # ℹ 53 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/grouping.html","id":"removing-grouping-variables","dir":"Articles","previous_headings":"Group metadata","what":"Removing grouping variables","title":"Grouped data","text":"remove grouping variables, use ungroup(): can also choose selectively ungroup listing variables want remove:","code":"by_species %>% ungroup() %>% tally() #> # A tibble: 1 × 1 #> n #> #> 1 87 by_sex_gender %>% ungroup(sex) %>% tally() #> # A tibble: 3 × 2 #> gender n #> #> 1 feminine 17 #> 2 masculine 66 #> 3 NA 4"},{"path":"https://dplyr.tidyverse.org/dev/articles/grouping.html","id":"verbs","dir":"Articles","previous_headings":"","what":"Verbs","title":"Grouped data","text":"following sections describe grouping affects main dplyr verbs.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/grouping.html","id":"summarise","dir":"Articles","previous_headings":"Verbs","what":"summarise()","title":"Grouped data","text":"summarise() computes summary group. means starts group_keys(), adding summary variables right hand side: .groups= argument controls grouping structure output. historical behaviour removing right hand side grouping variable corresponds .groups = \"drop_last\" without message .groups = NULL message (default). Since version 1.0.0 groups may also kept (.groups = \"keep\") dropped (.groups = \"drop\"). output longer grouping variables, becomes ungrouped (.e. 
regular tibble).","code":"by_species %>% summarise( n = n(), height = mean(height, na.rm = TRUE) ) #> # A tibble: 38 × 3 #> species n height #> #> 1 Aleena 1 79 #> 2 Besalisk 1 198 #> 3 Cerean 1 198 #> 4 Chagrian 1 196 #> # ℹ 34 more rows by_sex_gender %>% summarise(n = n()) %>% group_vars() #> `summarise()` has grouped output by 'sex'. You can override using the #> `.groups` argument. #> [1] \"sex\" by_sex_gender %>% summarise(n = n(), .groups = \"drop_last\") %>% group_vars() #> [1] \"sex\" by_sex_gender %>% summarise(n = n(), .groups = \"keep\") %>% group_vars() #> [1] \"sex\" \"gender\" by_sex_gender %>% summarise(n = n(), .groups = \"drop\") %>% group_vars() #> character(0)"},{"path":"https://dplyr.tidyverse.org/dev/articles/grouping.html","id":"select-rename-and-relocate","dir":"Articles","previous_headings":"Verbs","what":"select(), rename(), and relocate()","title":"Grouped data","text":"rename() relocate() behave identically grouped ungrouped data affect name position existing columns. Grouped select() almost identical ungrouped select, except always includes grouping variables: don’t want grouping variables, ’ll first ungroup(). (design possibly mistake, ’re stuck now.)","code":"by_species %>% select(mass) #> Adding missing grouping variables: `species` #> # A tibble: 87 × 2 #> # Groups: species [38] #> species mass #> #> 1 Human 77 #> 2 Droid 75 #> 3 Droid 32 #> 4 Human 136 #> # ℹ 83 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/grouping.html","id":"arrange","dir":"Articles","previous_headings":"Verbs","what":"arrange()","title":"Grouped data","text":"Grouped arrange() ungrouped arrange(), unless set .by_group = TRUE, case order first grouping variables. 
Note second example sorted species (group_by() statement) mass (within species).","code":"by_species %>% arrange(desc(mass)) %>% relocate(species, mass) #> # A tibble: 87 × 14 #> # Groups: species [38] #> species mass name height hair_color skin_color eye_color birth_year #> #> 1 Hutt 1358 Jabba D… 175 NA green-tan… orange 600 #> 2 Kaleesh 159 Grievous 216 none brown, wh… green, y… NA #> 3 Droid 140 IG-88 200 none metal red 15 #> 4 Human 136 Darth V… 202 none white yellow 41.9 #> # ℹ 83 more rows #> # ℹ 6 more variables: sex , gender , homeworld , #> # films , vehicles , starships by_species %>% arrange(desc(mass), .by_group = TRUE) %>% relocate(species, mass) #> # A tibble: 87 × 14 #> # Groups: species [38] #> species mass name height hair_color skin_color eye_color birth_year #> #> 1 Aleena 15 Ratts … 79 none grey, blue unknown NA #> 2 Besalisk 102 Dexter… 198 none brown yellow NA #> 3 Cerean 82 Ki-Adi… 198 white pale yellow 92 #> 4 Chagrian NA Mas Am… 196 none blue blue NA #> # ℹ 83 more rows #> # ℹ 6 more variables: sex , gender , homeworld , #> # films , vehicles , starships "},{"path":"https://dplyr.tidyverse.org/dev/articles/grouping.html","id":"mutate","dir":"Articles","previous_headings":"Verbs","what":"mutate()","title":"Grouped data","text":"simple cases vectorised functions, grouped ungrouped mutate() give results. 
differ used summary functions: window functions like min_rank():","code":"# Subtract off global mean starwars %>% select(name, homeworld, mass) %>% mutate(standard_mass = mass - mean(mass, na.rm = TRUE)) #> # A tibble: 87 × 4 #> name homeworld mass standard_mass #> #> 1 Luke Skywalker Tatooine 77 -20.3 #> 2 C-3PO Tatooine 75 -22.3 #> 3 R2-D2 Naboo 32 -65.3 #> 4 Darth Vader Tatooine 136 38.7 #> # ℹ 83 more rows # Subtract off homeworld mean starwars %>% select(name, homeworld, mass) %>% group_by(homeworld) %>% mutate(standard_mass = mass - mean(mass, na.rm = TRUE)) #> # A tibble: 87 × 4 #> # Groups: homeworld [49] #> name homeworld mass standard_mass #> #> 1 Luke Skywalker Tatooine 77 -8.38 #> 2 C-3PO Tatooine 75 -10.4 #> 3 R2-D2 Naboo 32 -32.2 #> 4 Darth Vader Tatooine 136 50.6 #> # ℹ 83 more rows # Overall rank starwars %>% select(name, homeworld, height) %>% mutate(rank = min_rank(height)) #> # A tibble: 87 × 4 #> name homeworld height rank #> #> 1 Luke Skywalker Tatooine 172 28 #> 2 C-3PO Tatooine 167 20 #> 3 R2-D2 Naboo 96 5 #> 4 Darth Vader Tatooine 202 72 #> # ℹ 83 more rows # Rank per homeworld starwars %>% select(name, homeworld, height) %>% group_by(homeworld) %>% mutate(rank = min_rank(height)) #> # A tibble: 87 × 4 #> # Groups: homeworld [49] #> name homeworld height rank #> #> 1 Luke Skywalker Tatooine 172 5 #> 2 C-3PO Tatooine 167 4 #> 3 R2-D2 Naboo 96 1 #> 4 Darth Vader Tatooine 202 10 #> # ℹ 83 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/grouping.html","id":"filter","dir":"Articles","previous_headings":"Verbs","what":"filter()","title":"Grouped data","text":"grouped filter() effectively mutate() generate logical variable, keeps rows variable TRUE. means grouped filters can used summary functions. example, can find tallest character species: can also use filter() remove entire groups. 
example, following code eliminates groups single member:","code":"by_species %>% select(name, species, height) %>% filter(height == max(height)) #> # A tibble: 36 × 3 #> # Groups: species [36] #> name species height #> #> 1 Greedo Rodian 173 #> 2 Jabba Desilijic Tiure Hutt 175 #> 3 Yoda Yoda's species 66 #> 4 Bossk Trandoshan 190 #> # ℹ 32 more rows by_species %>% filter(n() != 1) %>% tally() #> # A tibble: 9 × 2 #> species n #> #> 1 Droid 6 #> 2 Gungan 3 #> 3 Human 35 #> 4 Kaminoan 2 #> # ℹ 5 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/grouping.html","id":"slice-and-friends","dir":"Articles","previous_headings":"Verbs","what":"slice() and friends","title":"Grouped data","text":"slice() friends (slice_head(), slice_tail(), slice_sample(), slice_min() slice_max()) select rows within group. example, can select first observation within species: Similarly, can use slice_min() select smallest n values variable:","code":"by_species %>% relocate(species) %>% slice(1) #> # A tibble: 38 × 14 #> # Groups: species [38] #> species name height mass hair_color skin_color eye_color birth_year #> #> 1 Aleena Ratts … 79 15 none grey, blue unknown NA #> 2 Besalisk Dexter… 198 102 none brown yellow NA #> 3 Cerean Ki-Adi… 198 82 white pale yellow 92 #> 4 Chagrian Mas Am… 196 NA none blue blue NA #> # ℹ 34 more rows #> # ℹ 6 more variables: sex , gender , homeworld , #> # films , vehicles , starships by_species %>% filter(!is.na(height)) %>% slice_min(height, n = 2) #> # A tibble: 47 × 14 #> # Groups: species [38] #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Ratts Tye… 79 15 none grey, blue unknown NA male #> 2 Dexter Je… 198 102 none brown yellow NA male #> 3 Ki-Adi-Mu… 198 82 white pale yellow 92 male #> 4 Mas Amedda 196 NA none blue blue NA male #> # ℹ 43 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships 
"},{"path":"https://dplyr.tidyverse.org/dev/articles/in-packages.html","id":"join-helpers","dir":"Articles","previous_headings":"","what":"Join helpers","title":"Using dplyr in packages","text":"dplyr 1.1.0, ’ve introduced join_by() along 4 helpers performing various types joins: closest() () within() overlaps() join_by() implements domain specific language (DSL) joins, internally interprets calls functions. ’ll notice dplyr::closest() isn’t exported function dplyr (dplyr::() base::within() happen preexisting functions). use closest() package, cause R CMD check note letting know ’ve used symbol doesn’t belong package. silence , place utils::globalVariables(\"closest\") source file package (outside function). dbplyr similar thing SQL functions, can see example . may also add utils package Imports, even though base package. can easily usethis::use_package(\"utils\").","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/in-packages.html","id":"data-masking-and-tidy-selection-notes","dir":"Articles","previous_headings":"","what":"Data masking and tidy selection NOTEs","title":"Using dplyr in packages","text":"’re writing package function uses data masking tidy selection: ’ll get NOTE R CMD check doesn’t know dplyr functions use tidy evaluation: eliminate note: data masking, import .data rlang use .data$var instead var. tidy selection, use \"var\" instead var. 
yields: programming dplyr, see vignette(\"programming\", package = \"dplyr\").","code":"my_summary_function <- function(data) { data %>% select(grp, x, y) %>% filter(x > 0) %>% group_by(grp) %>% summarise(y = mean(y), n = n()) } N checking R code for possible problems my_summary_function: no visible binding for global variable ‘grp’, ‘x’, ‘y’ Undefined global functions or variables: grp x y #' @importFrom rlang .data my_summary_function <- function(data) { data %>% select(\"grp\", \"x\", \"y\") %>% filter(.data$x > 0) %>% group_by(.data$grp) %>% summarise(y = mean(.data$y), n = n()) }"},{"path":"https://dplyr.tidyverse.org/dev/articles/in-packages.html","id":"deprecation","dir":"Articles","previous_headings":"","what":"Deprecation","title":"Using dplyr in packages","text":"section focused updating package code deal backwards incompatible changes dplyr. try minimize backward incompatible changes much possible, sometimes necessary order radically simplify existing code, unlock lot potential value future. start general advice supporting multiple versions dplyr , discuss specific changes dplyr.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/in-packages.html","id":"multiple-dplyr-versions","dir":"Articles","previous_headings":"Deprecation","what":"Multiple dplyr versions","title":"Using dplyr in packages","text":"Ideally, introduce breaking change ’ll want make sure package works released version development version dplyr. typically little bit work, two big advantages: ’s convenient users, since package work regardless version dplyr installed. ’s easier CRAN since doesn’t require massive coordinated release multiple packages. break package, typically send pull request implements patch releasing next version dplyr. time, patch backwards compatible older versions dplyr well. Ideally, ’ll accept patch submit new version package CRAN new version dplyr released. 
make code work multiple versions package, first tool simple statement: Always condition > current-version, >= next-version ensure branch also used development version package. example, current release version \"0.5.0\", development version \"0.5.0.9000\". typically works well branch “new version” introduces new argument slightly different return value. doesn’t work ’ve introduced new function need switch , like: case, checks run dplyr 1.0.10 ’ll get warning using function dplyr doesn’t exist (reframe()) even though branch never run. can get around using utils::getFromNamespace() indirectly call new dplyr function: soon next version dplyr actually CRAN (1.1.0 case), feel free remove code unconditionally use reframe() long also require dplyr (>= 1.1.0) DESCRIPTION file. typically painful users, ’d already updating package run requirement, updating one package along way generally easy. also helps get latest bug fixes features dplyr. Sometimes, isn’t possible avoid call @importFrom. example might importing generic can define method , generic moved packages. 
case, can take advantage little-known feature NAMESPACE file: can include raw statements.","code":"if (utils::packageVersion(\"dplyr\") > \"0.5.0\") { # code for new version } else { # code for old version } if (utils::packageVersion(\"dplyr\") > \"1.0.10\") { dplyr::reframe(df, x = unique(x)) } else { dplyr::summarise(df, x = unique(x)) } if (utils::packageVersion(\"dplyr\") > \"1.0.10\") { utils::getFromNamespace(\"reframe\", \"dplyr\")(df, x = unique(x)) } else { dplyr::summarise(df, x = unique(x)) } #' @rawNamespace #' if (utils::packageVersion(\"dplyr\") > \"0.5.0\") { #' importFrom(\"dbplyr\", \"build_sql\") #' } else { #' importFrom(\"dplyr\", \"build_sql\") #' }"},{"path":"https://dplyr.tidyverse.org/dev/articles/in-packages.html","id":"deprecation-of-mutate_-and-summarise_","dir":"Articles","previous_headings":"Deprecation","what":"Deprecation of mutate_*() and summarise_*()","title":"Using dplyr in packages","text":"following mutate() summarise() variants deprecated dplyr 0.7.0: mutate_each(), summarise_each() following variants superseded dplyr 1.0.0: mutate_all(), summarise_all() mutate_if(), summarise_if() mutate_at(), summarise_at() replaced using mutate() summarise() combination across(), introduced dplyr 1.0.0. 
used mutate_all() mutate_each() without supplying selection, update use across(everything()): provided selection mutate_at() mutate_each(), can switch across() selection: used predicates mutate_if(), can switch using across() combination ():","code":"starwars %>% mutate_each(funs(as.character)) starwars %>% mutate_all(funs(as.character)) starwars %>% mutate(across(everything(), as.character)) starwars %>% mutate_each(funs(as.character), height, mass) starwars %>% mutate_at(vars(height, mass), as.character) starwars %>% mutate(across(c(height, mass), as.character)) starwars %>% mutate_if(is.factor, as.character) starwars %>% mutate(across(where(is.factor), as.character))"},{"path":"https://dplyr.tidyverse.org/dev/articles/in-packages.html","id":"data-frame-subclasses","dir":"Articles","previous_headings":"","what":"Data frame subclasses","title":"Using dplyr in packages","text":"package author extending dplyr work new data frame subclass, encourage read documentation ?dplyr_extending. contains advice implement minimal number extension generics possible get maximal compatibility across dplyr’s verbs.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/programming.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"Programming with dplyr","text":"dplyr verbs use tidy evaluation way. Tidy evaluation special type non-standard evaluation used throughout tidyverse. two basic forms found dplyr: arrange(), count(), filter(), group_by(), mutate(), summarise() use data masking can use data variables variables environment (.e. write my_variable df$my_variable). across(), relocate(), rename(), select(), pull() use tidy selection can easily choose variables based position, name, type (e.g. starts_with(\"x\") .numeric). determine whether function argument uses data masking tidy selection, look documentation: arguments list, ’ll see . 
Data masking tidy selection make interactive data exploration fast fluid, add new challenges attempt use indirectly loop function. vignette shows overcome challenges. ’ll first go basics data masking tidy selection, talk use indirectly, show number recipes solve common problems. vignette give minimum knowledge need effective programmer tidy evaluation. ’d like learn underlying theory, precisely ’s different non-standard evaluation, recommend read Metaprogramming chapters Advanced R.","code":"library(dplyr)"},{"path":"https://dplyr.tidyverse.org/dev/articles/programming.html","id":"data-masking","dir":"Articles","previous_headings":"","what":"Data masking","title":"Programming with dplyr","text":"Data masking makes data manipulation faster requires less typing. (all1) base R functions need refer variables $, leading code repeats name data frame many times: dplyr equivalent code concise data masking allows need type starwars :","code":"starwars[starwars$homeworld == \"Naboo\" & starwars$species == \"Human\", ,] starwars %>% filter(homeworld == \"Naboo\", species == \"Human\")"},{"path":"https://dplyr.tidyverse.org/dev/articles/programming.html","id":"data--and-env-variables","dir":"Articles","previous_headings":"Data masking","what":"Data- and env-variables","title":"Programming with dplyr","text":"key idea behind data masking blurs line two different meanings word “variable”: env-variables “programming” variables live environment. usually created <-. data-variables “statistical” variables live data frame. usually come data files (e.g. .csv, .xls), created manipulating existing variables. make definitions little concrete, take piece code: creates env-variable, df, contains two data-variables, x y. extracts data-variable x env-variable df using $. think blurring meaning “variable” really nice feature interactive data analysis allows refer data-vars , without prefix. seems fairly intuitive since many newer R users attempt write diamonds[x == 0 | y == 0, ]. 
Unfortunately, benefit come free. start program tools, ’re going grapple distinction. hard ’ve never think , ’ll take brain learn new concepts categories. However, ’ve teased apart idea “variable” data-variable env-variable, think ’ll find fairly straightforward use.","code":"df <- data.frame(x = runif(3), y = runif(3)) df$x #> [1] 0.08075014 0.83433304 0.60076089"},{"path":"https://dplyr.tidyverse.org/dev/articles/programming.html","id":"indirection","dir":"Articles","previous_headings":"Data masking","what":"Indirection","title":"Programming with dplyr","text":"main challenge programming functions use data masking arises introduce indirection, .e. want get data-variable env-variable instead directly typing data-variable’s name. two main cases: data-variable function argument (.e. env-variable holds promise2), need embrace argument surrounding doubled braces, like filter(df, {{ var }}). following function uses embracing create wrapper around summarise() computes minimum maximum values variable, well number observations summarised: env-variable character vector, need index .data pronoun [[, like summarise(df, mean = mean(.data[[var]])). following example uses .data count number unique values variable mtcars: Note .data data frame; ’s special construct, pronoun, allows access current variables either directly, .data$x indirectly .data[[var]]. Don’t expect functions work .","code":"var_summary <- function(data, var) { data %>% summarise(n = n(), min = min({{ var }}), max = max({{ var }})) } mtcars %>% group_by(cyl) %>% var_summary(mpg) for (var in names(mtcars)) { mtcars %>% count(.data[[var]]) %>% print() }"},{"path":"https://dplyr.tidyverse.org/dev/articles/programming.html","id":"name-injection","dir":"Articles","previous_headings":"Data masking","what":"Name injection","title":"Programming with dplyr","text":"Many data masking functions also use dynamic dots, gives another useful feature: generating names programmatically using := instead =. 
two basics forms, illustrated tibble(): name env-variable, can use glue syntax interpolate : name derived data-variable argument, can use embracing syntax: Learn ?rlang::`dyn-dots`.","code":"name <- \"susan\" tibble(\"{name}\" := 2) #> # A tibble: 1 × 1 #> susan #> #> 1 2 my_df <- function(x) { tibble(\"{{x}}_2\" := x * 2) } my_var <- 10 my_df(my_var) #> # A tibble: 1 × 1 #> my_var_2 #> #> 1 20"},{"path":"https://dplyr.tidyverse.org/dev/articles/programming.html","id":"tidy-selection","dir":"Articles","previous_headings":"","what":"Tidy selection","title":"Programming with dplyr","text":"Data masking makes easy compute values within dataset. Tidy selection complementary tool makes easy work columns dataset.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/programming.html","id":"the-tidyselect-dsl","dir":"Articles","previous_headings":"Tidy selection","what":"The tidyselect DSL","title":"Programming with dplyr","text":"Underneath functions use tidy selection tidyselect package. provides miniature domain specific language makes easy select columns name, position, type. example: select(df, 1) selects first column; select(df, last_col()) selects last column. select(df, c(, b, c)) selects columns , b, c. select(df, starts_with(\"\")) selects columns whose name starts “”; select(df, ends_with(\"z\")) selects columns whose name ends “z”. select(df, (.numeric)) selects numeric columns. can see details ?dplyr_tidy_select.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/programming.html","id":"indirection-1","dir":"Articles","previous_headings":"Tidy selection","what":"Indirection","title":"Programming with dplyr","text":"data masking, tidy selection makes common task easier cost making less common task harder. want use tidy select indirectly column specification stored intermediate variable, ’ll need learn new tools. 
, two forms indirection: data-variable env-variable function argument, use technique data masking: embrace argument surrounding doubled braces. following function summarises data frame computing mean variables selected user: env-variable character vector, need use all_of() any_of() depending whether want function error variable found. following code uses all_of() select variables found character vector; ! plus all_of() select variables found character vector:","code":"summarise_mean <- function(data, vars) { data %>% summarise(n = n(), across({{ vars }}, mean)) } mtcars %>% group_by(cyl) %>% summarise_mean(where(is.numeric)) vars <- c(\"mpg\", \"vs\") mtcars %>% select(all_of(vars)) mtcars %>% select(!all_of(vars))"},{"path":"https://dplyr.tidyverse.org/dev/articles/programming.html","id":"how-tos","dir":"Articles","previous_headings":"","what":"How-tos","title":"Programming with dplyr","text":"following examples solve grab bag common problems. show minimum amount code can get basic idea; real problems require code combining multiple techniques.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/programming.html","id":"user-supplied-data","dir":"Articles","previous_headings":"How-tos","what":"User-supplied data","title":"Programming with dplyr","text":"check documentation, ’ll see .data never uses data masking tidy select. 
means don’t need anything special function:","code":"mutate_y <- function(data) { mutate(data, y = a + x) }"},{"path":"https://dplyr.tidyverse.org/dev/articles/programming.html","id":"one-or-more-user-supplied-expressions","dir":"Articles","previous_headings":"How-tos","what":"One or more user-supplied expressions","title":"Programming with dplyr","text":"want user supply expression ’s passed onto argument uses data masking tidy select, embrace argument: generalises straightforward way want use one user-supplied expression multiple places: want user provide multiple expressions, embrace : want use name variable output, can embrace variable name left-hand side := {{:","code":"my_summarise <- function(data, group_var) { data %>% group_by({{ group_var }}) %>% summarise(mean = mean(mass)) } my_summarise2 <- function(data, expr) { data %>% summarise( mean = mean({{ expr }}), sum = sum({{ expr }}), n = n() ) } my_summarise3 <- function(data, mean_var, sd_var) { data %>% summarise(mean = mean({{ mean_var }}), sd = sd({{ sd_var }})) } my_summarise4 <- function(data, expr) { data %>% summarise( \"mean_{{expr}}\" := mean({{ expr }}), \"sum_{{expr}}\" := sum({{ expr }}), \"n_{{expr}}\" := n() ) } my_summarise5 <- function(data, mean_var, sd_var) { data %>% summarise( \"mean_{{mean_var}}\" := mean({{ mean_var }}), \"sd_{{sd_var}}\" := sd({{ sd_var }}) ) }"},{"path":"https://dplyr.tidyverse.org/dev/articles/programming.html","id":"any-number-of-user-supplied-expressions","dir":"Articles","previous_headings":"How-tos","what":"Any number of user-supplied expressions","title":"Programming with dplyr","text":"want take arbitrary number user supplied expressions, use .... often useful want give user full control single part pipeline, like group_by() mutate(). use ... way, make sure arguments start . reduce chances argument clashes; see https://design.tidyverse.org/dots-prefix.html details.","code":"my_summarise <- function(.data, ...) { .data %>% group_by(...) 
%>% summarise(mass = mean(mass, na.rm = TRUE), height = mean(height, na.rm = TRUE)) } starwars %>% my_summarise(homeworld) #> # A tibble: 49 × 3 #> homeworld mass height #> #> 1 Alderaan 64 176. #> 2 Aleen Minor 15 79 #> 3 Bespin 79 175 #> 4 Bestine IV 110 180 #> # ℹ 45 more rows starwars %>% my_summarise(sex, gender) #> `summarise()` has grouped output by 'sex'. You can override using the #> `.groups` argument. #> # A tibble: 6 × 4 #> # Groups: sex [5] #> sex gender mass height #> #> 1 female feminine 54.7 172. #> 2 hermaphroditic masculine 1358 175 #> 3 male masculine 80.2 179. #> 4 none feminine NaN 96 #> # ℹ 2 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/programming.html","id":"creating-multiple-columns","dir":"Articles","previous_headings":"How-tos","what":"Creating multiple columns","title":"Programming with dplyr","text":"Sometimes can useful single expression return multiple columns. can returning unnamed data frame: sort function useful inside summarise() mutate() allow add multiple columns returning data frame: Notice set .unpack = TRUE inside across(). tells across() unpack data frame returned quantile_df() respective columns, combining column names original columns (x y) column names returned function (val quant). function returns multiple rows per group, ’ll need switch summarise() reframe(). 
summarise() restricted returning 1 row summaries per group, reframe() lifts restriction:","code":"quantile_df <- function(x, probs = c(0.25, 0.5, 0.75)) { tibble( val = quantile(x, probs), quant = probs ) } x <- 1:5 quantile_df(x) #> # A tibble: 3 × 2 #> val quant #> #> 1 2 0.25 #> 2 3 0.5 #> 3 4 0.75 df <- tibble( grp = rep(1:3, each = 10), x = runif(30), y = rnorm(30) ) df %>% group_by(grp) %>% summarise(quantile_df(x, probs = .5)) #> # A tibble: 3 × 3 #> grp val quant #> #> 1 1 0.361 0.5 #> 2 2 0.541 0.5 #> 3 3 0.456 0.5 df %>% group_by(grp) %>% summarise(across(x:y, ~ quantile_df(.x, probs = .5), .unpack = TRUE)) #> # A tibble: 3 × 5 #> grp x_val x_quant y_val y_quant #> #> 1 1 0.361 0.5 0.174 0.5 #> 2 2 0.541 0.5 -0.0110 0.5 #> 3 3 0.456 0.5 0.0583 0.5 df %>% group_by(grp) %>% reframe(across(x:y, quantile_df, .unpack = TRUE)) #> # A tibble: 9 × 5 #> grp x_val x_quant y_val y_quant #> #> 1 1 0.219 0.25 -0.710 0.25 #> 2 1 0.361 0.5 0.174 0.5 #> 3 1 0.674 0.75 0.524 0.75 #> 4 2 0.315 0.25 -0.690 0.25 #> # ℹ 5 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/programming.html","id":"transforming-user-supplied-variables","dir":"Articles","previous_headings":"How-tos","what":"Transforming user-supplied variables","title":"Programming with dplyr","text":"want user provide set data-variables transformed, use across() pick(): can use idea multiple sets input data-variables: Use .names argument across() control names output.","code":"my_summarise <- function(data, summary_vars) { data %>% summarise(across({{ summary_vars }}, ~ mean(., na.rm = TRUE))) } starwars %>% group_by(species) %>% my_summarise(c(mass, height)) #> # A tibble: 38 × 3 #> species mass height #> #> 1 Aleena 15 79 #> 2 Besalisk 102 198 #> 3 Cerean 82 198 #> 4 Chagrian NaN 196 #> # ℹ 34 more rows my_summarise <- function(data, group_var, summarise_var) { data %>% group_by(pick({{ group_var }})) %>% summarise(across({{ summarise_var }}, mean)) } my_summarise <- function(data, group_var, 
summarise_var) { data %>% group_by(pick({{ group_var }})) %>% summarise(across({{ summarise_var }}, mean, .names = \"mean_{.col}\")) }"},{"path":"https://dplyr.tidyverse.org/dev/articles/programming.html","id":"loop-over-multiple-variables","dir":"Articles","previous_headings":"How-tos","what":"Loop over multiple variables","title":"Programming with dplyr","text":"character vector variable names, want operate loop, index special .data pronoun: technique works loop alternatives like base R apply() family purrr map() family: (Note x .data[[x]] always treated env-variable; never come data.)","code":"for (var in names(mtcars)) { mtcars %>% count(.data[[var]]) %>% print() } mtcars %>% names() %>% purrr::map(~ count(mtcars, .data[[.x]]))"},{"path":"https://dplyr.tidyverse.org/dev/articles/programming.html","id":"use-a-variable-from-an-shiny-input","dir":"Articles","previous_headings":"How-tos","what":"Use a variable from an Shiny input","title":"Programming with dplyr","text":"Many Shiny input controls return character vectors, can use approach : .data[[input$var]]. See https://mastering-shiny.org/action-tidy.html details case studies.","code":"library(shiny) ui <- fluidPage( selectInput(\"var\", \"Variable\", choices = names(diamonds)), tableOutput(\"output\") ) server <- function(input, output, session) { data <- reactive(filter(diamonds, .data[[input$var]] > 0)) output$output <- renderTable(head(data())) }"},{"path":"https://dplyr.tidyverse.org/dev/articles/rowwise.html","id":"creating","dir":"Articles","previous_headings":"","what":"Creating","title":"Row-wise operations","text":"Row-wise operations require special type grouping group consists single row. create rowwise(): Like group_by(), rowwise() doesn’t really anything ; just changes verbs work. example, compare results mutate() following code: use mutate() regular data frame, computes mean x, y, z across rows. apply row-wise data frame, computes mean row. 
can optionally supply “identifier” variables call rowwise(). variables preserved call summarise(), behave somewhat similarly grouping variables passed group_by(): rowwise() just special form grouping, want remove data frame, just call ungroup().","code":"df <- tibble(x = 1:2, y = 3:4, z = 5:6) df %>% rowwise() #> # A tibble: 2 × 3 #> # Rowwise: #> x y z #> #> 1 1 3 5 #> 2 2 4 6 df %>% mutate(m = mean(c(x, y, z))) #> # A tibble: 2 × 4 #> x y z m #> #> 1 1 3 5 3.5 #> 2 2 4 6 3.5 df %>% rowwise() %>% mutate(m = mean(c(x, y, z))) #> # A tibble: 2 × 4 #> # Rowwise: #> x y z m #> #> 1 1 3 5 3 #> 2 2 4 6 4 df <- tibble(name = c(\"Mara\", \"Hadley\"), x = 1:2, y = 3:4, z = 5:6) df %>% rowwise() %>% summarise(m = mean(c(x, y, z))) #> # A tibble: 2 × 1 #> m #> #> 1 3 #> 2 4 df %>% rowwise(name) %>% summarise(m = mean(c(x, y, z))) #> `summarise()` has grouped output by 'name'. You can override using the #> `.groups` argument. #> # A tibble: 2 × 2 #> # Groups: name [2] #> name m #> #> 1 Mara 3 #> 2 Hadley 4"},{"path":"https://dplyr.tidyverse.org/dev/articles/rowwise.html","id":"per-row-summary-statistics","dir":"Articles","previous_headings":"","what":"Per row summary statistics","title":"Row-wise operations","text":"dplyr::summarise() makes really easy summarise values across rows within one column. combined rowwise() also makes easy summarise values across columns within one row. see , ’ll start making little dataset: Let’s say want compute sum w, x, y, z row. start making row-wise data frame: can use mutate() add new column row, summarise() return just one summary: course, lot variables, ’s going tedious type every variable name. 
Instead, can use c_across() uses tidy selection syntax can succinctly select many variables: combine column-wise operations (see vignette(\"colwise\") details) compute proportion total column:","code":"df <- tibble(id = 1:6, w = 10:15, x = 20:25, y = 30:35, z = 40:45) df #> # A tibble: 6 × 5 #> id w x y z #> #> 1 1 10 20 30 40 #> 2 2 11 21 31 41 #> 3 3 12 22 32 42 #> 4 4 13 23 33 43 #> # ℹ 2 more rows rf <- df %>% rowwise(id) rf %>% mutate(total = sum(c(w, x, y, z))) #> # A tibble: 6 × 6 #> # Rowwise: id #> id w x y z total #> #> 1 1 10 20 30 40 100 #> 2 2 11 21 31 41 104 #> 3 3 12 22 32 42 108 #> 4 4 13 23 33 43 112 #> # ℹ 2 more rows rf %>% summarise(total = sum(c(w, x, y, z))) #> `summarise()` has grouped output by 'id'. You can override using the #> `.groups` argument. #> # A tibble: 6 × 2 #> # Groups: id [6] #> id total #> #> 1 1 100 #> 2 2 104 #> 3 3 108 #> 4 4 112 #> # ℹ 2 more rows rf %>% mutate(total = sum(c_across(w:z))) #> # A tibble: 6 × 6 #> # Rowwise: id #> id w x y z total #> #> 1 1 10 20 30 40 100 #> 2 2 11 21 31 41 104 #> 3 3 12 22 32 42 108 #> 4 4 13 23 33 43 112 #> # ℹ 2 more rows rf %>% mutate(total = sum(c_across(where(is.numeric)))) #> # A tibble: 6 × 6 #> # Rowwise: id #> id w x y z total #> #> 1 1 10 20 30 40 100 #> 2 2 11 21 31 41 104 #> 3 3 12 22 32 42 108 #> 4 4 13 23 33 43 112 #> # ℹ 2 more rows rf %>% mutate(total = sum(c_across(w:z))) %>% ungroup() %>% mutate(across(w:z, ~ . / total)) #> # A tibble: 6 × 6 #> id w x y z total #> #> 1 1 0.1 0.2 0.3 0.4 100 #> 2 2 0.106 0.202 0.298 0.394 104 #> 3 3 0.111 0.204 0.296 0.389 108 #> 4 4 0.116 0.205 0.295 0.384 112 #> # ℹ 2 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/rowwise.html","id":"row-wise-summary-functions","dir":"Articles","previous_headings":"Per row summary statistics","what":"Row-wise summary functions","title":"Row-wise operations","text":"rowwise() approach work summary function. need greater speed, ’s worth looking built-row-wise variant summary function. 
efficient operate data frame whole; don’t split rows, compute summary, join results back together . NB: use df (rf) pick() (c_across()) rowMeans() rowSums() take multi-row data frame input. Also note -id needed avoid selecting id pick(). wasn’t required rowwise data frame specified id identifier original call rowwise(), preventing selected grouping column.","code":"df %>% mutate(total = rowSums(pick(where(is.numeric), -id))) #> # A tibble: 6 × 6 #> id w x y z total #> #> 1 1 10 20 30 40 100 #> 2 2 11 21 31 41 104 #> 3 3 12 22 32 42 108 #> 4 4 13 23 33 43 112 #> # ℹ 2 more rows df %>% mutate(mean = rowMeans(pick(where(is.numeric), -id))) #> # A tibble: 6 × 6 #> id w x y z mean #> #> 1 1 10 20 30 40 25 #> 2 2 11 21 31 41 26 #> 3 3 12 22 32 42 27 #> 4 4 13 23 33 43 28 #> # ℹ 2 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/rowwise.html","id":"list-columns","dir":"Articles","previous_headings":"","what":"List-columns","title":"Row-wise operations","text":"rowwise() operations natural pairing list-columns. allow avoid explicit loops /functions apply() purrr::map() families.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/rowwise.html","id":"motivation","dir":"Articles","previous_headings":"List-columns","what":"Motivation","title":"Row-wise operations","text":"Imagine data frame, want count lengths element: might try calling length(): returns length column, length individual values. ’re R documentation aficionado, might know ’s already base R function just purpose: ’re experienced R programmer, might know apply function element list using sapply(), vapply(), one purrr map() functions: wouldn’t nice just write length(x) dplyr figure wanted compute length element inside x? 
Since ’re , might already guessing answer: just another application row-wise pattern.","code":"df <- tibble( x = list(1, 2:3, 4:6) ) df %>% mutate(l = length(x)) #> # A tibble: 3 × 2 #> x l #> #> 1 3 #> 2 3 #> 3 3 df %>% mutate(l = lengths(x)) #> # A tibble: 3 × 2 #> x l #> #> 1 1 #> 2 2 #> 3 3 df %>% mutate(l = sapply(x, length)) #> # A tibble: 3 × 2 #> x l #> #> 1 1 #> 2 2 #> 3 3 df %>% mutate(l = purrr::map_int(x, length)) #> # A tibble: 3 × 2 #> x l #> #> 1 1 #> 2 2 #> 3 3 df %>% rowwise() %>% mutate(l = length(x)) #> # A tibble: 3 × 2 #> # Rowwise: #> x l #> #> 1 1 #> 2 2 #> 3 3"},{"path":"https://dplyr.tidyverse.org/dev/articles/rowwise.html","id":"subsetting","dir":"Articles","previous_headings":"List-columns","what":"Subsetting","title":"Row-wise operations","text":"continue , wanted briefly mention magic makes work. isn’t something ’ll generally need think (’ll just work), ’s useful know something goes wrong. ’s important difference grouped data frame group happens one row, row-wise data frame every group always one row. Take two data frames: compute properties y, ’ll notice results look different: key difference mutate() slices columns pass length(y) grouped mutate uses [ row-wise mutate uses [[. following code gives flavour differences used loop: Note magic applies ’re referring existing columns, ’re creating new rows. 
potentially confusing, ’re fairly confident ’s least worst solution, particularly given hint error message.","code":"df <- tibble(g = 1:2, y = list(1:3, \"a\")) gf <- df %>% group_by(g) rf <- df %>% rowwise(g) gf %>% mutate(type = typeof(y), length = length(y)) #> # A tibble: 2 × 4 #> # Groups: g [2] #> g y type length #> #> 1 1 list 1 #> 2 2 list 1 rf %>% mutate(type = typeof(y), length = length(y)) #> # A tibble: 2 × 4 #> # Rowwise: g #> g y type length #> #> 1 1 integer 3 #> 2 2 character 1 # grouped out1 <- integer(2) for (i in 1:2) { out1[[i]] <- length(df$y[i]) } out1 #> [1] 1 1 # rowwise out2 <- integer(2) for (i in 1:2) { out2[[i]] <- length(df$y[[i]]) } out2 #> [1] 3 1 gf %>% mutate(y2 = y) #> # A tibble: 2 × 3 #> # Groups: g [2] #> g y y2 #> #> 1 1 #> 2 2 rf %>% mutate(y2 = y) #> Error in `mutate()`: #> ℹ In argument: `y2 = y`. #> ℹ In row 1. #> Caused by error: #> ! `y2` must be size 1, not 3. #> ℹ Did you mean: `y2 = list(y)` ? rf %>% mutate(y2 = list(y)) #> # A tibble: 2 × 3 #> # Rowwise: g #> g y y2 #> #> 1 1 #> 2 2 "},{"path":"https://dplyr.tidyverse.org/dev/articles/rowwise.html","id":"modelling","dir":"Articles","previous_headings":"List-columns","what":"Modelling","title":"Row-wise operations","text":"rowwise() data frames allow solve variety modelling problems think particularly elegant way. ’ll start creating nested data frame: little different usual group_by() output: visibly changed structure data. Now three rows (one group), list-col, data, stores data group. Also note output rowwise(); important ’s going make working list data frames much easier. 
one data frame per row, ’s straightforward make one model per row: supplement one set predictions per row: summarise model variety ways: easily access parameters model:","code":"by_cyl <- mtcars %>% nest_by(cyl) by_cyl #> # A tibble: 3 × 2 #> # Rowwise: cyl #> cyl data #> #> 1 4 #> 2 6 #> 3 8 mods <- by_cyl %>% mutate(mod = list(lm(mpg ~ wt, data = data))) mods #> # A tibble: 3 × 3 #> # Rowwise: cyl #> cyl data mod #> #> 1 4 #> 2 6 #> 3 8 mods <- mods %>% mutate(pred = list(predict(mod, data))) mods #> # A tibble: 3 × 4 #> # Rowwise: cyl #> cyl data mod pred #> #> 1 4 #> 2 6 #> 3 8 mods %>% summarise(rmse = sqrt(mean((pred - data$mpg) ^ 2))) #> `summarise()` has grouped output by 'cyl'. You can override using the #> `.groups` argument. #> # A tibble: 3 × 2 #> # Groups: cyl [3] #> cyl rmse #> #> 1 4 3.01 #> 2 6 0.985 #> 3 8 1.87 mods %>% summarise(rsq = summary(mod)$r.squared) #> `summarise()` has grouped output by 'cyl'. You can override using the #> `.groups` argument. #> # A tibble: 3 × 2 #> # Groups: cyl [3] #> cyl rsq #> #> 1 4 0.509 #> 2 6 0.465 #> 3 8 0.423 mods %>% summarise(broom::glance(mod)) #> `summarise()` has grouped output by 'cyl'. You can override using the #> `.groups` argument. 
#> # A tibble: 3 × 13 #> # Groups: cyl [3] #> cyl r.squared adj.r.squared sigma statistic p.value df logLik AIC #> #> 1 4 0.509 0.454 3.33 9.32 0.0137 1 -27.7 61.5 #> 2 6 0.465 0.357 1.17 4.34 0.0918 1 -9.83 25.7 #> 3 8 0.423 0.375 2.02 8.80 0.0118 1 -28.7 63.3 #> # ℹ 4 more variables: BIC , deviance , df.residual , #> # nobs mods %>% reframe(broom::tidy(mod)) #> # A tibble: 6 × 6 #> cyl term estimate std.error statistic p.value #> #> 1 4 (Intercept) 39.6 4.35 9.10 0.00000777 #> 2 4 wt -5.65 1.85 -3.05 0.0137 #> 3 6 (Intercept) 28.4 4.18 6.79 0.00105 #> 4 6 wt -2.78 1.33 -2.08 0.0918 #> # ℹ 2 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/rowwise.html","id":"repeated-function-calls","dir":"Articles","previous_headings":"","what":"Repeated function calls","title":"Row-wise operations","text":"rowwise() doesn’t just work functions return length-1 vector (aka summary functions); can work function result list. means rowwise() mutate() provide elegant way call function many times varying arguments, storing outputs alongside inputs.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/rowwise.html","id":"simulations","dir":"Articles","previous_headings":"Repeated function calls","what":"Simulations","title":"Row-wise operations","text":"think particularly elegant way perform simulations, lets store simulated values along parameters generated . example, imagine following data frame describes properties 3 samples uniform distribution: can supply parameters runif() using rowwise() mutate(): Note use list() - runif() returns multiple values mutate() expression return something length 1. list() means ’ll get list column row list containing multiple values. 
forget use list(), dplyr give hint:","code":"df <- tribble( ~ n, ~ min, ~ max, 1, 0, 1, 2, 10, 100, 3, 100, 1000, ) df %>% rowwise() %>% mutate(data = list(runif(n, min, max))) #> # A tibble: 3 × 4 #> # Rowwise: #> n min max data #> #> 1 1 0 1 #> 2 2 10 100 #> 3 3 100 1000 df %>% rowwise() %>% mutate(data = runif(n, min, max)) #> Error in `mutate()`: #> ℹ In argument: `data = runif(n, min, max)`. #> ℹ In row 2. #> Caused by error: #> ! `data` must be size 1, not 2. #> ℹ Did you mean: `data = list(runif(n, min, max))` ?"},{"path":"https://dplyr.tidyverse.org/dev/articles/rowwise.html","id":"multiple-combinations","dir":"Articles","previous_headings":"Repeated function calls","what":"Multiple combinations","title":"Row-wise operations","text":"want call function every combination inputs? can use expand.grid() (tidyr::expand_grid()) generate data frame repeat pattern :","code":"df <- expand.grid(mean = c(-1, 0, 1), sd = c(1, 10, 100)) df %>% rowwise() %>% mutate(data = list(rnorm(10, mean, sd))) #> # A tibble: 9 × 3 #> # Rowwise: #> mean sd data #> #> 1 -1 1 #> 2 0 1 #> 3 1 1 #> 4 -1 10 #> # ℹ 5 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/rowwise.html","id":"varying-functions","dir":"Articles","previous_headings":"Repeated function calls","what":"Varying functions","title":"Row-wise operations","text":"complicated problems, might also want vary function called. tends bit awkward fit approach columns input tibble less regular. 
’s still possible, ’s natural place use .call():","code":"df <- tribble( ~rng, ~params, \"runif\", list(n = 10), \"rnorm\", list(n = 20), \"rpois\", list(n = 10, lambda = 5), ) %>% rowwise() df %>% mutate(data = list(do.call(rng, params))) #> # A tibble: 3 × 3 #> # Rowwise: #> rng params data #> #> 1 runif #> 2 rnorm #> 3 rpois "},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/articles/rowwise.html","id":"rowwise","dir":"Articles","previous_headings":"Previously","what":"rowwise()","title":"Row-wise operations","text":"rowwise() also questioning quite time, partly didn’t appreciate many people needed native ability compute summaries across multiple variables row. alternative, recommended performing row-wise operations purrr map() functions. However, challenging needed pick map function based number arguments varying type result, required quite knowledge purrr functions. also resistant rowwise() felt like automatically switching [ [[ magical way automatically list()-ing results made () magical. ’ve now persuaded row-wise magic good magic partly people find distinction [ [[ mystifying rowwise() means don’t need think . Since rowwise() clearly useful longer questioning, expect around long term.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/rowwise.html","id":"do","dir":"Articles","previous_headings":"Previously","what":"do()","title":"Row-wise operations","text":"’ve questioned need () quite time, never felt similar dplyr verbs. two main modes operation: Without argument names: call functions input output data frames using . refer “current” group. example, following code gets first row group: superseded pick() plus reframe(), variant summarise() can create multiple rows columns per group. arguments: worked like mutate() automatically wrapped every element list: now believe behaviour magical useful, can replaced summarise() pick(). needed (unlike ), can wrap results list . 
addition pick()/across() increased scope summarise()/reframe() means () longer needed, now superseded.","code":"mtcars %>% group_by(cyl) %>% do(head(., 1)) #> # A tibble: 3 × 11 #> # Groups: cyl [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 2 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 3 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 mtcars %>% group_by(cyl) %>% reframe(head(pick(everything()), 1)) #> # A tibble: 3 × 11 #> cyl mpg disp hp drat wt qsec vs am gear carb #> #> 1 4 22.8 108 93 3.85 2.32 18.6 1 1 4 1 #> 2 6 21 160 110 3.9 2.62 16.5 0 1 4 4 #> 3 8 18.7 360 175 3.15 3.44 17.0 0 0 3 2 mtcars %>% group_by(cyl) %>% do(nrows = nrow(.)) #> # A tibble: 3 × 2 #> # Rowwise: #> cyl nrows #> #> 1 4 #> 2 6 #> 3 8 mtcars %>% group_by(cyl) %>% summarise(nrows = nrow(pick(everything()))) #> # A tibble: 3 × 2 #> cyl nrows #> #> 1 4 11 #> 2 6 7 #> 3 8 14"},{"path":"https://dplyr.tidyverse.org/dev/articles/two-table.html","id":"mutating-joins","dir":"Articles","previous_headings":"","what":"Mutating joins","title":"Two-table verbs","text":"Mutating joins allow combine variables multiple tables. example, consider flights airlines data nycflights13 package. one table flight information abbreviation carrier, another mapping abbreviations full names. can use join add carrier names flight data:","code":"library(nycflights13) # Drop unimportant variables so it's easier to understand the join results. 
flights2 <- flights %>% select(year:day, hour, origin, dest, tailnum, carrier) flights2 %>% left_join(airlines) #> Joining with `by = join_by(carrier)` #> # A tibble: 336,776 × 9 #> year month day hour origin dest tailnum carrier name #> #> 1 2013 1 1 5 EWR IAH N14228 UA United Air Lines I… #> 2 2013 1 1 5 LGA IAH N24211 UA United Air Lines I… #> 3 2013 1 1 5 JFK MIA N619AA AA American Airlines … #> 4 2013 1 1 5 JFK BQN N804JB B6 JetBlue Airways #> 5 2013 1 1 6 LGA ATL N668DN DL Delta Air Lines In… #> # ℹ 336,771 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/two-table.html","id":"controlling-how-the-tables-are-matched","dir":"Articles","previous_headings":"Mutating joins","what":"Controlling how the tables are matched","title":"Two-table verbs","text":"well x y, mutating join takes argument controls variables used match observations two tables. ways specify , illustrate various tables nycflights13: NULL, default. dplyr use variables appear tables, natural join. example, flights weather tables match common variables: year, month, day, hour origin. character vector, = \"x\". Like natural join, uses common variables. example, flights planes year columns, mean different things want join tailnum. Note year columns output disambiguated suffix. named character vector: = c(\"x\" = \"\"). match variable x table x variable table y. variables use used output. 
flight origin destination airport, need specify one want join :","code":"flights2 %>% left_join(weather) #> Joining with `by = join_by(year, month, day, hour, origin)` #> # A tibble: 336,776 × 18 #> year month day hour origin dest tailnum carrier temp dewp humid #> #> 1 2013 1 1 5 EWR IAH N14228 UA 39.0 28.0 64.4 #> 2 2013 1 1 5 LGA IAH N24211 UA 39.9 25.0 54.8 #> 3 2013 1 1 5 JFK MIA N619AA AA 39.0 27.0 61.6 #> 4 2013 1 1 5 JFK BQN N804JB B6 39.0 27.0 61.6 #> 5 2013 1 1 6 LGA ATL N668DN DL 39.9 25.0 54.8 #> # ℹ 336,771 more rows #> # ℹ 7 more variables: wind_dir , wind_speed , wind_gust , #> # precip , pressure , visib , time_hour flights2 %>% left_join(planes, by = \"tailnum\") #> # A tibble: 336,776 × 16 #> year.x month day hour origin dest tailnum carrier year.y type #> #> 1 2013 1 1 5 EWR IAH N14228 UA 1999 Fixed wing… #> 2 2013 1 1 5 LGA IAH N24211 UA 1998 Fixed wing… #> 3 2013 1 1 5 JFK MIA N619AA AA 1990 Fixed wing… #> 4 2013 1 1 5 JFK BQN N804JB B6 2012 Fixed wing… #> 5 2013 1 1 6 LGA ATL N668DN DL 1991 Fixed wing… #> # ℹ 336,771 more rows #> # ℹ 6 more variables: manufacturer , model , engines , #> # seats , speed , engine flights2 %>% left_join(airports, c(\"dest\" = \"faa\")) #> # A tibble: 336,776 × 15 #> year month day hour origin dest tailnum carrier name lat lon #> #> 1 2013 1 1 5 EWR IAH N14228 UA George… 30.0 -95.3 #> 2 2013 1 1 5 LGA IAH N24211 UA George… 30.0 -95.3 #> 3 2013 1 1 5 JFK MIA N619AA AA Miami … 25.8 -80.3 #> 4 2013 1 1 5 JFK BQN N804JB B6 NA NA NA #> 5 2013 1 1 6 LGA ATL N668DN DL Hartsf… 33.6 -84.4 #> # ℹ 336,771 more rows #> # ℹ 4 more variables: alt , tz , dst , tzone flights2 %>% left_join(airports, c(\"origin\" = \"faa\")) #> # A tibble: 336,776 × 15 #> year month day hour origin dest tailnum carrier name lat lon #> #> 1 2013 1 1 5 EWR IAH N14228 UA Newark… 40.7 -74.2 #> 2 2013 1 1 5 LGA IAH N24211 UA La Gua… 40.8 -73.9 #> 3 2013 1 1 5 JFK MIA N619AA AA John F… 40.6 -73.8 #> 4 2013 1 1 5 JFK BQN N804JB B6 John F… 40.6 -73.8 #> 5 
2013 1 1 6 LGA ATL N668DN DL La Gua… 40.8 -73.9 #> # ℹ 336,771 more rows #> # ℹ 4 more variables: alt , tz , dst , tzone "},{"path":"https://dplyr.tidyverse.org/dev/articles/two-table.html","id":"types-of-join","dir":"Articles","previous_headings":"Mutating joins","what":"Types of join","title":"Two-table verbs","text":"four types mutating join, differ behaviour match found. ’ll illustrate simple example: inner_join(x, y) includes observations match x y. left_join(x, y) includes observations x, regardless whether match . commonly used join ensures don’t lose observations primary table. right_join(x, y) includes observations y. ’s equivalent left_join(y, x), columns rows ordered differently. full_join() includes observations x y. left, right full joins collectively know outer joins. row doesn’t match outer join, new variables filled missing values.","code":"df1 <- tibble(x = c(1, 2), y = 2:1) df2 <- tibble(x = c(3, 1), a = 10, b = \"a\") df1 %>% inner_join(df2) %>% knitr::kable() #> Joining with `by = join_by(x)` df1 %>% left_join(df2) #> Joining with `by = join_by(x)` #> # A tibble: 2 × 4 #> x y a b #> #> 1 1 2 10 a #> 2 2 1 NA NA df1 %>% right_join(df2) #> Joining with `by = join_by(x)` #> # A tibble: 2 × 4 #> x y a b #> #> 1 1 2 10 a #> 2 3 NA 10 a df2 %>% left_join(df1) #> Joining with `by = join_by(x)` #> # A tibble: 2 × 4 #> x a b y #> #> 1 3 10 a NA #> 2 1 10 a 2 df1 %>% full_join(df2) #> Joining with `by = join_by(x)` #> # A tibble: 3 × 4 #> x y a b #> #> 1 1 2 10 a #> 2 2 1 NA NA #> 3 3 NA 10 a"},{"path":"https://dplyr.tidyverse.org/dev/articles/two-table.html","id":"observations","dir":"Articles","previous_headings":"Mutating joins","what":"Observations","title":"Two-table verbs","text":"mutating joins primarily used add new variables, can also generate new observations. 
match unique, join add possible combinations (Cartesian product) matching observations:","code":"df1 <- tibble(x = c(1, 1, 2), y = 1:3) df2 <- tibble(x = c(1, 1, 2), z = c(\"a\", \"b\", \"a\")) df1 %>% left_join(df2) #> Joining with `by = join_by(x)` #> Warning in left_join(., df2): Detected an unexpected many-to-many relationship between `x` and `y`. #> ℹ Row 1 of `x` matches multiple rows in `y`. #> ℹ Row 1 of `y` matches multiple rows in `x`. #> ℹ If a many-to-many relationship is expected, set `relationship = #> \"many-to-many\"` to silence this warning. #> # A tibble: 5 × 3 #> x y z #> #> 1 1 1 a #> 2 1 1 b #> 3 1 2 a #> 4 1 2 b #> 5 2 3 a"},{"path":"https://dplyr.tidyverse.org/dev/articles/two-table.html","id":"filtering-joins","dir":"Articles","previous_headings":"","what":"Filtering joins","title":"Two-table verbs","text":"Filtering joins match observations way mutating joins, affect observations, variables. two types: semi_join(x, y) keeps observations x match y. anti_join(x, y) drops observations x match y. useful diagnosing join mismatches. example, many flights nycflights13 dataset don’t matching tail number planes table: ’re worried observations joins match, start semi_join() anti_join(). semi_join() anti_join() never duplicate; ever remove observations.","code":"library(\"nycflights13\") flights %>% anti_join(planes, by = \"tailnum\") %>% count(tailnum, sort = TRUE) #> # A tibble: 722 × 2 #> tailnum n #> #> 1 NA 2512 #> 2 N725MQ 575 #> 3 N722MQ 513 #> 4 N723MQ 507 #> 5 N713MQ 483 #> # ℹ 717 more rows df1 <- tibble(x = c(1, 1, 3, 4), y = 1:4) df2 <- tibble(x = c(1, 1, 2), z = c(\"a\", \"b\", \"a\")) # Four rows to start with: df1 %>% nrow() #> [1] 4 # And we get four rows after the join df1 %>% inner_join(df2, by = \"x\") %>% nrow() #> Warning in inner_join(., df2, by = \"x\"): Detected an unexpected many-to-many relationship between `x` and `y`. #> ℹ Row 1 of `x` matches multiple rows in `y`. #> ℹ Row 1 of `y` matches multiple rows in `x`. 
#> ℹ If a many-to-many relationship is expected, set `relationship = #> \"many-to-many\"` to silence this warning. #> [1] 4 # But only two rows actually match df1 %>% semi_join(df2, by = \"x\") %>% nrow() #> [1] 2"},{"path":"https://dplyr.tidyverse.org/dev/articles/two-table.html","id":"set-operations","dir":"Articles","previous_headings":"","what":"Set operations","title":"Two-table verbs","text":"final type two-table verb set operations. expect x y inputs variables, treat observations like sets: intersect(x, y): return observations x y union(x, y): return unique observations x y setdiff(x, y): return observations x, y. Given simple data: four possibilities :","code":"(df1 <- tibble(x = 1:2, y = c(1L, 1L))) #> # A tibble: 2 × 2 #> x y #> #> 1 1 1 #> 2 2 1 (df2 <- tibble(x = 1:2, y = 1:2)) #> # A tibble: 2 × 2 #> x y #> #> 1 1 1 #> 2 2 2 intersect(df1, df2) #> # A tibble: 1 × 2 #> x y #> #> 1 1 1 # Note that we get 3 rows, not 4 union(df1, df2) #> # A tibble: 3 × 2 #> x y #> #> 1 1 1 #> 2 2 1 #> 3 2 2 setdiff(df1, df2) #> # A tibble: 1 × 2 #> x y #> #> 1 2 1 setdiff(df2, df1) #> # A tibble: 1 × 2 #> x y #> #> 1 2 2"},{"path":"https://dplyr.tidyverse.org/dev/articles/two-table.html","id":"multiple-table-verbs","dir":"Articles","previous_headings":"","what":"Multiple-table verbs","title":"Two-table verbs","text":"dplyr provide functions working three tables. Instead use purrr::reduce() Reduce(), described Advanced R, iteratively combine two-table verbs handle many tables need.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/window-functions.html","id":"types-of-window-functions","dir":"Articles","previous_headings":"","what":"Types of window functions","title":"Window functions","text":"five main families window functions. Two families unrelated aggregation functions: Ranking ordering functions: row_number(), min_rank(), dense_rank(), cume_dist(), percent_rank(), ntile(). functions take vector order , return various types ranks. 
Offsets lead() lag() allow access previous next values vector, making easy compute differences trends. three families variations familiar aggregate functions: Cumulative aggregates: cumsum(), cummin(), cummax() (base R), cumall(), cumany(), cummean() (dplyr). Rolling aggregates operate fixed width window. won’t find base R dplyr, many implementations packages, RcppRoll. Recycled aggregates, aggregate repeated match length input. needed R vector recycling automatically recycles aggregates needed. important SQL, presence aggregation function usually tells database return one row per group. family described detail , focussing general goals use dplyr. details, refer individual function documentation.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/window-functions.html","id":"ranking-functions","dir":"Articles","previous_headings":"","what":"Ranking functions","title":"Window functions","text":"ranking functions variations theme, differing handle ties: ’re familiar R, may recognise row_number() min_rank() can computed base rank() function various values ties.method argument. functions provided save little typing, make easier convert R SQL. Two ranking functions return numbers 0 1. percent_rank() gives percentage rank; cume_dist() gives proportion values less equal current value. useful want select (example) top 10% records within group. example: Finally, ntile() divides data n evenly sized buckets. ’s coarse ranking, can used mutate() divide data buckets summary. example, use ntile() divide players within team four ranked groups, calculate average number games within group. ranking functions rank lowest highest small input values get small ranks. 
Use desc() rank highest lowest.","code":"x <- c(1, 1, 2, 2, 2) row_number(x) #> [1] 1 2 3 4 5 min_rank(x) #> [1] 1 1 3 3 3 dense_rank(x) #> [1] 1 1 2 2 2 cume_dist(x) #> [1] 0.4 0.4 1.0 1.0 1.0 percent_rank(x) #> [1] 0.0 0.0 0.5 0.5 0.5 filter(players, cume_dist(desc(G)) < 0.1) #> # A tibble: 1,090 × 7 #> # Groups: playerID [995] #> playerID yearID teamID G AB R H #> #> 1 aaronha01 1963 ML1 161 631 121 201 #> 2 aaronha01 1968 ATL 160 606 84 174 #> 3 abbotji01 1991 CAL 34 0 0 0 #> 4 abernte02 1965 CHN 84 18 1 3 #> # ℹ 1,086 more rows by_team_player <- group_by(batting, teamID, playerID) by_team <- summarise(by_team_player, G = sum(G)) #> `summarise()` has grouped output by 'teamID'. You can override using the #> `.groups` argument. by_team_quartile <- group_by(by_team, quartile = ntile(G, 4)) summarise(by_team_quartile, mean(G)) #> # A tibble: 4 × 2 #> quartile `mean(G)` #> #> 1 1 22.7 #> 2 2 91.8 #> 3 3 253. #> 4 4 961."},{"path":"https://dplyr.tidyverse.org/dev/articles/window-functions.html","id":"lead-and-lag","dir":"Articles","previous_headings":"","what":"Lead and lag","title":"Window functions","text":"lead() lag() produce offset versions input vector either ahead behind original vector. can use : Compute differences percent changes. Using lag() convenient diff() n inputs diff() returns n - 1 outputs. Find value changes. lead() lag() optional argument order_by. set, instead using row order determine value comes another, use another variable. important already sorted data, want sort one way lag another. 
’s simple example happens don’t specify order_by need :","code":"x <- 1:5 lead(x) #> [1] 2 3 4 5 NA lag(x) #> [1] NA 1 2 3 4 # Compute the relative change in games played mutate(players, G_delta = G - lag(G)) # Find when a player changed teams filter(players, teamID != lag(teamID)) df <- data.frame(year = 2000:2005, value = (0:5) ^ 2) scrambled <- df[sample(nrow(df)), ] wrong <- mutate(scrambled, prev_value = lag(value)) arrange(wrong, year) #> year value prev_value #> 1 2000 0 4 #> 2 2001 1 0 #> 3 2002 4 9 #> 4 2003 9 16 #> 5 2004 16 NA #> 6 2005 25 1 right <- mutate(scrambled, prev_value = lag(value, order_by = year)) arrange(right, year) #> year value prev_value #> 1 2000 0 NA #> 2 2001 1 0 #> 3 2002 4 1 #> 4 2003 9 4 #> 5 2004 16 9 #> 6 2005 25 16"},{"path":"https://dplyr.tidyverse.org/dev/articles/window-functions.html","id":"cumulative-aggregates","dir":"Articles","previous_headings":"","what":"Cumulative aggregates","title":"Window functions","text":"Base R provides cumulative sum (cumsum()), cumulative min (cummin()), cumulative max (cummax()). (also provides cumprod() rarely useful). common accumulating functions cumany() cumall(), cumulative versions || &&, cummean(), cumulative mean. included base R, efficient versions provided dplyr. cumany() cumall() useful selecting rows , rows , condition true first (last) time. example, can use cumany() find records player played year 150 games: Like lead lag, may want control order accumulation occurs. None built functions order_by argument dplyr provides helper: order_by(). 
give variable want order , call window function: function uses bit non-standard evaluation, wouldn’t recommend using inside another function; use simpler less concise with_order() instead.","code":"filter(players, cumany(G > 150)) x <- 1:10 y <- 10:1 order_by(y, cumsum(x)) #> [1] 55 54 52 49 45 40 34 27 19 10"},{"path":"https://dplyr.tidyverse.org/dev/articles/window-functions.html","id":"recycled-aggregates","dir":"Articles","previous_headings":"","what":"Recycled aggregates","title":"Window functions","text":"R’s vector recycling makes easy select values higher lower summary. call recycled aggregate value aggregate recycled length original vector. Recycled aggregates useful want find records greater mean less median: SQL databases don’t equivalent median() quantile(), filtering can achieve effect ntile(). example, x > median(x) equivalent ntile(x, 2) == 2; x > quantile(x, 75) equivalent ntile(x, 100) > 75 ntile(x, 4) > 3. can also use idea select records highest (x == max(x)) lowest value (x == min(x)) field, ranking functions give control ties, allow select number records. Recycled aggregates also useful conjunction mutate(). 
example, batting data, compute “career year”, number years player played since entered league: , introductory example, compute z-score:","code":"filter(players, G > mean(G)) filter(players, G < median(G)) filter(players, ntile(G, 2) == 2) mutate(players, career_year = yearID - min(yearID) + 1) #> # A tibble: 20,874 × 8 #> # Groups: playerID [1,436] #> playerID yearID teamID G AB R H career_year #> #> 1 aaronha01 1954 ML1 122 468 58 131 1 #> 2 aaronha01 1955 ML1 153 602 105 189 2 #> 3 aaronha01 1956 ML1 153 609 106 200 3 #> 4 aaronha01 1957 ML1 151 615 118 198 4 #> # ℹ 20,870 more rows mutate(players, G_z = (G - mean(G)) / sd(G)) #> # A tibble: 20,874 × 8 #> # Groups: playerID [1,436] #> playerID yearID teamID G AB R H G_z #> #> 1 aaronha01 1954 ML1 122 468 58 131 -1.16 #> 2 aaronha01 1955 ML1 153 602 105 189 0.519 #> 3 aaronha01 1956 ML1 153 609 106 200 0.519 #> 4 aaronha01 1957 ML1 151 615 118 198 0.411 #> # ℹ 20,870 more rows"},{"path":"https://dplyr.tidyverse.org/dev/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Hadley Wickham. Author, maintainer. Romain François. Author. Lionel Henry. Author. Kirill Müller. Author. Davis Vaughan. Author. . Copyright holder, funder.","code":""},{"path":"https://dplyr.tidyverse.org/dev/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Wickham H, François R, Henry L, Müller K, Vaughan D (2024). dplyr: Grammar Data Manipulation. 
R package version 1.1.4.9000, https://github.com/tidyverse/dplyr, https://dplyr.tidyverse.org.","code":"@Manual{, title = {dplyr: A Grammar of Data Manipulation}, author = {Hadley Wickham and Romain François and Lionel Henry and Kirill Müller and Davis Vaughan}, year = {2024}, note = {R package version 1.1.4.9000, https://github.com/tidyverse/dplyr}, url = {https://dplyr.tidyverse.org}, }"},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/index.html","id":"overview","dir":"","previous_headings":"","what":"Overview","title":"A Grammar of Data Manipulation","text":"dplyr grammar data manipulation, providing consistent set verbs help solve common data manipulation challenges: mutate() adds new variables functions existing variables select() picks variables based names. filter() picks cases based values. summarise() reduces multiple values single summary. arrange() changes ordering rows. combine naturally group_by() allows perform operation “group”. can learn vignette(\"dplyr\"). well single-table verbs, dplyr also provides variety two-table verbs, can learn vignette(\"two-table\"). new dplyr, best place start data transformation chapter R Data Science.","code":""},{"path":"https://dplyr.tidyverse.org/dev/index.html","id":"backends","dir":"","previous_headings":"","what":"Backends","title":"A Grammar of Data Manipulation","text":"addition data frames/tibbles, dplyr makes working computational backends accessible efficient. list alternative backends: arrow larger--memory datasets, including remote cloud storage like AWS S3, using Apache Arrow C++ engine, Acero. dtplyr large, -memory datasets. Translates dplyr code high performance data.table code. dbplyr data stored relational database. Translates dplyr code SQL. duckplyr using duckdb large, -memory datasets zero extra copies. Translates dplyr code high performance duckdb queries automatic R fallback translation isn’t possible. duckdb large datasets still small enough fit computer. 
sparklyr large datasets stored Apache Spark.","code":""},{"path":"https://dplyr.tidyverse.org/dev/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"A Grammar of Data Manipulation","text":"","code":"# The easiest way to get dplyr is to install the whole tidyverse: install.packages(\"tidyverse\") # Alternatively, install just dplyr: install.packages(\"dplyr\")"},{"path":"https://dplyr.tidyverse.org/dev/index.html","id":"development-version","dir":"","previous_headings":"Installation","what":"Development version","title":"A Grammar of Data Manipulation","text":"get bug fix use feature development version, can install development version dplyr GitHub.","code":"# install.packages(\"pak\") pak::pak(\"tidyverse/dplyr\")"},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/index.html","id":"usage","dir":"","previous_headings":"","what":"Usage","title":"A Grammar of Data Manipulation","text":"","code":"library(dplyr) starwars %>% filter(species == \"Droid\") #> # A tibble: 6 × 14 #> name height mass hair_color skin_color eye_color birth_year sex gender #> #> 1 C-3PO 167 75 gold yellow 112 none masculi… #> 2 R2-D2 96 32 white, blue red 33 none masculi… #> 3 R5-D4 97 32 white, red red NA none masculi… #> 4 IG-88 200 140 none metal red 15 none masculi… #> 5 R4-P17 96 NA none silver, red red, blue NA none feminine #> # ℹ 1 more row #> # ℹ 5 more variables: homeworld , species , films , #> # vehicles , starships starwars %>% select(name, ends_with(\"color\")) #> # A tibble: 87 × 4 #> name hair_color skin_color eye_color #> #> 1 Luke Skywalker blond fair blue #> 2 C-3PO gold yellow #> 3 R2-D2 white, blue red #> 4 Darth Vader none white yellow #> 5 Leia Organa brown light brown #> # ℹ 82 more rows starwars %>% mutate(name, bmi = mass / ((height / 100) ^ 2)) %>% select(name:mass, bmi) #> # A tibble: 87 × 4 #> name height mass bmi #> #> 1 Luke Skywalker 172 77 26.0 #> 2 C-3PO 167 75 26.9 #> 3 R2-D2 96 32 34.7 #> 4 Darth Vader 202 136 
33.3 #> 5 Leia Organa 150 49 21.8 #> # ℹ 82 more rows starwars %>% arrange(desc(mass)) #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex gender #> #> 1 Jabba De… 175 1358 green-tan… orange 600 herm… mascu… #> 2 Grievous 216 159 none brown, wh… green, y… NA male mascu… #> 3 IG-88 200 140 none metal red 15 none mascu… #> 4 Darth Va… 202 136 none white yellow 41.9 male mascu… #> 5 Tarfful 234 136 brown brown blue NA male mascu… #> # ℹ 82 more rows #> # ℹ 5 more variables: homeworld , species , films , #> # vehicles , starships starwars %>% group_by(species) %>% summarise( n = n(), mass = mean(mass, na.rm = TRUE) ) %>% filter( n > 1, mass > 50 ) #> # A tibble: 9 × 3 #> species n mass #> #> 1 Droid 6 69.8 #> 2 Gungan 3 74 #> 3 Human 35 81.3 #> 4 Kaminoan 2 88 #> 5 Mirialan 2 53.1 #> # ℹ 4 more rows"},{"path":"https://dplyr.tidyverse.org/dev/index.html","id":"getting-help","dir":"","previous_headings":"","what":"Getting help","title":"A Grammar of Data Manipulation","text":"encounter clear bug, please file issue minimal reproducible example GitHub. questions discussion, please use forum.posit.co.","code":""},{"path":"https://dplyr.tidyverse.org/dev/index.html","id":"code-of-conduct","dir":"","previous_headings":"","what":"Code of conduct","title":"A Grammar of Data Manipulation","text":"Please note project released Contributor Code Conduct. participating project agree abide terms.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/across.html","id":null,"dir":"Reference","previous_headings":"","what":"Apply a function (or functions) across multiple columns — across","title":"Apply a function (or functions) across multiple columns — across","text":"across() makes easy apply transformation multiple columns, allowing use select() semantics inside \"data-masking\" functions like summarise() mutate(). See vignette(\"colwise\") details. 
if_any() if_all() apply predicate function selection columns combine results single logical vector: if_any() TRUE predicate TRUE selected columns, if_all() TRUE predicate TRUE selected columns. just need select columns without applying transformation , probably want use pick() instead. across() supersedes family \"scoped variants\" like summarise_at(), summarise_if(), summarise_all().","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/across.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Apply a function (or functions) across multiple columns — across","text":"","code":"across(.cols, .fns, ..., .names = NULL, .unpack = FALSE) if_any(.cols, .fns, ..., .names = NULL) if_all(.cols, .fns, ..., .names = NULL)"},{"path":"https://dplyr.tidyverse.org/dev/reference/across.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Apply a function (or functions) across multiple columns — across","text":".cols Columns transform. select grouping columns already automatically handled verb (.e. summarise() mutate()). .fns Functions apply selected columns. Possible values : function, e.g. mean. purrr-style lambda, e.g. ~ mean(.x, na.rm = TRUE) named list functions lambdas, e.g. list(mean = mean, n_miss = ~ sum(.na(.x)). function applied column, output named combining function name column name using glue specification .names. Within functions can use cur_column() cur_group() access current column grouping keys respectively. ... Additional arguments function calls .fns longer accepted ... clear evaluated: per across() per group? Instead supply additional arguments directly .fns using lambda. example, instead across(:b, mean, na.rm = TRUE) write across(:b, ~ mean(.x, na.rm = TRUE)). .names glue specification describes name output columns. can use {.col} stand selected column name, {.fn} stand name function applied. 
default (NULL) equivalent \"{.col}\" single function case \"{.col}_{.fn}\" case list used .fns. .unpack Optionally unpack data frames returned functions .fns, expands df-columns individual columns, retaining number rows data frame. FALSE, default, unpacking done. TRUE, unpacking done default glue specification \"{outer}_{inner}\". Otherwise, single glue specification can supplied describe name unpacked columns. can use {outer} refer name originally generated .names, {inner} refer names data frame unpacking.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/across.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Apply a function (or functions) across multiple columns — across","text":"across() typically returns tibble one column column .cols function .fns. .unpack used, columns may returned depending results .fns unpacked. if_any() if_all() return logical vector.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/across.html","id":"timing-of-evaluation","dir":"Reference","previous_headings":"","what":"Timing of evaluation","title":"Apply a function (or functions) across multiple columns — across","text":"R code dplyr verbs generally evaluated per group. Inside across() however, code evaluated combination columns groups. 
evaluation timing important, example generating random variables, think happen place code consequence.","code":"gdf <- tibble(g = c(1, 1, 2, 3), v1 = 10:13, v2 = 20:23) %>% group_by(g) set.seed(1) # Outside: 1 normal variate n <- rnorm(1) gdf %>% mutate(across(v1:v2, ~ .x + n)) #> # A tibble: 4 x 3 #> # Groups: g [3] #> g v1 v2 #> #> 1 1 9.37 19.4 #> 2 1 10.4 20.4 #> 3 2 11.4 21.4 #> 4 3 12.4 22.4 # Inside a verb: 3 normal variates (ngroup) gdf %>% mutate(n = rnorm(1), across(v1:v2, ~ .x + n)) #> # A tibble: 4 x 4 #> # Groups: g [3] #> g v1 v2 n #> #> 1 1 10.2 20.2 0.184 #> 2 1 11.2 21.2 0.184 #> 3 2 11.2 21.2 -0.836 #> 4 3 14.6 24.6 1.60 # Inside `across()`: 6 normal variates (ncol * ngroup) gdf %>% mutate(across(v1:v2, ~ .x + rnorm(1))) #> # A tibble: 4 x 3 #> # Groups: g [3] #> g v1 v2 #> #> 1 1 10.3 20.7 #> 2 1 11.3 21.7 #> 3 2 11.2 22.6 #> 4 3 13.5 22.7"},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/across.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Apply a function (or functions) across multiple columns — across","text":"","code":"# For better printing iris <- as_tibble(iris) # across() ----------------------------------------------------------------- # Different ways to select the same set of columns # See for details iris %>% mutate(across(c(Sepal.Length, Sepal.Width), round)) #> # A tibble: 150 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5 4 1.4 0.2 setosa #> 2 5 3 1.4 0.2 setosa #> 3 5 3 1.3 0.2 setosa #> 4 5 3 1.5 0.2 setosa #> 5 5 4 1.4 0.2 setosa #> 6 5 4 1.7 0.4 setosa #> 7 5 3 1.4 0.3 setosa #> 8 5 3 1.5 0.2 setosa #> 9 4 3 1.4 0.2 setosa #> 10 5 3 1.5 0.1 setosa #> # ℹ 140 more rows iris %>% mutate(across(c(1, 2), round)) #> # A tibble: 150 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5 4 1.4 0.2 setosa #> 2 5 3 1.4 0.2 setosa #> 3 5 3 1.3 0.2 setosa #> 4 5 3 1.5 0.2 setosa #> 5 5 4 1.4 0.2 setosa #> 6 5 4 1.7 0.4 setosa #> 7 
5 3 1.4 0.3 setosa #> 8 5 3 1.5 0.2 setosa #> 9 4 3 1.4 0.2 setosa #> 10 5 3 1.5 0.1 setosa #> # ℹ 140 more rows iris %>% mutate(across(1:Sepal.Width, round)) #> # A tibble: 150 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5 4 1.4 0.2 setosa #> 2 5 3 1.4 0.2 setosa #> 3 5 3 1.3 0.2 setosa #> 4 5 3 1.5 0.2 setosa #> 5 5 4 1.4 0.2 setosa #> 6 5 4 1.7 0.4 setosa #> 7 5 3 1.4 0.3 setosa #> 8 5 3 1.5 0.2 setosa #> 9 4 3 1.4 0.2 setosa #> 10 5 3 1.5 0.1 setosa #> # ℹ 140 more rows iris %>% mutate(across(where(is.double) & !c(Petal.Length, Petal.Width), round)) #> # A tibble: 150 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5 4 1.4 0.2 setosa #> 2 5 3 1.4 0.2 setosa #> 3 5 3 1.3 0.2 setosa #> 4 5 3 1.5 0.2 setosa #> 5 5 4 1.4 0.2 setosa #> 6 5 4 1.7 0.4 setosa #> 7 5 3 1.4 0.3 setosa #> 8 5 3 1.5 0.2 setosa #> 9 4 3 1.4 0.2 setosa #> 10 5 3 1.5 0.1 setosa #> # ℹ 140 more rows # Using an external vector of names cols <- c(\"Sepal.Length\", \"Petal.Width\") iris %>% mutate(across(all_of(cols), round)) #> # A tibble: 150 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5 3.5 1.4 0 setosa #> 2 5 3 1.4 0 setosa #> 3 5 3.2 1.3 0 setosa #> 4 5 3.1 1.5 0 setosa #> 5 5 3.6 1.4 0 setosa #> 6 5 3.9 1.7 0 setosa #> 7 5 3.4 1.4 0 setosa #> 8 5 3.4 1.5 0 setosa #> 9 4 2.9 1.4 0 setosa #> 10 5 3.1 1.5 0 setosa #> # ℹ 140 more rows # If the external vector is named, the output columns will be named according # to those names names(cols) <- tolower(cols) iris %>% mutate(across(all_of(cols), round)) #> # A tibble: 150 × 7 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species sepal.length #> #> 1 5.1 3.5 1.4 0.2 setosa 5 #> 2 4.9 3 1.4 0.2 setosa 5 #> 3 4.7 3.2 1.3 0.2 setosa 5 #> 4 4.6 3.1 1.5 0.2 setosa 5 #> 5 5 3.6 1.4 0.2 setosa 5 #> 6 5.4 3.9 1.7 0.4 setosa 5 #> 7 4.6 3.4 1.4 0.3 setosa 5 #> 8 5 3.4 1.5 0.2 setosa 5 #> 9 4.4 2.9 1.4 0.2 setosa 4 #> 10 4.9 3.1 1.5 0.1 setosa 5 #> # ℹ 140 more rows #> 
# ℹ 1 more variable: petal.width # A purrr-style formula iris %>% group_by(Species) %>% summarise(across(starts_with(\"Sepal\"), ~ mean(.x, na.rm = TRUE))) #> # A tibble: 3 × 3 #> Species Sepal.Length Sepal.Width #> #> 1 setosa 5.01 3.43 #> 2 versicolor 5.94 2.77 #> 3 virginica 6.59 2.97 # A named list of functions iris %>% group_by(Species) %>% summarise(across(starts_with(\"Sepal\"), list(mean = mean, sd = sd))) #> # A tibble: 3 × 5 #> Species Sepal.Length_mean Sepal.Length_sd Sepal.Width_mean #> #> 1 setosa 5.01 0.352 3.43 #> 2 versicolor 5.94 0.516 2.77 #> 3 virginica 6.59 0.636 2.97 #> # ℹ 1 more variable: Sepal.Width_sd # Use the .names argument to control the output names iris %>% group_by(Species) %>% summarise(across(starts_with(\"Sepal\"), mean, .names = \"mean_{.col}\")) #> # A tibble: 3 × 3 #> Species mean_Sepal.Length mean_Sepal.Width #> #> 1 setosa 5.01 3.43 #> 2 versicolor 5.94 2.77 #> 3 virginica 6.59 2.97 iris %>% group_by(Species) %>% summarise(across(starts_with(\"Sepal\"), list(mean = mean, sd = sd), .names = \"{.col}.{.fn}\")) #> # A tibble: 3 × 5 #> Species Sepal.Length.mean Sepal.Length.sd Sepal.Width.mean #> #> 1 setosa 5.01 0.352 3.43 #> 2 versicolor 5.94 0.516 2.77 #> 3 virginica 6.59 0.636 2.97 #> # ℹ 1 more variable: Sepal.Width.sd # If a named external vector is used for column selection, .names will use # those names when constructing the output names iris %>% group_by(Species) %>% summarise(across(all_of(cols), mean, .names = \"mean_{.col}\")) #> # A tibble: 3 × 3 #> Species mean_sepal.length mean_petal.width #> #> 1 setosa 5.01 0.246 #> 2 versicolor 5.94 1.33 #> 3 virginica 6.59 2.03 # When the list is not named, .fn is replaced by the function's position iris %>% group_by(Species) %>% summarise(across(starts_with(\"Sepal\"), list(mean, sd), .names = \"{.col}.fn{.fn}\")) #> # A tibble: 3 × 5 #> Species Sepal.Length.fn1 Sepal.Length.fn2 Sepal.Width.fn1 #> #> 1 setosa 5.01 0.352 3.43 #> 2 versicolor 5.94 0.516 2.77 #> 3 virginica 6.59 
0.636 2.97 #> # ℹ 1 more variable: Sepal.Width.fn2 # When the functions in .fns return a data frame, you typically get a # \"packed\" data frame back quantile_df <- function(x, probs = c(0.25, 0.5, 0.75)) { tibble(quantile = probs, value = quantile(x, probs)) } iris %>% reframe(across(starts_with(\"Sepal\"), quantile_df)) #> # A tibble: 3 × 2 #> Sepal.Length$quantile $value Sepal.Width$quantile $value #> #> 1 0.25 5.1 0.25 2.8 #> 2 0.5 5.8 0.5 3 #> 3 0.75 6.4 0.75 3.3 # Use .unpack to automatically expand these packed data frames into their # individual columns iris %>% reframe(across(starts_with(\"Sepal\"), quantile_df, .unpack = TRUE)) #> # A tibble: 3 × 4 #> Sepal.Length_quantile Sepal.Length_value Sepal.Width_quantile #> #> 1 0.25 5.1 0.25 #> 2 0.5 5.8 0.5 #> 3 0.75 6.4 0.75 #> # ℹ 1 more variable: Sepal.Width_value # .unpack can utilize a glue specification if you don't like the defaults iris %>% reframe(across(starts_with(\"Sepal\"), quantile_df, .unpack = \"{outer}.{inner}\")) #> # A tibble: 3 × 4 #> Sepal.Length.quantile Sepal.Length.value Sepal.Width.quantile #> #> 1 0.25 5.1 0.25 #> 2 0.5 5.8 0.5 #> 3 0.75 6.4 0.75 #> # ℹ 1 more variable: Sepal.Width.value # This is also useful inside mutate(), for example, with a multi-lag helper multilag <- function(x, lags = 1:3) { names(lags) <- as.character(lags) purrr::map_dfr(lags, lag, x = x) } iris %>% group_by(Species) %>% mutate(across(starts_with(\"Sepal\"), multilag, .unpack = TRUE)) %>% select(Species, starts_with(\"Sepal\")) #> # A tibble: 150 × 9 #> # Groups: Species [3] #> Species Sepal.Length Sepal.Width Sepal.Length_1 Sepal.Length_2 #> #> 1 setosa 5.1 3.5 NA NA #> 2 setosa 4.9 3 5.1 NA #> 3 setosa 4.7 3.2 4.9 5.1 #> 4 setosa 4.6 3.1 4.7 4.9 #> 5 setosa 5 3.6 4.6 4.7 #> 6 setosa 5.4 3.9 5 4.6 #> 7 setosa 4.6 3.4 5.4 5 #> 8 setosa 5 3.4 4.6 5.4 #> 9 setosa 4.4 2.9 5 4.6 #> 10 setosa 4.9 3.1 4.4 5 #> # ℹ 140 more rows #> # ℹ 4 more variables: Sepal.Length_3 , Sepal.Width_1 , #> # Sepal.Width_2 , 
Sepal.Width_3 # if_any() and if_all() ---------------------------------------------------- iris %>% filter(if_any(ends_with(\"Width\"), ~ . > 4)) #> # A tibble: 3 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5.7 4.4 1.5 0.4 setosa #> 2 5.2 4.1 1.5 0.1 setosa #> 3 5.5 4.2 1.4 0.2 setosa iris %>% filter(if_all(ends_with(\"Width\"), ~ . > 2)) #> # A tibble: 23 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 6.3 3.3 6 2.5 virginica #> 2 7.1 3 5.9 2.1 virginica #> 3 6.5 3 5.8 2.2 virginica #> 4 7.6 3 6.6 2.1 virginica #> 5 7.2 3.6 6.1 2.5 virginica #> 6 6.8 3 5.5 2.1 virginica #> 7 5.8 2.8 5.1 2.4 virginica #> 8 6.4 3.2 5.3 2.3 virginica #> 9 7.7 3.8 6.7 2.2 virginica #> 10 7.7 2.6 6.9 2.3 virginica #> # ℹ 13 more rows"},{"path":"https://dplyr.tidyverse.org/dev/reference/add_rownames.html","id":null,"dir":"Reference","previous_headings":"","what":"Convert row names to an explicit variable. — add_rownames","title":"Convert row names to an explicit variable. — add_rownames","text":"Please use tibble::rownames_to_column() instead.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/add_rownames.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Convert row names to an explicit variable. — add_rownames","text":"","code":"add_rownames(df, var = \"rowname\")"},{"path":"https://dplyr.tidyverse.org/dev/reference/add_rownames.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Convert row names to an explicit variable. — add_rownames","text":"df Input data frame rownames. var Name variable use","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/all_equal.html","id":null,"dir":"Reference","previous_headings":"","what":"Flexible equality comparison for data frames — all_equal","title":"Flexible equality comparison for data frames — all_equal","text":"all_equal() allows compare data frames, optionally ignoring row column names. 
deprecated dplyr 1.1.0, makes easy ignore important differences.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/all_equal.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Flexible equality comparison for data frames — all_equal","text":"","code":"all_equal( target, current, ignore_col_order = TRUE, ignore_row_order = TRUE, convert = FALSE, ... )"},{"path":"https://dplyr.tidyverse.org/dev/reference/all_equal.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Flexible equality comparison for data frames — all_equal","text":"target, current Two data frames compare. ignore_col_order order columns ignored? ignore_row_order order rows ignored? convert similar classes converted? Currently convert factor character integer double. ... Ignored. Needed compatibility .equal().","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/all_equal.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Flexible equality comparison for data frames — all_equal","text":"TRUE equal, otherwise character vector describing reasons equal. Use isTRUE() using result expression.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/all_equal.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Flexible equality comparison for data frames — all_equal","text":"","code":"scramble <- function(x) x[sample(nrow(x)), sample(ncol(x))] # `all_equal()` ignored row and column ordering by default, # but we now feel that that makes it too easy to make mistakes mtcars2 <- scramble(mtcars) all_equal(mtcars, mtcars2) #> Warning: `all_equal()` was deprecated in dplyr 1.1.0. #> ℹ Please use `all.equal()` instead. 
#> ℹ And manually order the rows/cols as needed #> [1] TRUE # Instead, be explicit about the row and column ordering all.equal( mtcars, mtcars2[rownames(mtcars), names(mtcars)] ) #> [1] TRUE"},{"path":"https://dplyr.tidyverse.org/dev/reference/all_vars.html","id":null,"dir":"Reference","previous_headings":"","what":"Apply predicate to all variables — all_vars","title":"Apply predicate to all variables — all_vars","text":"all_vars() any_vars() needed scoped verbs, superseded use across() existing verb. See vignette(\"colwise\") details. quoting functions signal scoped filtering verbs (e.g. filter_if() filter_all()) predicate expression applied relevant variables. all_vars() variant takes intersection predicate expressions & any_vars() variant takes union |.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/all_vars.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Apply predicate to all variables — all_vars","text":"","code":"all_vars(expr) any_vars(expr)"},{"path":"https://dplyr.tidyverse.org/dev/reference/all_vars.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Apply predicate to all variables — all_vars","text":"expr expression returns logical vector, using . refer \"current\" variable.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/args_by.html","id":null,"dir":"Reference","previous_headings":"","what":"Helper for consistent documentation of .by — args_by","title":"Helper for consistent documentation of .by — args_by","text":"Use @inheritParams args_by consistently document ..","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/args_by.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Helper for consistent documentation of .by — args_by","text":". Optionally, selection columns group just operation, functioning alternative group_by(). 
details examples, see ?dplyr_by.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/arrange.html","id":null,"dir":"Reference","previous_headings":"","what":"Order rows using column values — arrange","title":"Order rows using column values — arrange","text":"arrange() orders rows data frame values selected columns. Unlike dplyr verbs, arrange() largely ignores grouping; need explicitly mention grouping variables (use .by_group = TRUE) order group , functions variables evaluated per data frame, per group.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/arrange.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Order rows using column values — arrange","text":"","code":"arrange(.data, ..., .by_group = FALSE) # S3 method for data.frame arrange(.data, ..., .by_group = FALSE, .locale = NULL)"},{"path":"https://dplyr.tidyverse.org/dev/reference/arrange.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Order rows using column values — arrange","text":".data data frame, data frame extension (e.g. tibble), lazy data frame (e.g. dbplyr dtplyr). See Methods, , details. ... Variables, functions variables. Use desc() sort variable descending order. .by_group TRUE, sort first grouping variable. Applies grouped data frames . .locale locale sort character vectors . NULL, default, uses \"C\" locale unless dplyr.legacy_locale global option escape hatch active. See dplyr-locale help page details. single string stringi::stri_locale_list() supplied, used locale sort . example, \"en\" sort American English locale. requires stringi package. \"C\" supplied, character vectors always sorted C locale. require stringi often much faster supplying locale identifier. C locale English locales, \"en\", particularly comes data containing mix upper lower case letters. 
explained detail locale help page Default locale section.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/arrange.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Order rows using column values — arrange","text":"object type .data. output following properties: rows appear output, (usually) different place. Columns modified. Groups modified. Data frame attributes preserved.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/arrange.html","id":"missing-values","dir":"Reference","previous_headings":"","what":"Missing values","title":"Order rows using column values — arrange","text":"Unlike base sorting sort(), NA : always sorted end local data, even wrapped desc(). treated differently remote data, depending backend.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/arrange.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Order rows using column values — arrange","text":"function generic, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. 
following methods currently available loaded packages: dplyr (data.frame) .","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/arrange.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Order rows using column values — arrange","text":"","code":"arrange(mtcars, cyl, disp) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 #> Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 #> Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 #> Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 #> Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 #> Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 #> Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 #> Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2 #> Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 #> Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 #> Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 #> Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 #> Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 #> Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 #> Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 #> Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 #> Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 #> Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 #> Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 #> Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 #> Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8 #> AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 #> Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 #> Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 #> Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 #> Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 #> Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 #> Pontiac Firebird 19.2 8 
400.0 175 3.08 3.845 17.05 0 0 3 2 #> Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 #> Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 #> Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 arrange(mtcars, desc(disp)) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 #> Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 #> Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 #> Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 #> Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 #> Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 #> Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 #> Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 #> Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 #> AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 #> Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8 #> Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 #> Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 #> Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 #> Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 #> Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 #> Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 #> Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 #> Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 #> Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 #> Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 #> Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 #> Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 #> Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2 #> Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 #> Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 #> Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 #> Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 #> Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 #> Fiat 128 32.4 4 
78.7 66 4.08 2.200 19.47 1 1 4 1 #> Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 # grouped arrange ignores groups by_cyl <- mtcars %>% group_by(cyl) by_cyl %>% arrange(desc(wt)) #> # A tibble: 32 × 11 #> # Groups: cyl [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 10.4 8 460 215 3 5.42 17.8 0 0 3 4 #> 2 14.7 8 440 230 3.23 5.34 17.4 0 0 3 4 #> 3 10.4 8 472 205 2.93 5.25 18.0 0 0 3 4 #> 4 16.4 8 276. 180 3.07 4.07 17.4 0 0 3 3 #> 5 19.2 8 400 175 3.08 3.84 17.0 0 0 3 2 #> 6 13.3 8 350 245 3.73 3.84 15.4 0 0 3 4 #> 7 15.2 8 276. 180 3.07 3.78 18 0 0 3 3 #> 8 17.3 8 276. 180 3.07 3.73 17.6 0 0 3 3 #> 9 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 10 15 8 301 335 3.54 3.57 14.6 0 1 5 8 #> # ℹ 22 more rows # Unless you specifically ask: by_cyl %>% arrange(desc(wt), .by_group = TRUE) #> # A tibble: 32 × 11 #> # Groups: cyl [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 2 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 3 21.4 4 121 109 4.11 2.78 18.6 1 1 4 2 #> 4 21.5 4 120. 97 3.7 2.46 20.0 1 0 3 1 #> 5 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 6 32.4 4 78.7 66 4.08 2.2 19.5 1 1 4 1 #> 7 26 4 120. 
91 4.43 2.14 16.7 0 1 5 2 #> 8 27.3 4 79 66 4.08 1.94 18.9 1 1 4 1 #> 9 33.9 4 71.1 65 4.22 1.84 19.9 1 1 4 1 #> 10 30.4 4 75.7 52 4.93 1.62 18.5 1 1 4 2 #> # ℹ 22 more rows # use embracing when wrapping in a function; # see ?rlang::args_data_masking for more details tidy_eval_arrange <- function(.data, var) { .data %>% arrange({{ var }}) } tidy_eval_arrange(mtcars, mpg) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 #> Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 #> Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 #> Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 #> Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 #> Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8 #> Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 #> AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 #> Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 #> Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 #> Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 #> Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 #> Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 #> Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 #> Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 #> Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 #> Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 #> Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 #> Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 #> Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 #> Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 #> Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2 #> Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 #> Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 #> Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 #> Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 #> Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 #> Fiat X1-9 
27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 #> Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 #> Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 #> Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 # Use `across()` or `pick()` to select columns with tidy-select iris %>% arrange(pick(starts_with(\"Sepal\"))) #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> 1 4.3 3.0 1.1 0.1 setosa #> 2 4.4 2.9 1.4 0.2 setosa #> 3 4.4 3.0 1.3 0.2 setosa #> 4 4.4 3.2 1.3 0.2 setosa #> 5 4.5 2.3 1.3 0.3 setosa #> 6 4.6 3.1 1.5 0.2 setosa #> 7 4.6 3.2 1.4 0.2 setosa #> 8 4.6 3.4 1.4 0.3 setosa #> 9 4.6 3.6 1.0 0.2 setosa #> 10 4.7 3.2 1.3 0.2 setosa #> 11 4.7 3.2 1.6 0.2 setosa #> 12 4.8 3.0 1.4 0.1 setosa #> 13 4.8 3.0 1.4 0.3 setosa #> 14 4.8 3.1 1.6 0.2 setosa #> 15 4.8 3.4 1.6 0.2 setosa #> 16 4.8 3.4 1.9 0.2 setosa #> 17 4.9 2.4 3.3 1.0 versicolor #> 18 4.9 2.5 4.5 1.7 virginica #> 19 4.9 3.0 1.4 0.2 setosa #> 20 4.9 3.1 1.5 0.1 setosa #> 21 4.9 3.1 1.5 0.2 setosa #> 22 4.9 3.6 1.4 0.1 setosa #> 23 5.0 2.0 3.5 1.0 versicolor #> 24 5.0 2.3 3.3 1.0 versicolor #> 25 5.0 3.0 1.6 0.2 setosa #> 26 5.0 3.2 1.2 0.2 setosa #> 27 5.0 3.3 1.4 0.2 setosa #> 28 5.0 3.4 1.5 0.2 setosa #> 29 5.0 3.4 1.6 0.4 setosa #> 30 5.0 3.5 1.3 0.3 setosa #> 31 5.0 3.5 1.6 0.6 setosa #> 32 5.0 3.6 1.4 0.2 setosa #> 33 5.1 2.5 3.0 1.1 versicolor #> 34 5.1 3.3 1.7 0.5 setosa #> 35 5.1 3.4 1.5 0.2 setosa #> 36 5.1 3.5 1.4 0.2 setosa #> 37 5.1 3.5 1.4 0.3 setosa #> 38 5.1 3.7 1.5 0.4 setosa #> 39 5.1 3.8 1.5 0.3 setosa #> 40 5.1 3.8 1.9 0.4 setosa #> 41 5.1 3.8 1.6 0.2 setosa #> 42 5.2 2.7 3.9 1.4 versicolor #> 43 5.2 3.4 1.4 0.2 setosa #> 44 5.2 3.5 1.5 0.2 setosa #> 45 5.2 4.1 1.5 0.1 setosa #> 46 5.3 3.7 1.5 0.2 setosa #> 47 5.4 3.0 4.5 1.5 versicolor #> 48 5.4 3.4 1.7 0.2 setosa #> 49 5.4 3.4 1.5 0.4 setosa #> 50 5.4 3.7 1.5 0.2 setosa #> 51 5.4 3.9 1.7 0.4 setosa #> 52 5.4 3.9 1.3 0.4 setosa #> 53 5.5 2.3 4.0 1.3 versicolor #> 54 
5.5 2.4 3.8 1.1 versicolor #> 55 5.5 2.4 3.7 1.0 versicolor #> 56 5.5 2.5 4.0 1.3 versicolor #> 57 5.5 2.6 4.4 1.2 versicolor #> 58 5.5 3.5 1.3 0.2 setosa #> 59 5.5 4.2 1.4 0.2 setosa #> 60 5.6 2.5 3.9 1.1 versicolor #> 61 5.6 2.7 4.2 1.3 versicolor #> 62 5.6 2.8 4.9 2.0 virginica #> 63 5.6 2.9 3.6 1.3 versicolor #> 64 5.6 3.0 4.5 1.5 versicolor #> 65 5.6 3.0 4.1 1.3 versicolor #> 66 5.7 2.5 5.0 2.0 virginica #> 67 5.7 2.6 3.5 1.0 versicolor #> 68 5.7 2.8 4.5 1.3 versicolor #> 69 5.7 2.8 4.1 1.3 versicolor #> 70 5.7 2.9 4.2 1.3 versicolor #> 71 5.7 3.0 4.2 1.2 versicolor #> 72 5.7 3.8 1.7 0.3 setosa #> 73 5.7 4.4 1.5 0.4 setosa #> 74 5.8 2.6 4.0 1.2 versicolor #> 75 5.8 2.7 4.1 1.0 versicolor #> 76 5.8 2.7 3.9 1.2 versicolor #> 77 5.8 2.7 5.1 1.9 virginica #> 78 5.8 2.7 5.1 1.9 virginica #> 79 5.8 2.8 5.1 2.4 virginica #> 80 5.8 4.0 1.2 0.2 setosa #> 81 5.9 3.0 4.2 1.5 versicolor #> 82 5.9 3.0 5.1 1.8 virginica #> 83 5.9 3.2 4.8 1.8 versicolor #> 84 6.0 2.2 4.0 1.0 versicolor #> 85 6.0 2.2 5.0 1.5 virginica #> 86 6.0 2.7 5.1 1.6 versicolor #> 87 6.0 2.9 4.5 1.5 versicolor #> 88 6.0 3.0 4.8 1.8 virginica #> 89 6.0 3.4 4.5 1.6 versicolor #> 90 6.1 2.6 5.6 1.4 virginica #> 91 6.1 2.8 4.0 1.3 versicolor #> 92 6.1 2.8 4.7 1.2 versicolor #> 93 6.1 2.9 4.7 1.4 versicolor #> 94 6.1 3.0 4.6 1.4 versicolor #> 95 6.1 3.0 4.9 1.8 virginica #> 96 6.2 2.2 4.5 1.5 versicolor #> 97 6.2 2.8 4.8 1.8 virginica #> 98 6.2 2.9 4.3 1.3 versicolor #> 99 6.2 3.4 5.4 2.3 virginica #> 100 6.3 2.3 4.4 1.3 versicolor #> 101 6.3 2.5 4.9 1.5 versicolor #> 102 6.3 2.5 5.0 1.9 virginica #> 103 6.3 2.7 4.9 1.8 virginica #> 104 6.3 2.8 5.1 1.5 virginica #> 105 6.3 2.9 5.6 1.8 virginica #> 106 6.3 3.3 4.7 1.6 versicolor #> 107 6.3 3.3 6.0 2.5 virginica #> 108 6.3 3.4 5.6 2.4 virginica #> 109 6.4 2.7 5.3 1.9 virginica #> 110 6.4 2.8 5.6 2.1 virginica #> 111 6.4 2.8 5.6 2.2 virginica #> 112 6.4 2.9 4.3 1.3 versicolor #> 113 6.4 3.1 5.5 1.8 virginica #> 114 6.4 3.2 4.5 1.5 versicolor #> 115 6.4 3.2 5.3 
2.3 virginica #> 116 6.5 2.8 4.6 1.5 versicolor #> 117 6.5 3.0 5.8 2.2 virginica #> 118 6.5 3.0 5.5 1.8 virginica #> 119 6.5 3.0 5.2 2.0 virginica #> 120 6.5 3.2 5.1 2.0 virginica #> 121 6.6 2.9 4.6 1.3 versicolor #> 122 6.6 3.0 4.4 1.4 versicolor #> 123 6.7 2.5 5.8 1.8 virginica #> 124 6.7 3.0 5.0 1.7 versicolor #> 125 6.7 3.0 5.2 2.3 virginica #> 126 6.7 3.1 4.4 1.4 versicolor #> 127 6.7 3.1 4.7 1.5 versicolor #> 128 6.7 3.1 5.6 2.4 virginica #> 129 6.7 3.3 5.7 2.1 virginica #> 130 6.7 3.3 5.7 2.5 virginica #> 131 6.8 2.8 4.8 1.4 versicolor #> 132 6.8 3.0 5.5 2.1 virginica #> 133 6.8 3.2 5.9 2.3 virginica #> 134 6.9 3.1 4.9 1.5 versicolor #> 135 6.9 3.1 5.4 2.1 virginica #> 136 6.9 3.1 5.1 2.3 virginica #> 137 6.9 3.2 5.7 2.3 virginica #> 138 7.0 3.2 4.7 1.4 versicolor #> 139 7.1 3.0 5.9 2.1 virginica #> 140 7.2 3.0 5.8 1.6 virginica #> 141 7.2 3.2 6.0 1.8 virginica #> 142 7.2 3.6 6.1 2.5 virginica #> 143 7.3 2.9 6.3 1.8 virginica #> 144 7.4 2.8 6.1 1.9 virginica #> 145 7.6 3.0 6.6 2.1 virginica #> 146 7.7 2.6 6.9 2.3 virginica #> 147 7.7 2.8 6.7 2.0 virginica #> 148 7.7 3.0 6.1 2.3 virginica #> 149 7.7 3.8 6.7 2.2 virginica #> 150 7.9 3.8 6.4 2.0 virginica iris %>% arrange(across(starts_with(\"Sepal\"), desc)) #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> 1 7.9 3.8 6.4 2.0 virginica #> 2 7.7 3.8 6.7 2.2 virginica #> 3 7.7 3.0 6.1 2.3 virginica #> 4 7.7 2.8 6.7 2.0 virginica #> 5 7.7 2.6 6.9 2.3 virginica #> 6 7.6 3.0 6.6 2.1 virginica #> 7 7.4 2.8 6.1 1.9 virginica #> 8 7.3 2.9 6.3 1.8 virginica #> 9 7.2 3.6 6.1 2.5 virginica #> 10 7.2 3.2 6.0 1.8 virginica #> 11 7.2 3.0 5.8 1.6 virginica #> 12 7.1 3.0 5.9 2.1 virginica #> 13 7.0 3.2 4.7 1.4 versicolor #> 14 6.9 3.2 5.7 2.3 virginica #> 15 6.9 3.1 4.9 1.5 versicolor #> 16 6.9 3.1 5.4 2.1 virginica #> 17 6.9 3.1 5.1 2.3 virginica #> 18 6.8 3.2 5.9 2.3 virginica #> 19 6.8 3.0 5.5 2.1 virginica #> 20 6.8 2.8 4.8 1.4 versicolor #> 21 6.7 3.3 5.7 2.1 virginica #> 22 6.7 3.3 5.7 2.5 virginica #> 23 
6.7 3.1 4.4 1.4 versicolor #> 24 6.7 3.1 4.7 1.5 versicolor #> 25 6.7 3.1 5.6 2.4 virginica #> 26 6.7 3.0 5.0 1.7 versicolor #> 27 6.7 3.0 5.2 2.3 virginica #> 28 6.7 2.5 5.8 1.8 virginica #> 29 6.6 3.0 4.4 1.4 versicolor #> 30 6.6 2.9 4.6 1.3 versicolor #> 31 6.5 3.2 5.1 2.0 virginica #> 32 6.5 3.0 5.8 2.2 virginica #> 33 6.5 3.0 5.5 1.8 virginica #> 34 6.5 3.0 5.2 2.0 virginica #> 35 6.5 2.8 4.6 1.5 versicolor #> 36 6.4 3.2 4.5 1.5 versicolor #> 37 6.4 3.2 5.3 2.3 virginica #> 38 6.4 3.1 5.5 1.8 virginica #> 39 6.4 2.9 4.3 1.3 versicolor #> 40 6.4 2.8 5.6 2.1 virginica #> 41 6.4 2.8 5.6 2.2 virginica #> 42 6.4 2.7 5.3 1.9 virginica #> 43 6.3 3.4 5.6 2.4 virginica #> 44 6.3 3.3 4.7 1.6 versicolor #> 45 6.3 3.3 6.0 2.5 virginica #> 46 6.3 2.9 5.6 1.8 virginica #> 47 6.3 2.8 5.1 1.5 virginica #> 48 6.3 2.7 4.9 1.8 virginica #> 49 6.3 2.5 4.9 1.5 versicolor #> 50 6.3 2.5 5.0 1.9 virginica #> 51 6.3 2.3 4.4 1.3 versicolor #> 52 6.2 3.4 5.4 2.3 virginica #> 53 6.2 2.9 4.3 1.3 versicolor #> 54 6.2 2.8 4.8 1.8 virginica #> 55 6.2 2.2 4.5 1.5 versicolor #> 56 6.1 3.0 4.6 1.4 versicolor #> 57 6.1 3.0 4.9 1.8 virginica #> 58 6.1 2.9 4.7 1.4 versicolor #> 59 6.1 2.8 4.0 1.3 versicolor #> 60 6.1 2.8 4.7 1.2 versicolor #> 61 6.1 2.6 5.6 1.4 virginica #> 62 6.0 3.4 4.5 1.6 versicolor #> 63 6.0 3.0 4.8 1.8 virginica #> 64 6.0 2.9 4.5 1.5 versicolor #> 65 6.0 2.7 5.1 1.6 versicolor #> 66 6.0 2.2 4.0 1.0 versicolor #> 67 6.0 2.2 5.0 1.5 virginica #> 68 5.9 3.2 4.8 1.8 versicolor #> 69 5.9 3.0 4.2 1.5 versicolor #> 70 5.9 3.0 5.1 1.8 virginica #> 71 5.8 4.0 1.2 0.2 setosa #> 72 5.8 2.8 5.1 2.4 virginica #> 73 5.8 2.7 4.1 1.0 versicolor #> 74 5.8 2.7 3.9 1.2 versicolor #> 75 5.8 2.7 5.1 1.9 virginica #> 76 5.8 2.7 5.1 1.9 virginica #> 77 5.8 2.6 4.0 1.2 versicolor #> 78 5.7 4.4 1.5 0.4 setosa #> 79 5.7 3.8 1.7 0.3 setosa #> 80 5.7 3.0 4.2 1.2 versicolor #> 81 5.7 2.9 4.2 1.3 versicolor #> 82 5.7 2.8 4.5 1.3 versicolor #> 83 5.7 2.8 4.1 1.3 versicolor #> 84 5.7 2.6 3.5 1.0 versicolor 
#> 85 5.7 2.5 5.0 2.0 virginica #> 86 5.6 3.0 4.5 1.5 versicolor #> 87 5.6 3.0 4.1 1.3 versicolor #> 88 5.6 2.9 3.6 1.3 versicolor #> 89 5.6 2.8 4.9 2.0 virginica #> 90 5.6 2.7 4.2 1.3 versicolor #> 91 5.6 2.5 3.9 1.1 versicolor #> 92 5.5 4.2 1.4 0.2 setosa #> 93 5.5 3.5 1.3 0.2 setosa #> 94 5.5 2.6 4.4 1.2 versicolor #> 95 5.5 2.5 4.0 1.3 versicolor #> 96 5.5 2.4 3.8 1.1 versicolor #> 97 5.5 2.4 3.7 1.0 versicolor #> 98 5.5 2.3 4.0 1.3 versicolor #> 99 5.4 3.9 1.7 0.4 setosa #> 100 5.4 3.9 1.3 0.4 setosa #> 101 5.4 3.7 1.5 0.2 setosa #> 102 5.4 3.4 1.7 0.2 setosa #> 103 5.4 3.4 1.5 0.4 setosa #> 104 5.4 3.0 4.5 1.5 versicolor #> 105 5.3 3.7 1.5 0.2 setosa #> 106 5.2 4.1 1.5 0.1 setosa #> 107 5.2 3.5 1.5 0.2 setosa #> 108 5.2 3.4 1.4 0.2 setosa #> 109 5.2 2.7 3.9 1.4 versicolor #> 110 5.1 3.8 1.5 0.3 setosa #> 111 5.1 3.8 1.9 0.4 setosa #> 112 5.1 3.8 1.6 0.2 setosa #> 113 5.1 3.7 1.5 0.4 setosa #> 114 5.1 3.5 1.4 0.2 setosa #> 115 5.1 3.5 1.4 0.3 setosa #> 116 5.1 3.4 1.5 0.2 setosa #> 117 5.1 3.3 1.7 0.5 setosa #> 118 5.1 2.5 3.0 1.1 versicolor #> 119 5.0 3.6 1.4 0.2 setosa #> 120 5.0 3.5 1.3 0.3 setosa #> 121 5.0 3.5 1.6 0.6 setosa #> 122 5.0 3.4 1.5 0.2 setosa #> 123 5.0 3.4 1.6 0.4 setosa #> 124 5.0 3.3 1.4 0.2 setosa #> 125 5.0 3.2 1.2 0.2 setosa #> 126 5.0 3.0 1.6 0.2 setosa #> 127 5.0 2.3 3.3 1.0 versicolor #> 128 5.0 2.0 3.5 1.0 versicolor #> 129 4.9 3.6 1.4 0.1 setosa #> 130 4.9 3.1 1.5 0.1 setosa #> 131 4.9 3.1 1.5 0.2 setosa #> 132 4.9 3.0 1.4 0.2 setosa #> 133 4.9 2.5 4.5 1.7 virginica #> 134 4.9 2.4 3.3 1.0 versicolor #> 135 4.8 3.4 1.6 0.2 setosa #> 136 4.8 3.4 1.9 0.2 setosa #> 137 4.8 3.1 1.6 0.2 setosa #> 138 4.8 3.0 1.4 0.1 setosa #> 139 4.8 3.0 1.4 0.3 setosa #> 140 4.7 3.2 1.3 0.2 setosa #> 141 4.7 3.2 1.6 0.2 setosa #> 142 4.6 3.6 1.0 0.2 setosa #> 143 4.6 3.4 1.4 0.3 setosa #> 144 4.6 3.2 1.4 0.2 setosa #> 145 4.6 3.1 1.5 0.2 setosa #> 146 4.5 2.3 1.3 0.3 setosa #> 147 4.4 3.2 1.3 0.2 setosa #> 148 4.4 3.0 1.3 0.2 setosa #> 149 4.4 2.9 1.4 
0.2 setosa #> 150 4.3 3.0 1.1 0.1 setosa"},{"path":"https://dplyr.tidyverse.org/dev/reference/arrange_all.html","id":null,"dir":"Reference","previous_headings":"","what":"Arrange rows by a selection of variables — arrange_all","title":"Arrange rows by a selection of variables — arrange_all","text":"Scoped verbs (_if, _at, _all) superseded use pick() across() existing verb. See vignette(\"colwise\") details. scoped variants arrange() sort data frame selection variables. Like arrange(), can modify variables ordering .funs argument.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/arrange_all.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Arrange rows by a selection of variables — arrange_all","text":"","code":"arrange_all(.tbl, .funs = list(), ..., .by_group = FALSE, .locale = NULL) arrange_at(.tbl, .vars, .funs = list(), ..., .by_group = FALSE, .locale = NULL) arrange_if( .tbl, .predicate, .funs = list(), ..., .by_group = FALSE, .locale = NULL )"},{"path":"https://dplyr.tidyverse.org/dev/reference/arrange_all.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Arrange rows by a selection of variables — arrange_all","text":".tbl tbl object. .funs function fun, quosure style lambda ~ fun(.) list either form. ... Additional arguments function calls .funs. evaluated , tidy dots support. .by_group TRUE, sort first grouping variable. Applies grouped data frames . .locale locale sort character vectors . NULL, default, uses \"C\" locale unless dplyr.legacy_locale global option escape hatch active. See dplyr-locale help page details. single string stringi::stri_locale_list() supplied, used locale sort . example, \"en\" sort American English locale. requires stringi package. \"C\" supplied, character vectors always sorted C locale. require stringi often much faster supplying locale identifier. 
C locale English locales, \"en\", particularly comes data containing mix upper lower case letters. explained detail locale help page Default locale section. .vars list columns generated vars(), character vector column names, numeric vector column positions, NULL. .predicate predicate function applied columns logical vector. variables .predicate returns TRUE selected. argument passed rlang::as_function() thus supports quosure-style lambda functions strings representing function names.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/arrange_all.html","id":"grouping-variables","dir":"Reference","previous_headings":"","what":"Grouping variables","title":"Arrange rows by a selection of variables — arrange_all","text":"grouping variables part selection participate sorting data frame.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/arrange_all.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Arrange rows by a selection of variables — arrange_all","text":"","code":"df <- as_tibble(mtcars) arrange_all(df) #> # A tibble: 32 × 11 #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 10.4 8 460 215 3 5.42 17.8 0 0 3 4 #> 2 10.4 8 472 205 2.93 5.25 18.0 0 0 3 4 #> 3 13.3 8 350 245 3.73 3.84 15.4 0 0 3 4 #> 4 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 5 14.7 8 440 230 3.23 5.34 17.4 0 0 3 4 #> 6 15 8 301 335 3.54 3.57 14.6 0 1 5 8 #> 7 15.2 8 276. 180 3.07 3.78 18 0 0 3 3 #> 8 15.2 8 304 150 3.15 3.44 17.3 0 0 3 2 #> 9 15.5 8 318 150 2.76 3.52 16.9 0 0 3 2 #> 10 15.8 8 351 264 4.22 3.17 14.5 0 1 5 4 #> # ℹ 22 more rows # -> arrange(df, pick(everything())) #> # A tibble: 32 × 11 #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 10.4 8 460 215 3 5.42 17.8 0 0 3 4 #> 2 10.4 8 472 205 2.93 5.25 18.0 0 0 3 4 #> 3 13.3 8 350 245 3.73 3.84 15.4 0 0 3 4 #> 4 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 5 14.7 8 440 230 3.23 5.34 17.4 0 0 3 4 #> 6 15 8 301 335 3.54 3.57 14.6 0 1 5 8 #> 7 15.2 8 276. 
180 3.07 3.78 18 0 0 3 3 #> 8 15.2 8 304 150 3.15 3.44 17.3 0 0 3 2 #> 9 15.5 8 318 150 2.76 3.52 16.9 0 0 3 2 #> 10 15.8 8 351 264 4.22 3.17 14.5 0 1 5 4 #> # ℹ 22 more rows arrange_all(df, desc) #> # A tibble: 32 × 11 #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 33.9 4 71.1 65 4.22 1.84 19.9 1 1 4 1 #> 2 32.4 4 78.7 66 4.08 2.2 19.5 1 1 4 1 #> 3 30.4 4 95.1 113 3.77 1.51 16.9 1 1 5 2 #> 4 30.4 4 75.7 52 4.93 1.62 18.5 1 1 4 2 #> 5 27.3 4 79 66 4.08 1.94 18.9 1 1 4 1 #> 6 26 4 120. 91 4.43 2.14 16.7 0 1 5 2 #> 7 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 8 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 9 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 10 21.5 4 120. 97 3.7 2.46 20.0 1 0 3 1 #> # ℹ 22 more rows # -> arrange(df, across(everything(), desc)) #> # A tibble: 32 × 11 #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 33.9 4 71.1 65 4.22 1.84 19.9 1 1 4 1 #> 2 32.4 4 78.7 66 4.08 2.2 19.5 1 1 4 1 #> 3 30.4 4 95.1 113 3.77 1.51 16.9 1 1 5 2 #> 4 30.4 4 75.7 52 4.93 1.62 18.5 1 1 4 2 #> 5 27.3 4 79 66 4.08 1.94 18.9 1 1 4 1 #> 6 26 4 120. 91 4.43 2.14 16.7 0 1 5 2 #> 7 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 8 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 9 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 10 21.5 4 120. 
97 3.7 2.46 20.0 1 0 3 1 #> # ℹ 22 more rows"},{"path":"https://dplyr.tidyverse.org/dev/reference/auto_copy.html","id":null,"dir":"Reference","previous_headings":"","what":"Copy tables to same source, if necessary — auto_copy","title":"Copy tables to same source, if necessary — auto_copy","text":"Copy tables source, necessary","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/auto_copy.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Copy tables to same source, if necessary — auto_copy","text":"","code":"auto_copy(x, y, copy = FALSE, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/auto_copy.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Copy tables to same source, if necessary — auto_copy","text":"x, y y copied x, necessary. copy x y data source, copy TRUE, y copied src x. allows join tables across srcs, potentially expensive operation must opt . ... arguments passed methods.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/backend_dbplyr.html","id":null,"dir":"Reference","previous_headings":"","what":"Database and SQL generics. — backend_dbplyr","title":"Database and SQL generics. — backend_dbplyr","text":"sql_ generics used build different types SQL queries. default implementations dbplyr generates ANSI 92 compliant SQL. db_ generics execute actions database. default implementations dbplyr typically just call standard DBI S4 method.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/backend_dbplyr.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Database and SQL generics. — backend_dbplyr","text":"","code":"db_desc(x) sql_translate_env(con) db_list_tables(con) db_has_table(con, table) db_data_type(con, fields) db_save_query(con, sql, name, temporary = TRUE, ...) db_begin(con, ...) db_commit(con, ...) db_rollback(con, ...) db_write_table(con, table, types, values, temporary = FALSE, ...) 
db_create_table(con, table, types, temporary = FALSE, ...) db_insert_into(con, table, values, ...) db_create_indexes(con, table, indexes = NULL, unique = FALSE, ...) db_create_index(con, table, columns, name = NULL, unique = FALSE, ...) db_drop_table(con, table, force = FALSE, ...) db_analyze(con, table, ...) db_explain(con, sql, ...) db_query_fields(con, sql, ...) db_query_rows(con, sql, ...) sql_select( con, select, from, where = NULL, group_by = NULL, having = NULL, order_by = NULL, limit = NULL, distinct = FALSE, ... ) sql_subquery(con, from, name = random_table_name(), ...) sql_join(con, x, y, vars, type = \"inner\", by = NULL, ...) sql_semi_join(con, x, y, anti = FALSE, by = NULL, ...) sql_set_op(con, x, y, method) sql_escape_string(con, x) sql_escape_ident(con, x)"},{"path":"https://dplyr.tidyverse.org/dev/reference/backend_dbplyr.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Database and SQL generics. — backend_dbplyr","text":"con database connection. table string, table name. fields list fields, data frame.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/backend_dbplyr.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Database and SQL generics. — backend_dbplyr","text":"Usually logical value indicating success. failures generate error. However, db_has_table() return NA temporary tables listed DBI::dbListTables() (due backend API limitations example). result, methods rely backend throw error table exists .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/backend_dbplyr.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Database and SQL generics. — backend_dbplyr","text":"backend methods call standard DBI S4 methods including db_data_type(): Calls DBI::dbDataType() every field (e.g. data frame column) returns vector corresponding SQL data types db_save_query(): Builds executes CREATE [TEMPORARY] TABLE

      ... SQL command. db_create_index(): Builds executes CREATE INDEX
      SQL command. db_drop_table(): Builds executes DROP TABLE [EXISTS]
      SQL command. db_analyze(): Builds executes ANALYZE
      SQL command. Currently, copy_to() user db_begin(), db_commit(), db_rollback(), db_write_table(), db_create_indexes(), db_drop_table() db_analyze(). find overriding many functions may suggest just override copy_to() instead. db_create_table() db_insert_into() deprecated favour db_write_table().","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/band_members.html","id":null,"dir":"Reference","previous_headings":"","what":"Band membership — band_members","title":"Band membership — band_members","text":"data sets describe band members Beatles Rolling Stones. toy data sets can displayed entirety slide (e.g. demonstrate join).","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/band_members.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Band membership — band_members","text":"","code":"band_members band_instruments band_instruments2"},{"path":"https://dplyr.tidyverse.org/dev/reference/band_members.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Band membership — band_members","text":"tibble two variables three observations","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/band_members.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Band membership — band_members","text":"band_instruments band_instruments2 contain data use different column names first column data set. 
band_instruments uses name, matches name key column band_members; band_instruments2 uses artist, .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/band_members.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Band membership — band_members","text":"","code":"band_members #> # A tibble: 3 × 2 #> name band #> #> 1 Mick Stones #> 2 John Beatles #> 3 Paul Beatles band_instruments #> # A tibble: 3 × 2 #> name plays #> #> 1 John guitar #> 2 Paul bass #> 3 Keith guitar band_instruments2 #> # A tibble: 3 × 2 #> artist plays #> #> 1 John guitar #> 2 Paul bass #> 3 Keith guitar"},{"path":"https://dplyr.tidyverse.org/dev/reference/between.html","id":null,"dir":"Reference","previous_headings":"","what":"Detect where values fall in a specified range — between","title":"Detect where values fall in a specified range — between","text":"shortcut x >= left & x <= right, implemented local vectors translated appropriate SQL remote tables.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/between.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Detect where values fall in a specified range — between","text":"","code":"between(x, left, right)"},{"path":"https://dplyr.tidyverse.org/dev/reference/between.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Detect where values fall in a specified range — between","text":"x vector left, right Boundary values. 
left right recycled size x.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/between.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Detect where values fall in a specified range — between","text":"logical vector size x.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/between.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Detect where values fall in a specified range — between","text":"x, left, right cast common type comparison made.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/between.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Detect where values fall in a specified range — between","text":"","code":"between(1:12, 7, 9) #> [1] FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE FALSE FALSE #> [12] FALSE x <- rnorm(1e2) x[between(x, -1, 1)] #> [1] 0.93536319 0.17648861 0.24368546 0.11203808 -0.13399701 #> [6] -0.27923724 -0.31344598 0.07003485 -0.63912332 -0.04996490 #> [11] -0.25148344 0.44479712 0.04653138 0.57770907 0.11819487 #> [16] 0.86208648 -0.24323674 -0.20608719 0.01917759 0.02956075 #> [21] 0.54982754 -0.36122126 0.21335575 -0.66508825 -0.24589641 #> [26] -0.97585062 0.13167063 0.48862881 0.28415034 0.23669628 #> [31] 0.52390979 0.60674805 -0.10993567 0.17218172 -0.09032729 #> [36] 0.74879127 0.55622433 -0.54825726 -0.15569378 0.43388979 #> [41] -0.38195111 0.42418757 -0.03810289 0.48614892 -0.35436116 #> [46] 0.94634789 -0.29664002 -0.38721358 -0.78543266 -0.79554143 #> [51] -0.69053790 -0.55854199 -0.53666333 0.22712713 0.97845492 #> [56] -0.20888265 0.25853729 -0.44179945 0.56859986 0.42485844 #> [61] 0.24940178 0.44945378 0.42656655 0.10758399 0.02229473 #> [66] 0.60361101 -0.26265057 -0.52826408 0.19214942 # On a tibble using `filter()` filter(starwars, between(height, 100, 150)) #> # A tibble: 5 × 14 #> name height mass hair_color skin_color eye_color birth_year sex 
#> #> 1 Leia Orga… 150 49 brown light brown 19 fema… #> 2 Mon Mothma 150 NA auburn fair blue 48 fema… #> 3 Watto 137 NA black blue, grey yellow NA male #> 4 Sebulba 112 40 none grey, red orange NA male #> 5 Gasgano 122 NA none white, bl… black NA male #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships "},{"path":"https://dplyr.tidyverse.org/dev/reference/bind_cols.html","id":null,"dir":"Reference","previous_headings":"","what":"Bind multiple data frames by column — bind_cols","title":"Bind multiple data frames by column — bind_cols","text":"Bind number data frames column, making wider result. similar .call(cbind, dfs). possible prefer using join combine multiple data frames. bind_cols() binds rows order appear easy create meaningless results without realising .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/bind_cols.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Bind multiple data frames by column — bind_cols","text":"","code":"bind_cols( ..., .name_repair = c(\"unique\", \"universal\", \"check_unique\", \"minimal\") )"},{"path":"https://dplyr.tidyverse.org/dev/reference/bind_cols.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Bind multiple data frames by column — bind_cols","text":"... Data frames combine. argument can either data frame, list data frame, list data frames. Inputs recycled length, matched position. .name_repair One \"unique\", \"universal\", \"check_unique\". 
See vctrs::vec_as_names() meaning options.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/bind_cols.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Bind multiple data frames by column — bind_cols","text":"data frame type first element ....","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/bind_cols.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Bind multiple data frames by column — bind_cols","text":"","code":"df1 <- tibble(x = 1:3) df2 <- tibble(y = 3:1) bind_cols(df1, df2) #> # A tibble: 3 × 2 #> x y #> #> 1 1 3 #> 2 2 2 #> 3 3 1 # Row sizes must be compatible when column-binding try(bind_cols(tibble(x = 1:3), tibble(y = 1:2))) #> Error in bind_cols(tibble(x = 1:3), tibble(y = 1:2)) : #> Can't recycle `..1` (size 3) to match `..2` (size 2)."},{"path":"https://dplyr.tidyverse.org/dev/reference/bind_rows.html","id":null,"dir":"Reference","previous_headings":"","what":"Bind multiple data frames by row — bind_rows","title":"Bind multiple data frames by row — bind_rows","text":"Bind number data frames row, making longer result. similar .call(rbind, dfs), output contain columns appear inputs.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/bind_rows.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Bind multiple data frames by row — bind_rows","text":"","code":"bind_rows(..., .id = NULL)"},{"path":"https://dplyr.tidyverse.org/dev/reference/bind_rows.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Bind multiple data frames by row — bind_rows","text":"... Data frames combine. argument can either data frame, list data frame, list data frames. Columns matched name, missing columns filled NA. .id name optional identifier column. Provide string create output column identifies input. 
column use names available, otherwise use positions.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/bind_rows.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Bind multiple data frames by row — bind_rows","text":"data frame type first element ....","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/bind_rows.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Bind multiple data frames by row — bind_rows","text":"","code":"df1 <- tibble(x = 1:2, y = letters[1:2]) df2 <- tibble(x = 4:5, z = 1:2) # You can supply individual data frames as arguments: bind_rows(df1, df2) #> # A tibble: 4 × 3 #> x y z #> #> 1 1 a NA #> 2 2 b NA #> 3 4 NA 1 #> 4 5 NA 2 # Or a list of data frames: bind_rows(list(df1, df2)) #> # A tibble: 4 × 3 #> x y z #> #> 1 1 a NA #> 2 2 b NA #> 3 4 NA 1 #> 4 5 NA 2 # When you supply a column name with the `.id` argument, a new # column is created to link each row to its original data frame bind_rows(list(df1, df2), .id = \"id\") #> # A tibble: 4 × 4 #> id x y z #> #> 1 1 1 a NA #> 2 1 2 b NA #> 3 2 4 NA 1 #> 4 2 5 NA 2 bind_rows(list(a = df1, b = df2), .id = \"id\") #> # A tibble: 4 × 4 #> id x y z #> #> 1 a 1 a NA #> 2 a 2 b NA #> 3 b 4 NA 1 #> 4 b 5 NA 2"},{"path":"https://dplyr.tidyverse.org/dev/reference/c_across.html","id":null,"dir":"Reference","previous_headings":"","what":"Combine values from multiple columns — c_across","title":"Combine values from multiple columns — c_across","text":"c_across() designed work rowwise() make easy perform row-wise aggregations. two differences c(): uses tidy select semantics can easily select multiple variables. See vignette(\"rowwise\") details. 
uses vctrs::vec_c() order give safer outputs.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/c_across.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Combine values from multiple columns — c_across","text":"","code":"c_across(cols)"},{"path":"https://dplyr.tidyverse.org/dev/reference/c_across.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Combine values from multiple columns — c_across","text":"cols Columns transform. select grouping columns already automatically handled verb (.e. summarise() mutate()).","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/c_across.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Combine values from multiple columns — c_across","text":"","code":"df <- tibble(id = 1:4, w = runif(4), x = runif(4), y = runif(4), z = runif(4)) df %>% rowwise() %>% mutate( sum = sum(c_across(w:z)), sd = sd(c_across(w:z)) ) #> # A tibble: 4 × 7 #> # Rowwise: #> id w x y z sum sd #> #> 1 1 0.126 0.533 0.172 0.196 1.03 0.186 #> 2 2 0.938 0.547 0.691 0.969 3.14 0.202 #> 3 3 0.801 0.0959 0.675 0.387 1.96 0.315 #> 4 4 0.758 0.388 0.946 0.650 2.74 0.233"},{"path":"https://dplyr.tidyverse.org/dev/reference/case_match.html","id":null,"dir":"Reference","previous_headings":"","what":"A general vectorised switch() — case_match","title":"A general vectorised switch() — case_match","text":"function allows vectorise multiple switch() statements. case evaluated sequentially first match element determines corresponding value output vector. cases match, .default used. 
case_match() R equivalent SQL \"simple\" CASE statement.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/case_match.html","id":"connection-to-case-when-","dir":"Reference","previous_headings":"","what":"Connection to case_when()","title":"A general vectorised switch() — case_match","text":"case_when() uses logical expressions left-hand side formula, case_match() uses values match .x . following two statements roughly equivalent:","code":"case_when( x %in% c(\"a\", \"b\") ~ 1, x %in% \"c\" ~ 2, x %in% c(\"d\", \"e\") ~ 3 ) case_match( x, c(\"a\", \"b\") ~ 1, \"c\" ~ 2, c(\"d\", \"e\") ~ 3 )"},{"path":"https://dplyr.tidyverse.org/dev/reference/case_match.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"A general vectorised switch() — case_match","text":"","code":"case_match(.x, ..., .default = NULL, .ptype = NULL)"},{"path":"https://dplyr.tidyverse.org/dev/reference/case_match.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"A general vectorised switch() — case_match","text":".x vector match . ... sequence two-sided formulas: old_values ~ new_value. right hand side (RHS) determines output value values .x match left hand side (LHS). LHS must evaluate type vector .x. can length, allowing map multiple .x values RHS value. value repeated LHS, .e. value .x matches multiple cases, first match used. RHS inputs coerced common type. RHS input recycled size .x. .default value used values .x matched LHS inputs. NULL, default, missing value used. .default recycled size .x. .ptype optional prototype declaring desired output type. 
supplied, output type taken common type RHS inputs .default.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/case_match.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"A general vectorised switch() — case_match","text":"vector size .x type common type RHS inputs .default (overridden .ptype).","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/case_match.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"A general vectorised switch() — case_match","text":"","code":"x <- c(\"a\", \"b\", \"a\", \"d\", \"b\", NA, \"c\", \"e\") # `case_match()` acts like a vectorized `switch()`. # Unmatched values \"fall through\" as a missing value. case_match( x, \"a\" ~ 1, \"b\" ~ 2, \"c\" ~ 3, \"d\" ~ 4 ) #> [1] 1 2 1 4 2 NA 3 NA # Missing values can be matched exactly, and `.default` can be used to # control the value used for unmatched values of `.x` case_match( x, \"a\" ~ 1, \"b\" ~ 2, \"c\" ~ 3, \"d\" ~ 4, NA ~ 0, .default = 100 ) #> [1] 1 2 1 4 2 0 3 100 # Input values can be grouped into the same expression to map them to the # same output value case_match( x, c(\"a\", \"b\") ~ \"low\", c(\"c\", \"d\", \"e\") ~ \"high\" ) #> [1] \"low\" \"low\" \"low\" \"high\" \"low\" NA \"high\" \"high\" # `case_match()` isn't limited to character input: y <- c(1, 2, 1, 3, 1, NA, 2, 4) case_match( y, c(1, 3) ~ \"odd\", c(2, 4) ~ \"even\", .default = \"missing\" ) #> [1] \"odd\" \"even\" \"odd\" \"odd\" \"odd\" \"missing\" \"even\" #> [8] \"even\" # Setting `.default` to the original vector is a useful way to replace # selected values, leaving everything else as is case_match(y, NA ~ 0, .default = y) #> [1] 1 2 1 3 1 0 2 4 starwars %>% mutate( # Replace missings, but leave everything else alone hair_color = case_match(hair_color, NA ~ \"unknown\", .default = hair_color), # Replace some, but not all, of the species species = case_match( species, \"Human\" ~ \"Humanoid\", 
\"Droid\" ~ \"Robot\", c(\"Wookiee\", \"Ewok\") ~ \"Hairy\", .default = species ), .keep = \"used\" ) #> # A tibble: 87 × 2 #> hair_color species #> #> 1 blond Humanoid #> 2 unknown Robot #> 3 unknown Robot #> 4 none Humanoid #> 5 brown Humanoid #> 6 brown, grey Humanoid #> 7 brown Humanoid #> 8 unknown Robot #> 9 black Humanoid #> 10 auburn, white Humanoid #> # ℹ 77 more rows"},{"path":"https://dplyr.tidyverse.org/dev/reference/case_when.html","id":null,"dir":"Reference","previous_headings":"","what":"A general vectorised if-else — case_when","title":"A general vectorised if-else — case_when","text":"function allows vectorise multiple if_else() statements. case evaluated sequentially first match element determines corresponding value output vector. cases match, .default used final \"else\" statment. case_when() R equivalent SQL \"searched\" CASE statement.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/case_when.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"A general vectorised if-else — case_when","text":"","code":"case_when(..., .default = NULL, .ptype = NULL, .size = NULL)"},{"path":"https://dplyr.tidyverse.org/dev/reference/case_when.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"A general vectorised if-else — case_when","text":"... sequence two-sided formulas. left hand side (LHS) determines values match case. right hand side (RHS) provides replacement value. LHS inputs must evaluate logical vectors. RHS inputs coerced common type. inputs recycled common size. said, encourage LHS inputs size. Recycling mainly useful RHS inputs, might supply size 1 input recycled size LHS inputs. NULL inputs ignored. .default value used LHS inputs return either FALSE NA. .default must size 1 size common size computed .... .default participates computation common type RHS inputs. NA values LHS conditions treated like FALSE, meaning result locations assigned .default value. 
handle missing values conditions differently, must explicitly catch another condition fall .default. typically involves variation .na(x) ~ value tailored usage case_when(). NULL, default, missing value used. .ptype optional prototype declaring desired output type. supplied, overrides common type RHS inputs. .size optional size declaring desired output size. supplied, overrides common size computed ....","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/case_when.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"A general vectorised if-else — case_when","text":"vector size common size computed inputs ... type common type RHS inputs ....","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/case_when.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"A general vectorised if-else — case_when","text":"","code":"x <- 1:70 case_when( x %% 35 == 0 ~ \"fizz buzz\", x %% 5 == 0 ~ \"fizz\", x %% 7 == 0 ~ \"buzz\", .default = as.character(x) ) #> [1] \"1\" \"2\" \"3\" \"4\" \"fizz\" #> [6] \"6\" \"buzz\" \"8\" \"9\" \"fizz\" #> [11] \"11\" \"12\" \"13\" \"buzz\" \"fizz\" #> [16] \"16\" \"17\" \"18\" \"19\" \"fizz\" #> [21] \"buzz\" \"22\" \"23\" \"24\" \"fizz\" #> [26] \"26\" \"27\" \"buzz\" \"29\" \"fizz\" #> [31] \"31\" \"32\" \"33\" \"34\" \"fizz buzz\" #> [36] \"36\" \"37\" \"38\" \"39\" \"fizz\" #> [41] \"41\" \"buzz\" \"43\" \"44\" \"fizz\" #> [46] \"46\" \"47\" \"48\" \"buzz\" \"fizz\" #> [51] \"51\" \"52\" \"53\" \"54\" \"fizz\" #> [56] \"buzz\" \"57\" \"58\" \"59\" \"fizz\" #> [61] \"61\" \"62\" \"buzz\" \"64\" \"fizz\" #> [66] \"66\" \"67\" \"68\" \"69\" \"fizz buzz\" # Like an if statement, the arguments are evaluated in order, so you must # proceed from the most specific to the most general. 
This won't work: case_when( x %% 5 == 0 ~ \"fizz\", x %% 7 == 0 ~ \"buzz\", x %% 35 == 0 ~ \"fizz buzz\", .default = as.character(x) ) #> [1] \"1\" \"2\" \"3\" \"4\" \"fizz\" \"6\" \"buzz\" \"8\" \"9\" \"fizz\" #> [11] \"11\" \"12\" \"13\" \"buzz\" \"fizz\" \"16\" \"17\" \"18\" \"19\" \"fizz\" #> [21] \"buzz\" \"22\" \"23\" \"24\" \"fizz\" \"26\" \"27\" \"buzz\" \"29\" \"fizz\" #> [31] \"31\" \"32\" \"33\" \"34\" \"fizz\" \"36\" \"37\" \"38\" \"39\" \"fizz\" #> [41] \"41\" \"buzz\" \"43\" \"44\" \"fizz\" \"46\" \"47\" \"48\" \"buzz\" \"fizz\" #> [51] \"51\" \"52\" \"53\" \"54\" \"fizz\" \"buzz\" \"57\" \"58\" \"59\" \"fizz\" #> [61] \"61\" \"62\" \"buzz\" \"64\" \"fizz\" \"66\" \"67\" \"68\" \"69\" \"fizz\" # If none of the cases match and no `.default` is supplied, NA is used: case_when( x %% 35 == 0 ~ \"fizz buzz\", x %% 5 == 0 ~ \"fizz\", x %% 7 == 0 ~ \"buzz\", ) #> [1] NA NA NA NA \"fizz\" #> [6] NA \"buzz\" NA NA \"fizz\" #> [11] NA NA NA \"buzz\" \"fizz\" #> [16] NA NA NA NA \"fizz\" #> [21] \"buzz\" NA NA NA \"fizz\" #> [26] NA NA \"buzz\" NA \"fizz\" #> [31] NA NA NA NA \"fizz buzz\" #> [36] NA NA NA NA \"fizz\" #> [41] NA \"buzz\" NA NA \"fizz\" #> [46] NA NA NA \"buzz\" \"fizz\" #> [51] NA NA NA NA \"fizz\" #> [56] \"buzz\" NA NA NA \"fizz\" #> [61] NA NA \"buzz\" NA \"fizz\" #> [66] NA NA NA NA \"fizz buzz\" # Note that `NA` values on the LHS are treated like `FALSE` and will be # assigned the `.default` value. You must handle them explicitly if you # want to use a different value. The exact way to handle missing values is # dependent on the set of LHS conditions you use. 
x[2:4] <- NA_real_ case_when( x %% 35 == 0 ~ \"fizz buzz\", x %% 5 == 0 ~ \"fizz\", x %% 7 == 0 ~ \"buzz\", is.na(x) ~ \"nope\", .default = as.character(x) ) #> [1] \"1\" \"nope\" \"nope\" \"nope\" \"fizz\" #> [6] \"6\" \"buzz\" \"8\" \"9\" \"fizz\" #> [11] \"11\" \"12\" \"13\" \"buzz\" \"fizz\" #> [16] \"16\" \"17\" \"18\" \"19\" \"fizz\" #> [21] \"buzz\" \"22\" \"23\" \"24\" \"fizz\" #> [26] \"26\" \"27\" \"buzz\" \"29\" \"fizz\" #> [31] \"31\" \"32\" \"33\" \"34\" \"fizz buzz\" #> [36] \"36\" \"37\" \"38\" \"39\" \"fizz\" #> [41] \"41\" \"buzz\" \"43\" \"44\" \"fizz\" #> [46] \"46\" \"47\" \"48\" \"buzz\" \"fizz\" #> [51] \"51\" \"52\" \"53\" \"54\" \"fizz\" #> [56] \"buzz\" \"57\" \"58\" \"59\" \"fizz\" #> [61] \"61\" \"62\" \"buzz\" \"64\" \"fizz\" #> [66] \"66\" \"67\" \"68\" \"69\" \"fizz buzz\" # `case_when()` evaluates all RHS expressions, and then constructs its # result by extracting the selected (via the LHS expressions) parts. # In particular `NaN`s are produced in this case: y <- seq(-2, 2, by = .5) case_when( y >= 0 ~ sqrt(y), .default = y ) #> Warning: NaNs produced #> [1] -2.0000000 -1.5000000 -1.0000000 -0.5000000 0.0000000 0.7071068 #> [7] 1.0000000 1.2247449 1.4142136 # `case_when()` is particularly useful inside `mutate()` when you want to # create a new variable that relies on a complex combination of existing # variables starwars %>% select(name:mass, gender, species) %>% mutate( type = case_when( height > 200 | mass > 200 ~ \"large\", species == \"Droid\" ~ \"robot\", .default = \"other\" ) ) #> # A tibble: 87 × 6 #> name height mass gender species type #> #> 1 Luke Skywalker 172 77 masculine Human other #> 2 C-3PO 167 75 masculine Droid robot #> 3 R2-D2 96 32 masculine Droid robot #> 4 Darth Vader 202 136 masculine Human large #> 5 Leia Organa 150 49 feminine Human other #> 6 Owen Lars 178 120 masculine Human other #> 7 Beru Whitesun Lars 165 75 feminine Human other #> 8 R5-D4 97 32 masculine Droid robot #> 9 Biggs Darklighter 183 84 
masculine Human other #> 10 Obi-Wan Kenobi 182 77 masculine Human other #> # ℹ 77 more rows # `case_when()` is not a tidy eval function. If you'd like to reuse # the same patterns, extract the `case_when()` call in a normal # function: case_character_type <- function(height, mass, species) { case_when( height > 200 | mass > 200 ~ \"large\", species == \"Droid\" ~ \"robot\", .default = \"other\" ) } case_character_type(150, 250, \"Droid\") #> [1] \"large\" case_character_type(150, 150, \"Droid\") #> [1] \"robot\" # Such functions can be used inside `mutate()` as well: starwars %>% mutate(type = case_character_type(height, mass, species)) %>% pull(type) #> [1] \"other\" \"robot\" \"robot\" \"large\" \"other\" \"other\" \"other\" \"robot\" #> [9] \"other\" \"other\" \"other\" \"other\" \"large\" \"other\" \"other\" \"large\" #> [17] \"other\" \"other\" \"other\" \"other\" \"other\" \"robot\" \"other\" \"other\" #> [25] \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" #> [33] \"other\" \"other\" \"other\" \"large\" \"large\" \"other\" \"other\" \"other\" #> [41] \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" #> [49] \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"large\" #> [57] \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" #> [65] \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"large\" \"large\" #> [73] \"other\" \"robot\" \"other\" \"other\" \"other\" \"large\" \"large\" \"other\" #> [81] \"other\" \"large\" \"other\" \"other\" \"other\" \"robot\" \"other\" # `case_when()` ignores `NULL` inputs. This is useful when you'd # like to use a pattern only under certain conditions. 
Here we'll # take advantage of the fact that `if` returns `NULL` when there is # no `else` clause: case_character_type <- function(height, mass, species, robots = TRUE) { case_when( height > 200 | mass > 200 ~ \"large\", if (robots) species == \"Droid\" ~ \"robot\", .default = \"other\" ) } starwars %>% mutate(type = case_character_type(height, mass, species, robots = FALSE)) %>% pull(type) #> [1] \"other\" \"other\" \"other\" \"large\" \"other\" \"other\" \"other\" \"other\" #> [9] \"other\" \"other\" \"other\" \"other\" \"large\" \"other\" \"other\" \"large\" #> [17] \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" #> [25] \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" #> [33] \"other\" \"other\" \"other\" \"large\" \"large\" \"other\" \"other\" \"other\" #> [41] \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" #> [49] \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"large\" #> [57] \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" #> [65] \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"large\" \"large\" #> [73] \"other\" \"other\" \"other\" \"other\" \"other\" \"large\" \"large\" \"other\" #> [81] \"other\" \"large\" \"other\" \"other\" \"other\" \"other\" \"other\""},{"path":"https://dplyr.tidyverse.org/dev/reference/check_dbplyr.html","id":null,"dir":"Reference","previous_headings":"","what":"dbplyr compatibility functions — check_dbplyr","title":"dbplyr compatibility functions — check_dbplyr","text":"dplyr 0.7.0, number database SQL functions moved dplyr dbplyr. generic functions stayed dplyr (since easy way conditionally import generic different packages), many SQL database helper functions moved. 
written backend, functions generate code need work dplyr 0.5.0 dplyr 0.7.0.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/check_dbplyr.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"dbplyr compatibility functions — check_dbplyr","text":"","code":"check_dbplyr() wrap_dbplyr_obj(obj_name)"},{"path":"https://dplyr.tidyverse.org/dev/reference/check_dbplyr.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"dbplyr compatibility functions — check_dbplyr","text":"","code":"wrap_dbplyr_obj(\"build_sql\") #> build_sql <- function (obj_name) #> { #> if (utils::packageVersion(\"dplyr\") > \"0.5.0\") { #> dplyr::check_dbplyr() #> dbplyr::build_sql(obj_name = obj_name) #> } #> else { #> dplyr::build_sql(obj_name = obj_name) #> } #> } wrap_dbplyr_obj(\"base_agg\") #> base_agg <- function () #> { #> if (utils::packageVersion(\"dplyr\") > \"0.5.0\") { #> dplyr::check_dbplyr() #> dbplyr::base_agg #> } #> else { #> dplyr::base_agg #> } #> }"},{"path":"https://dplyr.tidyverse.org/dev/reference/coalesce.html","id":null,"dir":"Reference","previous_headings":"","what":"Find the first non-missing element — coalesce","title":"Find the first non-missing element — coalesce","text":"Given set vectors, coalesce() finds first non-missing value position. inspired SQL COALESCE function thing SQL NULLs.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/coalesce.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Find the first non-missing element — coalesce","text":"","code":"coalesce(..., .ptype = NULL, .size = NULL)"},{"path":"https://dplyr.tidyverse.org/dev/reference/coalesce.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Find the first non-missing element — coalesce","text":"... One vectors. recycled , cast common type. .ptype optional prototype declaring desired output type. 
supplied, overrides common type vectors .... .size optional size declaring desired output size. supplied, overrides common size vectors ....","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/coalesce.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Find the first non-missing element — coalesce","text":"vector type size common type common size vectors ....","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/coalesce.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Find the first non-missing element — coalesce","text":"","code":"# Use a single value to replace all missing values x <- sample(c(1:5, NA, NA, NA)) coalesce(x, 0L) #> [1] 2 3 4 0 0 0 5 1 # The equivalent to a missing value in a list is `NULL` coalesce(list(1, 2, NULL), list(NA)) #> [[1]] #> [1] 1 #> #> [[2]] #> [1] 2 #> #> [[3]] #> [1] NA #> # Or generate a complete vector from partially missing pieces y <- c(1, 2, NA, NA, 5) z <- c(NA, NA, 3, 4, 5) coalesce(y, z) #> [1] 1 2 3 4 5 # Supply lists by splicing them into dots: vecs <- list( c(1, 2, NA, NA, 5), c(NA, NA, 3, 4, 5) ) coalesce(!!!vecs) #> [1] 1 2 3 4 5"},{"path":"https://dplyr.tidyverse.org/dev/reference/combine.html","id":null,"dir":"Reference","previous_headings":"","what":"Combine vectors — combine","title":"Combine vectors — combine","text":"combine() deprecated favour vctrs::vec_c(). combine() attempted automatically guess whether wanted c() unlist(), fail surprising ways. now believe better explicit.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/combine.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Combine vectors — combine","text":"","code":"combine(...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/combine.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Combine vectors — combine","text":"... 
Vectors combine.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/combine.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Combine vectors — combine","text":"","code":"f1 <- factor(\"a\") f2 <- factor(\"b\") combine(f1, f2) #> Warning: `combine()` was deprecated in dplyr 1.0.0. #> ℹ Please use `vctrs::vec_c()` instead. #> [1] a b #> Levels: a b # -> vctrs::vec_c(f1, f1) #> [1] a a #> Levels: a combine(list(f1, f2)) #> Warning: `combine()` was deprecated in dplyr 1.0.0. #> ℹ Please use `vctrs::vec_c()` instead. #> [1] a b #> Levels: a b # -> vctrs::vec_c(!!!list(f1, f2)) #> [1] a b #> Levels: a b"},{"path":"https://dplyr.tidyverse.org/dev/reference/common_by.html","id":null,"dir":"Reference","previous_headings":"","what":"Extract out common by variables — common_by","title":"Extract out common by variables — common_by","text":"Extract common variables","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/common_by.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Extract out common by variables — common_by","text":"","code":"common_by(by = NULL, x, y)"},{"path":"https://dplyr.tidyverse.org/dev/reference/compute.html","id":null,"dir":"Reference","previous_headings":"","what":"Force computation of a database query — compute","title":"Force computation of a database query — compute","text":"compute() stores results remote temporary table. collect() retrieves data local tibble. collapse() slightly different: force computation, instead forces generation SQL query. sometimes needed work around bugs dplyr's SQL generation. functions preserve grouping ordering.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/compute.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Force computation of a database query — compute","text":"","code":"compute(x, ...) collect(x, ...) 
collapse(x, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/compute.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Force computation of a database query — compute","text":"x data frame, data frame extension (e.g. tibble), lazy data frame (e.g. dbplyr dtplyr). See Methods, , details. ... Arguments passed methods","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/compute.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Force computation of a database query — compute","text":"functions generics, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. Methods available currently loaded packages: compute(): dbplyr (tbl_sql), dplyr (data.frame) collect(): dbplyr (tbl_sql), dplyr (data.frame) collapse(): dbplyr (tbl_sql), dplyr (data.frame)","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/compute.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Force computation of a database query — compute","text":"","code":"mtcars2 <- dbplyr::src_memdb() %>% copy_to(mtcars, name = \"mtcars2-cc\", overwrite = TRUE) remote <- mtcars2 %>% filter(cyl == 8) %>% select(mpg:drat) # Compute query and save in remote table compute(remote) #> # Source: table<`dbplyr_rUoBF7QxMz`> [?? x 5] #> # Database: sqlite 3.45.2 [:memory:] #> mpg cyl disp hp drat #> #> 1 18.7 8 360 175 3.15 #> 2 14.3 8 360 245 3.21 #> 3 16.4 8 276. 180 3.07 #> 4 17.3 8 276. 180 3.07 #> 5 15.2 8 276. 180 3.07 #> 6 10.4 8 472 205 2.93 #> 7 10.4 8 460 215 3 #> 8 14.7 8 440 230 3.23 #> 9 15.5 8 318 150 2.76 #> 10 15.2 8 304 150 3.15 #> # ℹ more rows # Compute query bring back to this session collect(remote) #> # A tibble: 14 × 5 #> mpg cyl disp hp drat #> #> 1 18.7 8 360 175 3.15 #> 2 14.3 8 360 245 3.21 #> 3 16.4 8 276. 180 3.07 #> 4 17.3 8 276. 180 3.07 #> 5 15.2 8 276. 
180 3.07 #> 6 10.4 8 472 205 2.93 #> 7 10.4 8 460 215 3 #> 8 14.7 8 440 230 3.23 #> 9 15.5 8 318 150 2.76 #> 10 15.2 8 304 150 3.15 #> 11 13.3 8 350 245 3.73 #> 12 19.2 8 400 175 3.08 #> 13 15.8 8 351 264 4.22 #> 14 15 8 301 335 3.54 # Creates a fresh query based on the generated SQL collapse(remote) #> # Source: SQL [?? x 5] #> # Database: sqlite 3.45.2 [:memory:] #> mpg cyl disp hp drat #> #> 1 18.7 8 360 175 3.15 #> 2 14.3 8 360 245 3.21 #> 3 16.4 8 276. 180 3.07 #> 4 17.3 8 276. 180 3.07 #> 5 15.2 8 276. 180 3.07 #> 6 10.4 8 472 205 2.93 #> 7 10.4 8 460 215 3 #> 8 14.7 8 440 230 3.23 #> 9 15.5 8 318 150 2.76 #> 10 15.2 8 304 150 3.15 #> # ℹ more rows"},{"path":"https://dplyr.tidyverse.org/dev/reference/consecutive_id.html","id":null,"dir":"Reference","previous_headings":"","what":"Generate a unique identifier for consecutive combinations — consecutive_id","title":"Generate a unique identifier for consecutive combinations — consecutive_id","text":"consecutive_id() generates unique identifier increments every time variable (combination variables) changes. Inspired data.table::rleid().","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/consecutive_id.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Generate a unique identifier for consecutive combinations — consecutive_id","text":"","code":"consecutive_id(...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/consecutive_id.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Generate a unique identifier for consecutive combinations — consecutive_id","text":"... Unnamed vectors. 
multiple vectors supplied, length.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/consecutive_id.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Generate a unique identifier for consecutive combinations — consecutive_id","text":"numeric vector length longest element ....","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/consecutive_id.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Generate a unique identifier for consecutive combinations — consecutive_id","text":"","code":"consecutive_id(c(TRUE, TRUE, FALSE, FALSE, TRUE, FALSE, NA, NA)) #> [1] 1 1 2 2 3 4 5 5 consecutive_id(c(1, 1, 1, 2, 1, 1, 2, 2)) #> [1] 1 1 1 2 3 3 4 4 df <- data.frame(x = c(0, 0, 1, 0), y = c(2, 2, 2, 2)) df %>% group_by(x, y) %>% summarise(n = n()) #> `summarise()` has grouped output by 'x'. You can override using the #> `.groups` argument. #> # A tibble: 2 × 3 #> # Groups: x [2] #> x y n #> #> 1 0 2 3 #> 2 1 2 1 df %>% group_by(id = consecutive_id(x, y), x, y) %>% summarise(n = n()) #> `summarise()` has grouped output by 'id', 'x'. You can override using the #> `.groups` argument. #> # A tibble: 3 × 4 #> # Groups: id, x [3] #> id x y n #> #> 1 1 0 2 2 #> 2 2 1 2 1 #> 3 3 0 2 1"},{"path":"https://dplyr.tidyverse.org/dev/reference/context.html","id":null,"dir":"Reference","previous_headings":"","what":"Information about the ","title":"Information about the ","text":"functions return information \"current\" group \"current\" variable, work inside specific contexts like summarise() mutate(). n() gives current group size. cur_group() gives group keys, tibble one row one column grouping variable. cur_group_id() gives unique numeric identifier current group. cur_group_rows() gives row indices current group. cur_column() gives name current column (across() ). See group_data() equivalent functions return values groups. 
See pick() way select subset columns using tidyselect syntax inside summarise() mutate().","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/context.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Information about the ","text":"","code":"n() cur_group() cur_group_id() cur_group_rows() cur_column()"},{"path":"https://dplyr.tidyverse.org/dev/reference/context.html","id":"data-table","dir":"Reference","previous_headings":"","what":"data.table","title":"Information about the ","text":"familiar data.table: cur_group_id() <-> .GRP cur_group() <-> .cur_group_rows() <-> .See pick() equivalent .SD.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/context.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Information about the ","text":"","code":"df <- tibble( g = sample(rep(letters[1:3], 1:3)), x = runif(6), y = runif(6) ) gf <- df %>% group_by(g) gf %>% summarise(n = n()) #> # A tibble: 3 × 2 #> g n #> #> 1 a 1 #> 2 b 2 #> 3 c 3 gf %>% mutate(id = cur_group_id()) #> # A tibble: 6 × 4 #> # Groups: g [3] #> g x y id #> #> 1 c 0.0367 0.642 3 #> 2 b 0.733 0.327 2 #> 3 b 0.215 0.387 2 #> 4 a 0.0160 0.708 1 #> 5 c 0.129 0.698 3 #> 6 c 0.686 0.926 3 gf %>% reframe(row = cur_group_rows()) #> # A tibble: 6 × 2 #> g row #> #> 1 a 4 #> 2 b 2 #> 3 b 3 #> 4 c 1 #> 5 c 5 #> 6 c 6 gf %>% summarise(data = list(cur_group())) #> # A tibble: 3 × 2 #> g data #> #> 1 a #> 2 b #> 3 c gf %>% mutate(across(everything(), ~ paste(cur_column(), round(.x, 2)))) #> # A tibble: 6 × 3 #> # Groups: g [3] #> g x y #> #> 1 c x 0.04 y 0.64 #> 2 b x 0.73 y 0.33 #> 3 b x 0.22 y 0.39 #> 4 a x 0.02 y 0.71 #> 5 c x 0.13 y 0.7 #> 6 c x 0.69 y 0.93"},{"path":"https://dplyr.tidyverse.org/dev/reference/copy_to.html","id":null,"dir":"Reference","previous_headings":"","what":"Copy a local data frame to a remote src — copy_to","title":"Copy a local data frame to a remote src — copy_to","text":"function uploads 
local data frame remote data source, creating table definition needed. Wherever possible, new object temporary, limited current connection source.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/copy_to.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Copy a local data frame to a remote src — copy_to","text":"","code":"copy_to(dest, df, name = deparse(substitute(df)), overwrite = FALSE, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/copy_to.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Copy a local data frame to a remote src — copy_to","text":"dest remote data source df local data frame name name new remote table. overwrite TRUE, overwrite existing table name name. FALSE, throw error name already exists. ... parameters passed methods.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/copy_to.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Copy a local data frame to a remote src — copy_to","text":"tbl object remote source","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/copy_to.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Copy a local data frame to a remote src — copy_to","text":"function generic, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. 
following methods currently available loaded packages: dbplyr (src_sql), dplyr (DBIConnection, src_local) .","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/copy_to.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Copy a local data frame to a remote src — copy_to","text":"","code":"if (FALSE) { iris2 <- dbplyr::src_memdb() %>% copy_to(iris, overwrite = TRUE) iris2 }"},{"path":"https://dplyr.tidyverse.org/dev/reference/count.html","id":null,"dir":"Reference","previous_headings":"","what":"Count the observations in each group — count","title":"Count the observations in each group — count","text":"count() lets quickly count unique values one variables: df %>% count(, b) roughly equivalent df %>% group_by(, b) %>% summarise(n = n()). count() paired tally(), lower-level helper equivalent df %>% summarise(n = n()). Supply wt perform weighted counts, switching summary n = n() n = sum(wt). add_count() add_tally() equivalents count() tally() use mutate() instead summarise() add new column group-wise counts.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/count.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Count the observations in each group — count","text":"","code":"count(x, ..., wt = NULL, sort = FALSE, name = NULL) # S3 method for data.frame count( x, ..., wt = NULL, sort = FALSE, name = NULL, .drop = group_by_drop_default(x) ) tally(x, wt = NULL, sort = FALSE, name = NULL) add_count(x, ..., wt = NULL, sort = FALSE, name = NULL, .drop = deprecated()) add_tally(x, wt = NULL, sort = FALSE, name = NULL)"},{"path":"https://dplyr.tidyverse.org/dev/reference/count.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Count the observations in each group — count","text":"x data frame, data frame extension (e.g. tibble), lazy data frame (e.g. dbplyr dtplyr). ... Variables group . wt Frequency weights. 
Can NULL variable: NULL (default), counts number rows group. variable, computes sum(wt) group. sort TRUE, show largest groups top. name name new column output. omitted, default n. already column called n, use nn. column called n nn, 'll use nnn, , adding ns gets new name. .drop Handling factor levels appear data, passed group_by(). count(): FALSE include counts empty groups (.e. levels factors exist data). add_count(): deprecated since actually affect output.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/count.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Count the observations in each group — count","text":"object type .data. count() add_count() group transiently, output groups input.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/count.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Count the observations in each group — count","text":"","code":"# count() is a convenient way to get a sense of the distribution of # values in a dataset starwars %>% count(species) #> # A tibble: 38 × 2 #> species n #> #> 1 Aleena 1 #> 2 Besalisk 1 #> 3 Cerean 1 #> 4 Chagrian 1 #> 5 Clawdite 1 #> 6 Droid 6 #> 7 Dug 1 #> 8 Ewok 1 #> 9 Geonosian 1 #> 10 Gungan 3 #> # ℹ 28 more rows starwars %>% count(species, sort = TRUE) #> # A tibble: 38 × 2 #> species n #> #> 1 Human 35 #> 2 Droid 6 #> 3 NA 4 #> 4 Gungan 3 #> 5 Kaminoan 2 #> 6 Mirialan 2 #> 7 Twi'lek 2 #> 8 Wookiee 2 #> 9 Zabrak 2 #> 10 Aleena 1 #> # ℹ 28 more rows starwars %>% count(sex, gender, sort = TRUE) #> # A tibble: 6 × 3 #> sex gender n #> #> 1 male masculine 60 #> 2 female feminine 16 #> 3 none masculine 5 #> 4 NA NA 4 #> 5 hermaphroditic masculine 1 #> 6 none feminine 1 starwars %>% count(birth_decade = round(birth_year, -1)) #> # A tibble: 15 × 2 #> birth_decade n #> #> 1 10 1 #> 2 20 6 #> 3 30 4 #> 4 40 6 #> 5 50 8 #> 6 60 4 #> 7 70 4 #> 8 80 2 #> 9 90 3 #> 10 100 1 #> 11 110 1 #> 12 200 1 #> 13 600 1 #> 
14 900 1 #> 15 NA 44 # use the `wt` argument to perform a weighted count. This is useful # when the data has already been aggregated once df <- tribble( ~name, ~gender, ~runs, \"Max\", \"male\", 10, \"Sandra\", \"female\", 1, \"Susan\", \"female\", 4 ) # counts rows: df %>% count(gender) #> # A tibble: 2 × 2 #> gender n #> #> 1 female 2 #> 2 male 1 # counts runs: df %>% count(gender, wt = runs) #> # A tibble: 2 × 2 #> gender n #> #> 1 female 5 #> 2 male 10 # When factors are involved, `.drop = FALSE` can be used to retain factor # levels that don't appear in the data df2 <- tibble( id = 1:5, type = factor(c(\"a\", \"c\", \"a\", NA, \"a\"), levels = c(\"a\", \"b\", \"c\")) ) df2 %>% count(type) #> # A tibble: 3 × 2 #> type n #> #> 1 a 3 #> 2 c 1 #> 3 NA 1 df2 %>% count(type, .drop = FALSE) #> # A tibble: 4 × 2 #> type n #> #> 1 a 3 #> 2 b 0 #> 3 c 1 #> 4 NA 1 # Or, using `group_by()`: df2 %>% group_by(type, .drop = FALSE) %>% count() #> # A tibble: 4 × 2 #> # Groups: type [4] #> type n #> #> 1 a 3 #> 2 b 0 #> 3 c 1 #> 4 NA 1 # tally() is a lower-level function that assumes you've done the grouping starwars %>% tally() #> # A tibble: 1 × 1 #> n #> #> 1 87 starwars %>% group_by(species) %>% tally() #> # A tibble: 38 × 2 #> species n #> #> 1 Aleena 1 #> 2 Besalisk 1 #> 3 Cerean 1 #> 4 Chagrian 1 #> 5 Clawdite 1 #> 6 Droid 6 #> 7 Dug 1 #> 8 Ewok 1 #> 9 Geonosian 1 #> 10 Gungan 3 #> # ℹ 28 more rows # both count() and tally() have add_ variants that work like # mutate() instead of summarise df %>% add_count(gender, wt = runs) #> # A tibble: 3 × 4 #> name gender runs n #> #> 1 Max male 10 10 #> 2 Sandra female 1 5 #> 3 Susan female 4 5 df %>% add_tally(wt = runs) #> # A tibble: 3 × 4 #> name gender runs n #> #> 1 Max male 10 15 #> 2 Sandra female 1 15 #> 3 Susan female 4 15"},{"path":"https://dplyr.tidyverse.org/dev/reference/cross_join.html","id":null,"dir":"Reference","previous_headings":"","what":"Cross join — cross_join","title":"Cross join — cross_join","text":"Cross 
joins match row x every row y, resulting data frame nrow(x) * nrow(y) rows. Since cross joins result possible matches x y, technically serve basis mutating joins, can generally thought cross joins followed filter. practice, specialized procedure used better performance.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/cross_join.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Cross join — cross_join","text":"","code":"cross_join(x, y, ..., copy = FALSE, suffix = c(\".x\", \".y\"))"},{"path":"https://dplyr.tidyverse.org/dev/reference/cross_join.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Cross join — cross_join","text":"x, y pair data frames, data frame extensions (e.g. tibble), lazy data frames (e.g. dbplyr dtplyr). See Methods, , details. ... parameters passed onto methods. copy x y data source, copy TRUE, y copied src x. allows join tables across srcs, potentially expensive operation must opt . suffix non-joined duplicate variables x y, suffixes added output disambiguate . character vector length 2.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/cross_join.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Cross join — cross_join","text":"object type x (including groups). output following properties: nrow(x) * nrow(y) rows returned. Output columns include columns x y. Column name collisions resolved using suffix. order rows columns x preserved much possible.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/cross_join.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Cross join — cross_join","text":"function generic, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. 
following methods currently available loaded packages: dbplyr (tbl_lazy), dplyr (data.frame) .","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/cross_join.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Cross join — cross_join","text":"","code":"# Cross joins match each row in `x` to every row in `y`. # Data within the columns is not used in the matching process. cross_join(band_instruments, band_members) #> # A tibble: 9 × 4 #> name.x plays name.y band #> #> 1 John guitar Mick Stones #> 2 John guitar John Beatles #> 3 John guitar Paul Beatles #> 4 Paul bass Mick Stones #> 5 Paul bass John Beatles #> 6 Paul bass Paul Beatles #> 7 Keith guitar Mick Stones #> 8 Keith guitar John Beatles #> 9 Keith guitar Paul Beatles # Control the suffix added to variables duplicated in # `x` and `y` with `suffix`. cross_join(band_instruments, band_members, suffix = c(\"\", \"_y\")) #> # A tibble: 9 × 4 #> name plays name_y band #> #> 1 John guitar Mick Stones #> 2 John guitar John Beatles #> 3 John guitar Paul Beatles #> 4 Paul bass Mick Stones #> 5 Paul bass John Beatles #> 6 Paul bass Paul Beatles #> 7 Keith guitar Mick Stones #> 8 Keith guitar John Beatles #> 9 Keith guitar Paul Beatles"},{"path":"https://dplyr.tidyverse.org/dev/reference/cumall.html","id":null,"dir":"Reference","previous_headings":"","what":"Cumulativate versions of any, all, and mean — cumall","title":"Cumulativate versions of any, all, and mean — cumall","text":"dplyr provides cumall(), cumany(), cummean() complete R's set cumulative functions.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/cumall.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Cumulativate versions of any, all, and mean — cumall","text":"","code":"cumall(x) cumany(x) 
cummean(x)"},{"path":"https://dplyr.tidyverse.org/dev/reference/cumall.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Cumulativate versions of any, all, and mean — cumall","text":"x cumall() cumany(), logical vector; cummean() integer numeric vector.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/cumall.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Cumulativate versions of any, all, and mean — cumall","text":"vector length x.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/cumall.html","id":"cumulative-logical-functions","dir":"Reference","previous_headings":"","what":"Cumulative logical functions","title":"Cumulativate versions of any, all, and mean — cumall","text":"particularly useful conjunction filter(): cumall(x): cases first FALSE. cumall(!x): cases first TRUE. cumany(x): cases first TRUE. cumany(!x): cases first FALSE.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/cumall.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Cumulativate versions of any, all, and mean — cumall","text":"","code":"# `cummean()` returns a numeric/integer vector of the same length # as the input vector. x <- c(1, 3, 5, 2, 2) cummean(x) #> [1] 1.00 2.00 3.00 2.75 2.60 cumsum(x) / seq_along(x) #> [1] 1.00 2.00 3.00 2.75 2.60 # `cumall()` and `cumany()` return logicals cumall(x < 5) #> [1] TRUE TRUE FALSE FALSE FALSE cumany(x == 3) #> [1] FALSE TRUE TRUE TRUE TRUE # `cumall()` vs. 
`cumany()` df <- data.frame( date = as.Date(\"2020-01-01\") + 0:6, balance = c(100, 50, 25, -25, -50, 30, 120) ) # all rows after first overdraft df %>% filter(cumany(balance < 0)) #> date balance #> 1 2020-01-04 -25 #> 2 2020-01-05 -50 #> 3 2020-01-06 30 #> 4 2020-01-07 120 # all rows until first overdraft df %>% filter(cumall(!(balance < 0))) #> date balance #> 1 2020-01-01 100 #> 2 2020-01-02 50 #> 3 2020-01-03 25"},{"path":"https://dplyr.tidyverse.org/dev/reference/defunct.html","id":null,"dir":"Reference","previous_headings":"","what":"Defunct functions — defunct","title":"Defunct functions — defunct","text":"functions deprecated least two years made defunct. known replacement, calling function tell .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/defunct.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Defunct functions — defunct","text":"","code":"# Deprecated in 0.5.0 ------------------------------------- id(.variables, drop = FALSE) # Deprecated in 0.7.0 ------------------------------------- failwith(default = NULL, f, quiet = FALSE) # Deprecated in 0.8.* ------------------------------------- select_vars(vars = chr(), ..., include = chr(), exclude = chr()) rename_vars(vars = chr(), ..., strict = TRUE) select_var(vars, var = -1) current_vars(...) # Deprecated in 1.0.0 ------------------------------------- bench_tbls(tbls, op, ..., times = 10) compare_tbls(tbls, op, ref = NULL, compare = equal_data_frame, ...) compare_tbls2(tbls_x, tbls_y, op, ref = NULL, compare = equal_data_frame, ...) eval_tbls(tbls, op) eval_tbls2(tbls_x, tbls_y, op) location(df) changes(x, y)"},{"path":"https://dplyr.tidyverse.org/dev/reference/deprec-context.html","id":null,"dir":"Reference","previous_headings":"","what":"Information about the ","title":"Information about the ","text":"functions deprecated dplyr 1.1.0. cur_data() deprecated favor pick(). 
cur_data_all() deprecated direct replacement selecting grouping variables well-defined unlikely ever useful.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/deprec-context.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Information about the ","text":"","code":"cur_data() cur_data_all()"},{"path":"https://dplyr.tidyverse.org/dev/reference/desc.html","id":null,"dir":"Reference","previous_headings":"","what":"Descending order — desc","title":"Descending order — desc","text":"Transform vector format sorted descending order. useful within arrange().","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/desc.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Descending order — desc","text":"","code":"desc(x)"},{"path":"https://dplyr.tidyverse.org/dev/reference/desc.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Descending order — desc","text":"x vector transform","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/desc.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Descending order — desc","text":"","code":"desc(1:10) #> [1] -1 -2 -3 -4 -5 -6 -7 -8 -9 -10 desc(factor(letters)) #> [1] -1 -2 -3 -4 -5 -6 -7 -8 -9 -10 -11 -12 -13 -14 -15 -16 -17 #> [18] -18 -19 -20 -21 -22 -23 -24 -25 -26 first_day <- seq(as.Date(\"1910/1/1\"), as.Date(\"1920/1/1\"), \"years\") desc(first_day) #> [1] 21915 21550 21185 20819 20454 20089 19724 19358 18993 18628 18263 starwars %>% arrange(desc(mass)) #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Jabba De… 175 1358 NA green-tan… orange 600 herm… #> 2 Grievous 216 159 none brown, wh… green, y… NA male #> 3 IG-88 200 140 none metal red 15 none #> 4 Darth Va… 202 136 none white yellow 41.9 male #> 5 Tarfful 234 136 brown brown blue NA male #> 6 Owen Lars 178 120 brown, gr… light blue 52 male #> 7 
Bossk 190 113 none green red 53 male #> 8 Chewbacca 228 112 brown unknown blue 200 male #> 9 Jek Tono… 180 110 brown fair blue NA NA #> 10 Dexter J… 198 102 none brown yellow NA male #> # ℹ 77 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships "},{"path":"https://dplyr.tidyverse.org/dev/reference/dim_desc.html","id":null,"dir":"Reference","previous_headings":"","what":"Describing dimensions — dim_desc","title":"Describing dimensions — dim_desc","text":"Prints dimensions array-like object user-friendly manner, substituting NA ?? (SQL queries).","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/dim_desc.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Describing dimensions — dim_desc","text":"","code":"dim_desc(x)"},{"path":"https://dplyr.tidyverse.org/dev/reference/dim_desc.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Describing dimensions — dim_desc","text":"x Object show dimensions .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/dim_desc.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Describing dimensions — dim_desc","text":"","code":"dim_desc(mtcars) #> [1] \"[32 x 11]\""},{"path":"https://dplyr.tidyverse.org/dev/reference/distinct.html","id":null,"dir":"Reference","previous_headings":"","what":"Keep distinct/unique rows — distinct","title":"Keep distinct/unique rows — distinct","text":"Keep unique/distinct rows data frame. 
similar unique.data.frame() considerably faster.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/distinct.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Keep distinct/unique rows — distinct","text":"","code":"distinct(.data, ..., .keep_all = FALSE)"},{"path":"https://dplyr.tidyverse.org/dev/reference/distinct.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Keep distinct/unique rows — distinct","text":".data data frame, data frame extension (e.g. tibble), lazy data frame (e.g. dbplyr dtplyr). See Methods, , details. ... Optional variables use determining uniqueness. multiple rows given combination inputs, first row preserved. omitted, use variables data frame. .keep_all TRUE, keep variables .data. combination ... distinct, keeps first row values.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/distinct.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Keep distinct/unique rows — distinct","text":"object type .data. output following properties: Rows subset input appear order. Columns modified ... empty .keep_all TRUE. Otherwise, distinct() first calls mutate() create new columns. Groups modified. Data frame attributes preserved.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/distinct.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Keep distinct/unique rows — distinct","text":"function generic, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. 
following methods currently available loaded packages: dbplyr (tbl_lazy), dplyr (data.frame) .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/distinct.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Keep distinct/unique rows — distinct","text":"","code":"df <- tibble( x = sample(10, 100, rep = TRUE), y = sample(10, 100, rep = TRUE) ) nrow(df) #> [1] 100 nrow(distinct(df)) #> [1] 71 nrow(distinct(df, x, y)) #> [1] 71 distinct(df, x) #> # A tibble: 10 × 1 #> x #> #> 1 3 #> 2 1 #> 3 9 #> 4 10 #> 5 5 #> 6 2 #> 7 8 #> 8 4 #> 9 7 #> 10 6 distinct(df, y) #> # A tibble: 10 × 1 #> y #> #> 1 7 #> 2 6 #> 3 3 #> 4 4 #> 5 10 #> 6 9 #> 7 1 #> 8 2 #> 9 5 #> 10 8 # You can choose to keep all other variables as well distinct(df, x, .keep_all = TRUE) #> # A tibble: 10 × 2 #> x y #> #> 1 3 7 #> 2 1 6 #> 3 9 4 #> 4 10 3 #> 5 5 4 #> 6 2 10 #> 7 8 9 #> 8 4 1 #> 9 7 3 #> 10 6 7 distinct(df, y, .keep_all = TRUE) #> # A tibble: 10 × 2 #> x y #> #> 1 3 7 #> 2 1 6 #> 3 1 3 #> 4 9 4 #> 5 2 10 #> 6 8 9 #> 7 4 1 #> 8 5 2 #> 9 3 5 #> 10 3 8 # You can also use distinct on computed variables distinct(df, diff = abs(x - y)) #> # A tibble: 10 × 1 #> diff #> #> 1 4 #> 2 5 #> 3 2 #> 4 7 #> 5 1 #> 6 8 #> 7 3 #> 8 6 #> 9 0 #> 10 9 # Use `pick()` to select columns with tidy-select distinct(starwars, pick(contains(\"color\"))) #> # A tibble: 67 × 3 #> hair_color skin_color eye_color #> #> 1 blond fair blue #> 2 NA gold yellow #> 3 NA white, blue red #> 4 none white yellow #> 5 brown light brown #> 6 brown, grey light blue #> 7 brown light blue #> 8 NA white, red red #> 9 black light brown #> 10 auburn, white fair blue-gray #> # ℹ 57 more rows # Grouping ------------------------------------------------- df <- tibble( g = c(1, 1, 2, 2, 2), x = c(1, 1, 2, 1, 2), y = c(3, 2, 1, 3, 1) ) df <- df %>% group_by(g) # With grouped data frames, distinctness is computed within each group df %>% distinct(x) #> # A tibble: 3 × 2 #> # Groups: g [2] #> g x #> 
#> 1 1 1 #> 2 2 2 #> 3 2 1 # When `...` are omitted, `distinct()` still computes distinctness using # all variables in the data frame df %>% distinct() #> # A tibble: 4 × 3 #> # Groups: g [2] #> g x y #> #> 1 1 1 3 #> 2 1 1 2 #> 3 2 2 1 #> 4 2 1 3"},{"path":"https://dplyr.tidyverse.org/dev/reference/distinct_all.html","id":null,"dir":"Reference","previous_headings":"","what":"Select distinct rows by a selection of variables — distinct_all","title":"Select distinct rows by a selection of variables — distinct_all","text":"Scoped verbs (_if, _at, _all) superseded use pick() across() existing verb. See vignette(\"colwise\") details. scoped variants distinct() extract distinct rows selection variables. Like distinct(), can modify variables ordering .funs argument.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/distinct_all.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Select distinct rows by a selection of variables — distinct_all","text":"","code":"distinct_all(.tbl, .funs = list(), ..., .keep_all = FALSE) distinct_at(.tbl, .vars, .funs = list(), ..., .keep_all = FALSE) distinct_if(.tbl, .predicate, .funs = list(), ..., .keep_all = FALSE)"},{"path":"https://dplyr.tidyverse.org/dev/reference/distinct_all.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Select distinct rows by a selection of variables — distinct_all","text":".tbl tbl object. .funs function fun, quosure style lambda ~ fun(.) list either form. ... Additional arguments function calls .funs. evaluated , tidy dots support. .keep_all TRUE, keep variables .data. combination ... distinct, keeps first row values. .vars list columns generated vars(), character vector column names, numeric vector column positions, NULL. .predicate predicate function applied columns logical vector. variables .predicate returns TRUE selected. 
argument passed rlang::as_function() thus supports quosure-style lambda functions strings representing function names.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/distinct_all.html","id":"grouping-variables","dir":"Reference","previous_headings":"","what":"Grouping variables","title":"Select distinct rows by a selection of variables — distinct_all","text":"grouping variables part selection taken account determine distinct rows.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/distinct_all.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Select distinct rows by a selection of variables — distinct_all","text":"","code":"df <- tibble(x = rep(2:5, each = 2) / 2, y = rep(2:3, each = 4) / 2) distinct_all(df) #> # A tibble: 4 × 2 #> x y #> #> 1 1 1 #> 2 1.5 1 #> 3 2 1.5 #> 4 2.5 1.5 # -> distinct(df, pick(everything())) #> # A tibble: 4 × 2 #> x y #> #> 1 1 1 #> 2 1.5 1 #> 3 2 1.5 #> 4 2.5 1.5 distinct_at(df, vars(x,y)) #> # A tibble: 4 × 2 #> x y #> #> 1 1 1 #> 2 1.5 1 #> 3 2 1.5 #> 4 2.5 1.5 # -> distinct(df, pick(x, y)) #> # A tibble: 4 × 2 #> x y #> #> 1 1 1 #> 2 1.5 1 #> 3 2 1.5 #> 4 2.5 1.5 distinct_if(df, is.numeric) #> # A tibble: 4 × 2 #> x y #> #> 1 1 1 #> 2 1.5 1 #> 3 2 1.5 #> 4 2.5 1.5 # -> distinct(df, pick(where(is.numeric))) #> # A tibble: 4 × 2 #> x y #> #> 1 1 1 #> 2 1.5 1 #> 3 2 1.5 #> 4 2.5 1.5 # You can supply a function that will be applied before extracting the distinct values # The variables of the sorted tibble keep their original values. distinct_all(df, round) #> # A tibble: 3 × 2 #> x y #> #> 1 1 1 #> 2 2 1 #> 3 2 2 # -> distinct(df, across(everything(), round)) #> # A tibble: 3 × 2 #> x y #> #> 1 1 1 #> 2 2 1 #> 3 2 2"},{"path":"https://dplyr.tidyverse.org/dev/reference/do.html","id":null,"dir":"Reference","previous_headings":"","what":"Do anything — do","title":"Do anything — do","text":"() superseded dplyr 1.0.0, syntax never really felt like belonged rest dplyr. 
replaced combination reframe() (can produce multiple rows multiple columns), nest_by() (creates rowwise tibble nested data), pick() (allows access data \"current\" group).","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/do.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Do anything — do","text":"","code":"do(.data, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/do.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Do anything — do","text":".data tbl ... Expressions apply group. named, results stored new column. unnamed, must return data frame. can use . refer current group. can mix named unnamed arguments.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/do.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Do anything — do","text":"","code":"# do() with unnamed arguments becomes reframe() or summarise() # . becomes pick() by_cyl <- mtcars %>% group_by(cyl) by_cyl %>% do(head(., 2)) #> # A tibble: 6 × 11 #> # Groups: cyl [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 2 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 3 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 4 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 # -> by_cyl %>% reframe(head(pick(everything()), 2)) #> # A tibble: 6 × 11 #> cyl mpg disp hp drat wt qsec vs am gear carb #> #> 1 4 22.8 108 93 3.85 2.32 18.6 1 1 4 1 #> 2 4 24.4 147. 62 3.69 3.19 20 1 0 4 2 #> 3 6 21 160 110 3.9 2.62 16.5 0 1 4 4 #> 4 6 21 160 110 3.9 2.88 17.0 0 1 4 4 #> 5 8 18.7 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 8 14.3 360 245 3.21 3.57 15.8 0 0 3 4 by_cyl %>% slice_head(n = 2) #> # A tibble: 6 × 11 #> # Groups: cyl [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 2 24.4 4 147. 
62 3.69 3.19 20 1 0 4 2 #> 3 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 4 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 # Can refer to variables directly by_cyl %>% do(mean = mean(.$vs)) #> # A tibble: 3 × 2 #> # Rowwise: #> cyl mean #> #> 1 4 #> 2 6 #> 3 8 # -> by_cyl %>% summarise(mean = mean(vs)) #> # A tibble: 3 × 2 #> cyl mean #> #> 1 4 0.909 #> 2 6 0.571 #> 3 8 0 # do() with named arguments becomes nest_by() + mutate() & list() models <- by_cyl %>% do(mod = lm(mpg ~ disp, data = .)) # -> models <- mtcars %>% nest_by(cyl) %>% mutate(mod = list(lm(mpg ~ disp, data = data))) models %>% summarise(rsq = summary(mod)$r.squared) #> `summarise()` has grouped output by 'cyl'. You can override using the #> `.groups` argument. #> # A tibble: 3 × 2 #> # Groups: cyl [3] #> cyl rsq #> #> 1 4 0.648 #> 2 6 0.0106 #> 3 8 0.270 # use broom to turn models into data models %>% do(data.frame( var = names(coef(.$mod)), coef(summary(.$mod))) ) #> # A tibble: 6 × 5 #> # Rowwise: #> var Estimate Std..Error t.value Pr...t.. 
#> #> 1 (Intercept) 40.9 3.59 11.4 0.00000120 #> 2 disp -0.135 0.0332 -4.07 0.00278 #> 3 (Intercept) 19.1 2.91 6.55 0.00124 #> 4 disp 0.00361 0.0156 0.232 0.826 #> 5 (Intercept) 22.0 3.35 6.59 0.0000259 #> 6 disp -0.0196 0.00932 -2.11 0.0568 # -> models %>% reframe(broom::tidy(mod)) #> # A tibble: 6 × 6 #> cyl term estimate std.error statistic p.value #> #> 1 4 (Intercept) 40.9 3.59 11.4 0.00000120 #> 2 4 disp -0.135 0.0332 -4.07 0.00278 #> 3 6 (Intercept) 19.1 2.91 6.55 0.00124 #> 4 6 disp 0.00361 0.0156 0.232 0.826 #> 5 8 (Intercept) 22.0 3.35 6.59 0.0000259 #> 6 8 disp -0.0196 0.00932 -2.11 0.0568"},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr-locale.html","id":null,"dir":"Reference","previous_headings":"","what":"Locale used by arrange() — dplyr-locale","title":"Locale used by arrange() — dplyr-locale","text":"page documents details locale used arrange() ordering character vectors.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr-locale.html","id":"default-locale","dir":"Reference","previous_headings":"","what":"Default locale","title":"Locale used by arrange() — dplyr-locale","text":"default locale used arrange() C locale. used .locale = NULL unless dplyr.legacy_locale global option set TRUE. can also force C locale used unconditionally .locale = \"C\". C locale exactly English locales, \"en\". main difference C locale groups English alphabet case, English locales group alphabet letter. example, c(\"\", \"b\", \"C\", \"B\", \"c\") sort c(\"B\", \"C\", \"\", \"b\", \"c\") C locale, uppercase letters coming lowercase letters, sort c(\"\", \"b\", \"B\", \"c\", \"C\") English locale. 
often makes little practical difference data analysis, return identical results case consistent observations.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr-locale.html","id":"reproducibility","dir":"Reference","previous_headings":"","what":"Reproducibility","title":"Locale used by arrange() — dplyr-locale","text":"C locale benefit completely reproducible across supported R versions operating systems extra effort. set .locale option stringi::stri_locale_list(), stringi must installed anyone wants run code. utilize package, stringi placed Imports.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr-locale.html","id":"legacy-behavior","dir":"Reference","previous_headings":"","what":"Legacy behavior","title":"Locale used by arrange() — dplyr-locale","text":"Prior dplyr 1.1.0, character columns ordered system locale. need temporarily revert behavior, can set global option dplyr.legacy_locale TRUE, used sparingly expect option removed future version dplyr. better update existing code explicitly use .locale instead. Note setting dplyr.legacy_locale also force calls group_by() use system locale internally ordering groups. Setting .locale override usage dplyr.legacy_locale.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr-locale.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Locale used by arrange() — dplyr-locale","text":"","code":"df <- tibble(x = c(\"a\", \"b\", \"C\", \"B\", \"c\")) df #> # A tibble: 5 × 1 #> x #> #> 1 a #> 2 b #> 3 C #> 4 B #> 5 c # Default locale is C, which groups the English alphabet by case, placing # uppercase letters before lowercase letters. arrange(df, x) #> # A tibble: 5 × 1 #> x #> #> 1 B #> 2 C #> 3 a #> 4 b #> 5 c # The American English locale groups the alphabet by letter. # Explicitly override `.locale` with `\"en\"` for this ordering. 
arrange(df, x, .locale = \"en\") #> # A tibble: 5 × 1 #> x #> #> 1 a #> 2 b #> 3 B #> 4 c #> 5 C # This Danish letter is expected to sort after `z` df <- tibble(x = c(\"o\", \"p\", \"\\u00F8\", \"z\")) df #> # A tibble: 4 × 1 #> x #> #> 1 o #> 2 p #> 3 ø #> 4 z # The American English locale sorts it right after `o` arrange(df, x, .locale = \"en\") #> # A tibble: 4 × 1 #> x #> #> 1 o #> 2 ø #> 3 p #> 4 z # Using `\"da\"` for Danish ordering gives the expected result arrange(df, x, .locale = \"da\") #> # A tibble: 4 × 1 #> x #> #> 1 o #> 2 p #> 3 z #> 4 ø # If you need the legacy behavior of `arrange()`, which respected the # system locale, then you can set the global option `dplyr.legacy_locale`, # but expect this to be removed in the future. We recommend that you use # the `.locale` argument instead. rlang::with_options(dplyr.legacy_locale = TRUE, { arrange(df, x) }) #> # A tibble: 4 × 1 #> x #> #> 1 o #> 2 p #> 3 z #> 4 ø"},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr-package.html","id":null,"dir":"Reference","previous_headings":"","what":"dplyr: A Grammar of Data Manipulation — dplyr-package","title":"dplyr: A Grammar of Data Manipulation — dplyr-package","text":"learn dplyr, start vignettes: browseVignettes(package = \"dplyr\")","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr-package.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"dplyr: A Grammar of Data Manipulation — dplyr-package","text":"Maintainer: Hadley Wickham hadley@posit.co (ORCID) Authors: Romain François (ORCID) Lionel Henry Kirill Müller (ORCID) Davis Vaughan davis@posit.co (ORCID) contributors: Posit Software, PBC [copyright holder, funder]","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr_by.html","id":null,"dir":"Reference","previous_headings":"","what":"Per-operation grouping with .by/by — dplyr_by","title":"Per-operation grouping with .by/by — dplyr_by","text":"two ways group dplyr: Persistent 
grouping group_by() Per-operation grouping ./help page dedicated explaining might want use latter. Depending dplyr verb, per-operation grouping argument may named .. Supported verbs section outlines case--case basis. remainder page refer .simplicity. Grouping radically affects computation dplyr verb use , one goals .allow place grouping specification alongside code actually uses . added benefit, .longer need remember ungroup() summarise(), summarise() ever message handling groups! idea comes data.table, allows specify alongside modifications j, like: dt[, .(x = mean(x)), = g].","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr_by.html","id":"supported-verbs","dir":"Reference","previous_headings":"","what":"Supported verbs","title":"Per-operation grouping with .by/by — dplyr_by","text":"mutate(.= ) summarise(.= ) reframe(.= ) filter(.= ) slice(.= ) slice_head(= ) slice_tail(= ) slice_min(= ) slice_max(= ) slice_sample(= ) Note dplyr verbs use others use .. purely technical difference.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr_by.html","id":"using-by","dir":"Reference","previous_headings":"","what":"Using .by","title":"Per-operation grouping with .by/by — dplyr_by","text":"take look two grouping approaches using expenses data set, tracks costs accumulated across various ids regions: Imagine wanted compute average cost per region. probably write something like : Instead, can now specify grouping inline within verb: .applies single operation, meaning since expenses ungrouped data frame, result applying .also always ungrouped data frame, regardless number grouping columns. Compare group_by() %>% summarise(), summarise() generally peels 1 layer grouping default, typically message : .grouping applies single operation, need worry ungrouping, never needs emit message remind groups. Note .specified multiple columns group using tidy-select syntax c(id, region). 
character vector column names like group , can .= all_of(my_cols). group columns order provided. prevent surprising results, use .existing grouped data frame: far focused usage .summarise(), .works number dplyr verbs. example, append mean cost per region onto original data frame new column rather computing summary: slice maximum cost per combination id region:","code":"expenses <- tibble( id = c(1, 2, 1, 3, 1, 2, 3), region = c(\"A\", \"A\", \"A\", \"B\", \"B\", \"A\", \"A\"), cost = c(25, 20, 19, 12, 9, 6, 6) ) expenses #> # A tibble: 7 x 3 #> id region cost #> #> 1 1 A 25 #> 2 2 A 20 #> 3 1 A 19 #> 4 3 B 12 #> 5 1 B 9 #> 6 2 A 6 #> 7 3 A 6 expenses %>% group_by(region) %>% summarise(cost = mean(cost)) #> # A tibble: 2 x 2 #> region cost #> #> 1 A 15.2 #> 2 B 10.5 expenses %>% summarise(cost = mean(cost), .by = region) #> # A tibble: 2 x 2 #> region cost #> #> 1 A 15.2 #> 2 B 10.5 expenses %>% summarise(cost = mean(cost), .by = c(id, region)) #> # A tibble: 5 x 3 #> id region cost #> #> 1 1 A 22 #> 2 2 A 13 #> 3 3 B 12 #> 4 1 B 9 #> 5 3 A 6 expenses %>% group_by(id, region) %>% summarise(cost = mean(cost)) #> `summarise()` has grouped output by 'id'. You can override using the `.groups` #> argument. #> # A tibble: 5 x 3 #> # Groups: id [3] #> id region cost #> #> 1 1 A 22 #> 2 1 B 9 #> 3 2 A 13 #> 4 3 A 6 #> 5 3 B 12 expenses %>% group_by(id) %>% summarise(cost = mean(cost), .by = c(id, region)) #> Error in `summarise()`: #> ! Can't supply `.by` when `.data` is a grouped data frame. 
expenses %>% mutate(cost_by_region = mean(cost), .by = region) #> # A tibble: 7 x 4 #> id region cost cost_by_region #> #> 1 1 A 25 15.2 #> 2 2 A 20 15.2 #> 3 1 A 19 15.2 #> 4 3 B 12 10.5 #> 5 1 B 9 10.5 #> 6 2 A 6 15.2 #> 7 3 A 6 15.2 # Note that the argument is named `by` in `slice_max()` expenses %>% slice_max(cost, n = 1, by = c(id, region)) #> # A tibble: 5 x 3 #> id region cost #> #> 1 1 A 25 #> 2 2 A 20 #> 3 3 B 12 #> 4 1 B 9 #> 5 3 A 6"},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr_by.html","id":"result-ordering","dir":"Reference","previous_headings":"","what":"Result ordering","title":"Per-operation grouping with .by/by — dplyr_by","text":"used ., summarise(), reframe(), slice() maintain ordering existing data. different group_by(), always sorted group keys ascending order. need sorted group keys, recommend explicitly use arrange() either call summarise(), reframe(), slice(). also gives full access arrange()'s features, desc() .locale argument.","code":"df <- tibble( month = c(\"jan\", \"jan\", \"feb\", \"feb\", \"mar\"), temp = c(20, 25, 18, 20, 40) ) # Uses ordering by \"first appearance\" in the original data df %>% summarise(average_temp = mean(temp), .by = month) #> # A tibble: 3 x 2 #> month average_temp #> #> 1 jan 22.5 #> 2 feb 19 #> 3 mar 40 # Sorts in ascending order df %>% group_by(month) %>% summarise(average_temp = mean(temp)) #> # A tibble: 3 x 2 #> month average_temp #> #> 1 feb 19 #> 2 jan 22.5 #> 3 mar 40"},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr_by.html","id":"verbs-without-by-support","dir":"Reference","previous_headings":"","what":"Verbs without .by support","title":"Per-operation grouping with .by/by — dplyr_by","text":"dplyr verb support ., typically means verb inherently affected grouping. example, pull() rename() support ., specifying columns group affect implementations. said, exceptions sometimes dplyr verb support ., special support grouped data frames created group_by(). 
typically verbs required retain grouping columns, example: select() always retains grouping columns, message specified select() call. distinct() count() place unspecified grouping columns front data frame computing results. arrange() .by_group argument optionally order grouping columns first. group_by() exist, verbs special support grouped data frames.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr_data_masking.html","id":null,"dir":"Reference","previous_headings":"","what":"Data-masking — dplyr_data_masking","title":"Data-masking — dplyr_data_masking","text":"page now located ?rlang::args_data_masking.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr_extending.html","id":null,"dir":"Reference","previous_headings":"","what":"Extending dplyr with new data frame subclasses — dplyr_extending","title":"Extending dplyr with new data frame subclasses — dplyr_extending","text":"three functions, along names<- 1d numeric [ (.e. x[loc]) methods, provide minimal interface extending dplyr work new data frame subclasses. means simple cases need provide couple methods, rather method every dplyr verb. functions stop-gap measure figure solve problem generally, likely code write implement find home comes next.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr_extending.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Extending dplyr with new data frame subclasses — dplyr_extending","text":"","code":"dplyr_row_slice(data, i, ...) dplyr_col_modify(data, cols) dplyr_reconstruct(data, template)"},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr_extending.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Extending dplyr with new data frame subclasses — dplyr_extending","text":"data tibble. use tibbles avoid inconsistent subset-assignment use cases. numeric logical vector indexes rows data. cols named list used modify columns. 
NULL value remove existing column. template Template data frame use restoring attributes.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr_extending.html","id":"basic-advice","dir":"Reference","previous_headings":"","what":"Basic advice","title":"Extending dplyr with new data frame subclasses — dplyr_extending","text":"section gives basic advice want extend dplyr work custom data frame subclass, want dplyr methods behave basically way. data frame attributes depend rows columns (unconditionally preserved), need anything. one exception subclass extends data.frame directly rather extending tibble. [.data.frame method preserve attributes, need write [ method subclass preserves attributes important class. scalar attributes depend rows, implement dplyr_reconstruct() method. method recompute attribute depending rows now present. scalar attributes depend columns, implement dplyr_reconstruct() method 1d [ method. example, class requires certain columns present, method return data.frame tibble columns removed. attributes vectorised rows, implement dplyr_row_slice() method. gives access can modify row attribute accordingly. also need think carefully recompute attribute dplyr_reconstruct(), need carefully verify behaviour verb, provide additional methods needed. attributes vectorised columns, implement dplyr_col_modify(), 1d [, names<- methods. methods know columns modified, can update column attribute according. also need think carefully recompute attribute dplyr_reconstruct(), need carefully verify behaviour verb, provide additional methods needed.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr_extending.html","id":"current-usage","dir":"Reference","previous_headings":"","what":"Current usage","title":"Extending dplyr with new data frame subclasses — dplyr_extending","text":"arrange(), filter(), slice() (rest slice_*() family), semi_join(), anti_join() work generating vector row indices, subsetting dplyr_row_slice(). 
mutate() generates list new column value (using NULL indicate columns deleted), passes dplyr_col_modify(). also uses 1d [ implement .keep, call relocate() either ..supplied. summarise() reframe() work similarly mutate() data modified dplyr_col_modify() comes group_data() built .. Note means data frames returned summarise() reframe() fundamentally new data frames, retain custom subclasses attributes. select() uses 1d [ select columns, names<- rename . rename() just uses names<-. relocate() just uses 1d [. inner_join(), left_join(), right_join(), full_join() coerce x tibble, modify rows, use dplyr_reconstruct() convert back type x. nest_join() converts x y tibbles, modifies rows, uses dplyr_col_modify() handle modified key variables list-column y becomes. also uses dplyr_reconstruct() convert outer result back type x, convert nested tibbles back type y. distinct() mutate() expressions present, uses 1d [ select variables keep, dplyr_row_slice() select distinct rows. Note group_by() ungroup() use generics need provide methods directly, rely .per-operation grouping.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr_tidy_select.html","id":null,"dir":"Reference","previous_headings":"","what":"Argument type: tidy-select — dplyr_tidy_select","title":"Argument type: tidy-select — dplyr_tidy_select","text":"page describes argument modifier indicates argument supports tidy selections. Tidy selection provides concise dialect R selecting variables based names properties. Tidy selection variant tidy evaluation. means inside functions, tidy-select arguments require special attention, described Indirection section . 
never heard tidy evaluation , start vignette(\"programming\").","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr_tidy_select.html","id":"overview-of-selection-features","dir":"Reference","previous_headings":"","what":"Overview of selection features","title":"Argument type: tidy-select — dplyr_tidy_select","text":"Tidyverse selections implement dialect R operators make easy select variables: : selecting range consecutive variables. ! taking complement set variables. & | selecting intersection union two sets variables. c() combining selections. addition, can use selection helpers. helpers select specific columns: everything(): Matches variables. last_col(): Select last variable, possibly offset. group_cols(): Select grouping columns. helpers select variables matching patterns names: starts_with(): Starts prefix. ends_with(): Ends suffix. contains(): Contains literal string. matches(): Matches regular expression. num_range(): Matches numerical range like x01, x02, x03. variables stored character vector: all_of(): Matches variable names character vector. names must present, otherwise --bounds error thrown. any_of(): all_of(), except error thrown names exist. using predicate function: (): Applies function variables selects function returns TRUE.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr_tidy_select.html","id":"indirection","dir":"Reference","previous_headings":"","what":"Indirection","title":"Argument type: tidy-select — dplyr_tidy_select","text":"two main cases: character vector column names, use all_of() any_of(), depending whether want unknown variable names cause error, e.g. select(df, all_of(vars)), select(df, !any_of(vars)). want user able supply tidyselect specification function argument, embrace function argument, e.g. 
select(df, {{ vars }}).","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/explain.html","id":null,"dir":"Reference","previous_headings":"","what":"Explain details of a tbl — explain","title":"Explain details of a tbl — explain","text":"generic function gives details object print(), focused human readable output str().","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/explain.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Explain details of a tbl — explain","text":"","code":"explain(x, ...) show_query(x, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/explain.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Explain details of a tbl — explain","text":"x object explain ... parameters possibly used generic","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/explain.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Explain details of a tbl — explain","text":"first argument, invisibly.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/explain.html","id":"databases","dir":"Reference","previous_headings":"","what":"Databases","title":"Explain details of a tbl — explain","text":"Explaining tbl_sql run SQL EXPLAIN command describe query plan. 
requires little bit knowledge EXPLAIN works database, useful diagnosing performance problems.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/explain.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Explain details of a tbl — explain","text":"","code":"# \\donttest{ lahman_s <- dbplyr::lahman_sqlite() #> Creating table: AllstarFull #> Creating table: Appearances #> Creating table: AwardsManagers #> Creating table: AwardsPlayers #> Creating table: AwardsShareManagers #> Creating table: AwardsSharePlayers #> Creating table: Batting #> Creating table: BattingPost #> Creating table: CollegePlaying #> Creating table: Fielding #> Creating table: FieldingOF #> Creating table: FieldingOFsplit #> Creating table: FieldingPost #> Creating table: HallOfFame #> Creating table: HomeGames #> Creating table: LahmanData #> Creating table: Managers #> Creating table: ManagersHalf #> Creating table: Parks #> Creating table: People #> Creating table: Pitching #> Creating table: PitchingPost #> Creating table: Salaries #> Creating table: Schools #> Creating table: SeriesPost #> Creating table: Teams #> Creating table: TeamsFranchises #> Creating table: TeamsHalf batting <- tbl(lahman_s, \"Batting\") batting %>% show_query() #> #> SELECT * #> FROM `Batting` batting %>% explain() #> #> SELECT * #> FROM `Batting` #> #> #> id parent notused detail #> 1 2 0 0 SCAN Batting # The batting database has indices on all ID variables: # SQLite automatically picks the most restrictive index batting %>% filter(lgID == \"NL\" & yearID == 2000L) %>% explain() #> #> SELECT `Batting`.* #> FROM `Batting` #> WHERE (`lgID` = 'NL' AND `yearID` = 2000) #> #> #> id parent notused detail #> 1 3 0 0 SEARCH Batting USING INDEX Batting_yearID (yearID=?) 
# OR's will use multiple indexes batting %>% filter(lgID == \"NL\" | yearID == 2000) %>% explain() #> #> SELECT `Batting`.* #> FROM `Batting` #> WHERE (`lgID` = 'NL' OR `yearID` = 2000.0) #> #> #> id parent notused detail #> 1 4 0 0 MULTI-INDEX OR #> 2 5 4 0 INDEX 1 #> 3 11 5 0 SEARCH Batting USING INDEX Batting_lgID (lgID=?) #> 4 16 4 0 INDEX 2 #> 5 22 16 0 SEARCH Batting USING INDEX Batting_yearID (yearID=?) # Joins will use indexes in both tables teams <- tbl(lahman_s, \"Teams\") batting %>% left_join(teams, c(\"yearID\", \"teamID\")) %>% explain() #> #> SELECT #> `playerID`, #> `Batting`.`yearID` AS `yearID`, #> `stint`, #> `Batting`.`teamID` AS `teamID`, #> `Batting`.`lgID` AS `lgID.x`, #> `Batting`.`G` AS `G.x`, #> `Batting`.`AB` AS `AB.x`, #> `Batting`.`R` AS `R.x`, #> `Batting`.`H` AS `H.x`, #> `Batting`.`X2B` AS `X2B.x`, #> `Batting`.`X3B` AS `X3B.x`, #> `Batting`.`HR` AS `HR.x`, #> `RBI`, #> `Batting`.`SB` AS `SB.x`, #> `Batting`.`CS` AS `CS.x`, #> `Batting`.`BB` AS `BB.x`, #> `Batting`.`SO` AS `SO.x`, #> `IBB`, #> `Batting`.`HBP` AS `HBP.x`, #> `SH`, #> `Batting`.`SF` AS `SF.x`, #> `GIDP`, #> `Teams`.`lgID` AS `lgID.y`, #> `franchID`, #> `divID`, #> `Rank`, #> `Teams`.`G` AS `G.y`, #> `Ghome`, #> `W`, #> `L`, #> `DivWin`, #> `WCWin`, #> `LgWin`, #> `WSWin`, #> `Teams`.`R` AS `R.y`, #> `Teams`.`AB` AS `AB.y`, #> `Teams`.`H` AS `H.y`, #> `Teams`.`X2B` AS `X2B.y`, #> `Teams`.`X3B` AS `X3B.y`, #> `Teams`.`HR` AS `HR.y`, #> `Teams`.`BB` AS `BB.y`, #> `Teams`.`SO` AS `SO.y`, #> `Teams`.`SB` AS `SB.y`, #> `Teams`.`CS` AS `CS.y`, #> `Teams`.`HBP` AS `HBP.y`, #> `Teams`.`SF` AS `SF.y`, #> `RA`, #> `ER`, #> `ERA`, #> `CG`, #> `SHO`, #> `SV`, #> `IPouts`, #> `HA`, #> `HRA`, #> `BBA`, #> `SOA`, #> `E`, #> `DP`, #> `FP`, #> `name`, #> `park`, #> `attendance`, #> `BPF`, #> `PPF`, #> `teamIDBR`, #> `teamIDlahman45`, #> `teamIDretro` #> FROM `Batting` #> LEFT JOIN `Teams` #> ON ( #> `Batting`.`yearID` = `Teams`.`yearID` AND #> `Batting`.`teamID` = `Teams`.`teamID` #> ) 
#> #> #> id parent notused #> 1 4 0 0 #> 2 6 0 0 #> detail #> 1 SCAN Batting #> 2 SEARCH Teams USING INDEX Teams_yearID (yearID=?) LEFT-JOIN # }"},{"path":"https://dplyr.tidyverse.org/dev/reference/filter-joins.html","id":null,"dir":"Reference","previous_headings":"","what":"Filtering joins — filter-joins","title":"Filtering joins — filter-joins","text":"Filtering joins filter rows x based presence absence matches y: semi_join() return rows x match y. anti_join() return rows x without match y.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/filter-joins.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Filtering joins — filter-joins","text":"","code":"semi_join(x, y, by = NULL, copy = FALSE, ...) # S3 method for data.frame semi_join(x, y, by = NULL, copy = FALSE, ..., na_matches = c(\"na\", \"never\")) anti_join(x, y, by = NULL, copy = FALSE, ...) # S3 method for data.frame anti_join(x, y, by = NULL, copy = FALSE, ..., na_matches = c(\"na\", \"never\"))"},{"path":"https://dplyr.tidyverse.org/dev/reference/filter-joins.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Filtering joins — filter-joins","text":"x, y pair data frames, data frame extensions (e.g. tibble), lazy data frames (e.g. dbplyr dtplyr). See Methods, , details. join specification created join_by(), character vector variables join . NULL, default, *_join() perform natural join, using variables common across x y. message lists variables can check correct; suppress message supplying explicitly. join different variables x y, use join_by() specification. example, join_by(== b) match x$y$b. join multiple variables, use join_by() specification multiple expressions. example, join_by(== b, c == d) match x$y$b x$c y$d. column names x y, can shorten listing variable names, like join_by(, c). join_by() can also used perform inequality, rolling, overlap joins. See documentation ?join_by details types joins. 
simple equality joins, can alternatively specify character vector variable names join . example, = c(\"\", \"b\") joins x$y$x$b y$b. variable names differ x y, use named character vector like = c(\"x_a\" = \"y_a\", \"x_b\" = \"y_b\"). perform cross-join, generating combinations x y, see cross_join(). copy x y data source, copy TRUE, y copied src x. allows join tables across srcs, potentially expensive operation must opt . ... parameters passed onto methods. na_matches two NA two NaN values match? \"na\", default, treats two NA two NaN values equal, like %%, match(), merge(). \"never\" treats two NA two NaN values different, never match together values. similar joins database sources base::merge(incomparables = NA).","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/filter-joins.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Filtering joins — filter-joins","text":"object type x. output following properties: Rows subset input, appear order. Columns modified. Data frame attributes preserved. Groups taken x. number groups may reduced.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/filter-joins.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Filtering joins — filter-joins","text":"function generics, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. Methods available currently loaded packages: semi_join(): dbplyr (tbl_lazy), dplyr (data.frame) . 
anti_join(): dbplyr (tbl_lazy), dplyr (data.frame) .","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/filter-joins.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Filtering joins — filter-joins","text":"","code":"# \"Filtering\" joins keep cases from the LHS band_members %>% semi_join(band_instruments) #> Joining with `by = join_by(name)` #> # A tibble: 2 × 2 #> name band #> #> 1 John Beatles #> 2 Paul Beatles band_members %>% anti_join(band_instruments) #> Joining with `by = join_by(name)` #> # A tibble: 1 × 2 #> name band #> #> 1 Mick Stones # To suppress the message about joining variables, supply `by` band_members %>% semi_join(band_instruments, by = join_by(name)) #> # A tibble: 2 × 2 #> name band #> #> 1 John Beatles #> 2 Paul Beatles # This is good practice in production code"},{"path":"https://dplyr.tidyverse.org/dev/reference/filter.html","id":null,"dir":"Reference","previous_headings":"","what":"Keep rows that match a condition — filter","title":"Keep rows that match a condition — filter","text":"filter() function used subset data frame, retaining rows satisfy conditions. retained, row must produce value TRUE conditions. Note condition evaluates NA row dropped, unlike base subsetting [.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/filter.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Keep rows that match a condition — filter","text":"","code":"filter(.data, ..., .by = NULL, .preserve = FALSE)"},{"path":"https://dplyr.tidyverse.org/dev/reference/filter.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Keep rows that match a condition — filter","text":".data data frame, data frame extension (e.g. tibble), lazy data frame (e.g. dbplyr dtplyr). See Methods, , details. ... Expressions return logical value, defined terms variables .data. multiple expressions included, combined & operator. 
rows conditions evaluate TRUE kept. . Optionally, selection columns group just operation, functioning alternative group_by(). details examples, see ?dplyr_by. .preserve Relevant .data input grouped. .preserve = FALSE (default), grouping structure recalculated based resulting data, otherwise grouping kept .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/filter.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Keep rows that match a condition — filter","text":"object type .data. output following properties: Rows subset input, appear order. Columns modified. number groups may reduced (.preserve TRUE). Data frame attributes preserved.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/filter.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Keep rows that match a condition — filter","text":"filter() function used subset rows .data, applying expressions ... column values determine rows retained. can applied grouped ungrouped data (see group_by() ungroup()). However, dplyr yet smart enough optimise filtering operation grouped datasets need grouped calculations. reason, filtering often considerably faster ungrouped data.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/filter.html","id":"useful-filter-functions","dir":"Reference","previous_headings":"","what":"Useful filter functions","title":"Keep rows that match a condition — filter","text":"many functions operators useful constructing expressions used filter data: ==, >, >= etc &, |, !, xor() .na() (), near()","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/filter.html","id":"grouped-tibbles","dir":"Reference","previous_headings":"","what":"Grouped tibbles","title":"Keep rows that match a condition — filter","text":"filtering expressions computed within groups, may yield different results grouped tibbles. case soon aggregating, lagging, ranking function involved. 
Compare ungrouped filtering: grouped equivalent: ungrouped version, filter() compares value mass row global average (taken whole data set), keeping rows mass greater global average. contrast, grouped version calculates average mass separately gender group, keeps rows mass greater relevant within-gender average.","code":"starwars %>% filter(mass > mean(mass, na.rm = TRUE)) starwars %>% group_by(gender) %>% filter(mass > mean(mass, na.rm = TRUE))"},{"path":"https://dplyr.tidyverse.org/dev/reference/filter.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Keep rows that match a condition — filter","text":"function generic, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. following methods currently available loaded packages: dbplyr (tbl_lazy), dplyr (data.frame, ts) .","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/filter.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Keep rows that match a condition — filter","text":"","code":"# Filtering by one criterion filter(starwars, species == \"Human\") #> # A tibble: 35 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Sky… 172 77 blond fair blue 19 male #> 2 Darth Va… 202 136 none white yellow 41.9 male #> 3 Leia Org… 150 49 brown light brown 19 fema… #> 4 Owen Lars 178 120 brown, gr… light blue 52 male #> 5 Beru Whi… 165 75 brown light blue 47 fema… #> 6 Biggs Da… 183 84 black light brown 24 male #> 7 Obi-Wan … 182 77 auburn, w… fair blue-gray 57 male #> 8 Anakin S… 188 84 blond fair blue 41.9 male #> 9 Wilhuff … 180 NA auburn, g… fair blue 64 male #> 10 Han Solo 180 80 brown fair brown 29 male #> # ℹ 25 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships filter(starwars, mass > 1000) #> # A tibble: 1 × 14 #> name height mass hair_color 
skin_color eye_color birth_year sex #> #> 1 Jabba Des… 175 1358 NA green-tan… orange 600 herm… #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships # Filtering by multiple criteria within a single logical expression filter(starwars, hair_color == \"none\" & eye_color == \"black\") #> # A tibble: 9 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Nien Nunb 160 68 none grey black NA male #> 2 Gasgano 122 NA none white, bl… black NA male #> 3 Kit Fisto 196 87 none green black NA male #> 4 Plo Koon 188 80 none orange black 22 male #> 5 Lama Su 229 88 none grey black NA male #> 6 Taun We 213 NA none grey black NA fema… #> 7 Shaak Ti 178 57 none red, blue… black NA fema… #> 8 Tion Medon 206 80 none grey black NA male #> 9 BB8 NA NA none none black NA none #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships filter(starwars, hair_color == \"none\" | eye_color == \"black\") #> # A tibble: 39 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Darth Va… 202 136 none white yellow 41.9 male #> 2 Greedo 173 74 NA green black 44 male #> 3 IG-88 200 140 none metal red 15 none #> 4 Bossk 190 113 none green red 53 male #> 5 Lobot 175 79 none light blue 37 male #> 6 Ackbar 180 83 none brown mot… orange 41 male #> 7 Nien Nunb 160 68 none grey black NA male #> 8 Nute Gun… 191 90 none mottled g… red NA male #> 9 Jar Jar … 196 66 none orange orange 52 male #> 10 Roos Tar… 224 82 none grey orange NA male #> # ℹ 29 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships # When multiple expressions are used, they are combined using & filter(starwars, hair_color == \"none\", eye_color == \"black\") #> # A tibble: 9 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Nien Nunb 160 68 none grey black NA male #> 2 Gasgano 122 NA none white, bl… black NA male #> 3 Kit Fisto 196 87 none green 
black NA male #> 4 Plo Koon 188 80 none orange black 22 male #> 5 Lama Su 229 88 none grey black NA male #> 6 Taun We 213 NA none grey black NA fema… #> 7 Shaak Ti 178 57 none red, blue… black NA fema… #> 8 Tion Medon 206 80 none grey black NA male #> 9 BB8 NA NA none none black NA none #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships # The filtering operation may yield different results on grouped # tibbles because the expressions are computed within groups. # # The following filters rows where `mass` is greater than the # global average: starwars %>% filter(mass > mean(mass, na.rm = TRUE)) #> # A tibble: 10 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Darth Va… 202 136 none white yellow 41.9 male #> 2 Owen Lars 178 120 brown, gr… light blue 52 male #> 3 Chewbacca 228 112 brown unknown blue 200 male #> 4 Jabba De… 175 1358 NA green-tan… orange 600 herm… #> 5 Jek Tono… 180 110 brown fair blue NA NA #> 6 IG-88 200 140 none metal red 15 none #> 7 Bossk 190 113 none green red 53 male #> 8 Dexter J… 198 102 none brown yellow NA male #> 9 Grievous 216 159 none brown, wh… green, y… NA male #> 10 Tarfful 234 136 brown brown blue NA male #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships # Whereas this keeps rows with `mass` greater than the gender # average: starwars %>% group_by(gender) %>% filter(mass > mean(mass, na.rm = TRUE)) #> # A tibble: 15 × 14 #> # Groups: gender [3] #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Darth V… 202 136 none white yellow 41.9 male #> 2 Owen La… 178 120 brown, gr… light blue 52 male #> 3 Beru Wh… 165 75 brown light blue 47 fema… #> 4 Chewbac… 228 112 brown unknown blue 200 male #> 5 Jabba D… 175 1358 NA green-tan… orange 600 herm… #> 6 Jek Ton… 180 110 brown fair blue NA NA #> 7 IG-88 200 140 none metal red 15 none #> 8 Bossk 190 113 none green red 53 male #> 9 Ayla Se… 178 55 none blue hazel 48 
fema… #> 10 Gregar … 185 85 black dark brown NA NA #> 11 Luminar… 170 56.2 black yellow blue 58 fema… #> 12 Zam Wes… 168 55 blonde fair, gre… yellow NA fema… #> 13 Shaak Ti 178 57 none red, blue… black NA fema… #> 14 Grievous 216 159 none brown, wh… green, y… NA male #> 15 Tarfful 234 136 brown brown blue NA male #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships # To refer to column names that are stored as strings, use the `.data` pronoun: vars <- c(\"mass\", \"height\") cond <- c(80, 150) starwars %>% filter( .data[[vars[[1]]]] > cond[[1]], .data[[vars[[2]]]] > cond[[2]] ) #> # A tibble: 21 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Darth Va… 202 136 none white yellow 41.9 male #> 2 Owen Lars 178 120 brown, gr… light blue 52 male #> 3 Biggs Da… 183 84 black light brown 24 male #> 4 Anakin S… 188 84 blond fair blue 41.9 male #> 5 Chewbacca 228 112 brown unknown blue 200 male #> 6 Jabba De… 175 1358 NA green-tan… orange 600 herm… #> 7 Jek Tono… 180 110 brown fair blue NA NA #> 8 IG-88 200 140 none metal red 15 none #> 9 Bossk 190 113 none green red 53 male #> 10 Ackbar 180 83 none brown mot… orange 41 male #> # ℹ 11 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships # Learn more in ?rlang::args_data_masking"},{"path":"https://dplyr.tidyverse.org/dev/reference/filter_all.html","id":null,"dir":"Reference","previous_headings":"","what":"Filter within a selection of variables — filter_all","title":"Filter within a selection of variables — filter_all","text":"Scoped verbs (_if, _at, _all) superseded use if_all() if_any() existing verb. See vignette(\"colwise\") details. scoped filtering verbs apply predicate expression selection variables. predicate expression quoted all_vars() any_vars() mention pronoun . 
refer variables.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/filter_all.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Filter within a selection of variables — filter_all","text":"","code":"filter_all(.tbl, .vars_predicate, .preserve = FALSE) filter_if(.tbl, .predicate, .vars_predicate, .preserve = FALSE) filter_at(.tbl, .vars, .vars_predicate, .preserve = FALSE)"},{"path":"https://dplyr.tidyverse.org/dev/reference/filter_all.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Filter within a selection of variables — filter_all","text":".tbl tbl object. .vars_predicate quoted predicate expression returned all_vars() any_vars(). Can also function purrr-like formula. case, intersection results taken default currently way request union. .preserve FALSE (default), grouping structure recalculated based resulting data, otherwise kept . .predicate predicate function applied columns logical vector. variables .predicate returns TRUE selected. argument passed rlang::as_function() thus supports quosure-style lambda functions strings representing function names. .vars list columns generated vars(), character vector column names, numeric vector column positions, NULL.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/filter_all.html","id":"grouping-variables","dir":"Reference","previous_headings":"","what":"Grouping variables","title":"Filter within a selection of variables — filter_all","text":"grouping variables part selection taken account determine filtered rows.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/filter_all.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Filter within a selection of variables — filter_all","text":"","code":"# While filter() accepts expressions with specific variables, the # scoped filter verbs take an expression with the pronoun `.` and # replicate it over all variables. 
This expression should be quoted # with all_vars() or any_vars(): all_vars(is.na(.)) #> #> #> expr: ^is.na(.) #> env: 0x558a995fa990 any_vars(is.na(.)) #> #> #> expr: ^is.na(.) #> env: 0x558a995fa990 # You can take the intersection of the replicated expressions: filter_all(mtcars, all_vars(. > 150)) #> [1] mpg cyl disp hp drat wt qsec vs am gear carb #> <0 rows> (or 0-length row.names) # -> filter(mtcars, if_all(everything(), ~ .x > 150)) #> [1] mpg cyl disp hp drat wt qsec vs am gear carb #> <0 rows> (or 0-length row.names) # Or the union: filter_all(mtcars, any_vars(. > 150)) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 #> Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 #> Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 #> Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 #> Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 #> Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 #> Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 #> Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 #> Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 #> Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 #> Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 #> Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 #> Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 #> Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 #> Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 #> AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 #> Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 #> Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 #> Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 #> Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 #> Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8 # -> filter(mtcars, if_any(everything(), ~ . 
> 150)) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 #> Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 #> Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 #> Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 #> Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 #> Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 #> Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 #> Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 #> Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 #> Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 #> Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 #> Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 #> Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 #> Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 #> Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 #> AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 #> Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 #> Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 #> Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 #> Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 #> Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8 # You can vary the selection of columns on which to apply the # predicate. filter_at() takes a vars() specification: filter_at(mtcars, vars(starts_with(\"d\")), any_vars((. 
%% 2) == 0)) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 #> Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 #> Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 #> Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 #> Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 #> Duster 360 14.3 8 360 245 3.21 3.570 15.84 0 0 3 4 #> Cadillac Fleetwood 10.4 8 472 205 2.93 5.250 17.98 0 0 3 4 #> Lincoln Continental 10.4 8 460 215 3.00 5.424 17.82 0 0 3 4 #> Chrysler Imperial 14.7 8 440 230 3.23 5.345 17.42 0 0 3 4 #> Dodge Challenger 15.5 8 318 150 2.76 3.520 16.87 0 0 3 2 #> AMC Javelin 15.2 8 304 150 3.15 3.435 17.30 0 0 3 2 #> Camaro Z28 13.3 8 350 245 3.73 3.840 15.41 0 0 3 4 #> Pontiac Firebird 19.2 8 400 175 3.08 3.845 17.05 0 0 3 2 # -> filter(mtcars, if_any(starts_with(\"d\"), ~ (.x %% 2) == 0)) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 #> Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 #> Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 #> Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 #> Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 #> Duster 360 14.3 8 360 245 3.21 3.570 15.84 0 0 3 4 #> Cadillac Fleetwood 10.4 8 472 205 2.93 5.250 17.98 0 0 3 4 #> Lincoln Continental 10.4 8 460 215 3.00 5.424 17.82 0 0 3 4 #> Chrysler Imperial 14.7 8 440 230 3.23 5.345 17.42 0 0 3 4 #> Dodge Challenger 15.5 8 318 150 2.76 3.520 16.87 0 0 3 2 #> AMC Javelin 15.2 8 304 150 3.15 3.435 17.30 0 0 3 2 #> Camaro Z28 13.3 8 350 245 3.73 3.840 15.41 0 0 3 4 #> Pontiac Firebird 19.2 8 400 175 3.08 3.845 17.05 0 0 3 2 # And filter_if() selects variables with a predicate function: filter_if(mtcars, ~ all(floor(.) == .), all_vars(. 
!= 0)) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 #> Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 #> Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 #> Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 #> Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 #> Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2 # -> is_int <- function(x) all(floor(x) == x) filter(mtcars, if_all(where(is_int), ~ .x != 0)) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 #> Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 #> Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 #> Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 #> Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 #> Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2"},{"path":"https://dplyr.tidyverse.org/dev/reference/funs.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a list of function calls — funs","title":"Create a list of function calls — funs","text":"funs() deprecated; please use list() instead. deprecated function provided unique way specifying anonymous functions, rather adopting conventions used purrr packages tidyverse.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/funs.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a list of function calls — funs","text":"","code":"funs(..., .args = list())"},{"path":"https://dplyr.tidyverse.org/dev/reference/funs.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a list of function calls — funs","text":"... list functions specified : name, \"mean\" function , mean call function . 
dummy argument, mean(., na.rm = TRUE) following notations supported, see examples: anonymous function, function(x) mean(x, na.rm = TRUE) anonymous function purrr notation, ~mean(., na.rm = TRUE) .args, args named list additional arguments added function calls. funs() deprecated, use methods supply arguments: ... argument scoped verbs make functions purrr::partial().","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/funs.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Create a list of function calls — funs","text":"","code":"funs(\"mean\", mean(., na.rm = TRUE)) #> Warning: `funs()` was deprecated in dplyr 0.8.0. #> ℹ Please use a list of either functions or lambdas: #> #> # Simple named list: list(mean = mean, median = median) #> #> # Auto named with `tibble::lst()`: tibble::lst(mean, median) #> #> # Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE)) #> #> $ mean: mean(.) #> $ mean: mean(., na.rm = TRUE) # -> list(mean = mean, mean = ~ mean(.x, na.rm = TRUE)) #> $mean #> function (x, ...) #> UseMethod(\"mean\") #> #> #> #> $mean #> ~mean(.x, na.rm = TRUE) #> #> funs(m1 = mean, m2 = \"mean\", m3 = mean(., na.rm = TRUE)) #> Warning: `funs()` was deprecated in dplyr 0.8.0. #> ℹ Please use a list of either functions or lambdas: #> #> # Simple named list: list(mean = mean, median = median) #> #> # Auto named with `tibble::lst()`: tibble::lst(mean, median) #> #> # Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE)) #> #> $ m1: mean(.) #> $ m2: mean(.) #> $ m3: mean(., na.rm = TRUE) # -> list(m1 = mean, m2 = \"mean\", m3 = ~ mean(.x, na.rm = TRUE)) #> $m1 #> function (x, ...) 
#> UseMethod(\"mean\") #> #> #> #> $m2 #> [1] \"mean\" #> #> $m3 #> ~mean(.x, na.rm = TRUE) #> #>"},{"path":"https://dplyr.tidyverse.org/dev/reference/glimpse.html","id":null,"dir":"Reference","previous_headings":"","what":"Get a glimpse of your data — glimpse","title":"Get a glimpse of your data — glimpse","text":"glimpse() like transposed version print(): columns run page, data runs across. makes possible see every column data frame. little like str() applied data frame tries show much data possible. (always shows underlying data, even applied remote data source.) glimpse() provided pillar package, re-exported dplyr. See pillar::glimpse() details.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/glimpse.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get a glimpse of your data — glimpse","text":"x original x (invisibly) returned, allowing glimpse() used within data pipeline.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/glimpse.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Get a glimpse of your data — glimpse","text":"","code":"glimpse(mtcars) #> Rows: 32 #> Columns: 11 #> $ mpg 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2,… #> $ cyl 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4,… #> $ disp 160.0, 160.0, 108.0, 258.0, 360.0, 225.0, 360.0, 146.7, 140… #> $ hp 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123, 180, 18… #> $ drat 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92,… #> $ wt 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3.1… #> $ qsec 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20.00, 22.… #> $ vs 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1,… #> $ am 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,… #> $ gear 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4,… #> $ carb 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1,… # Note that original x 
is (invisibly) returned, allowing `glimpse()` to be # used within a pipeline. mtcars %>% glimpse() %>% select(1:3) #> Rows: 32 #> Columns: 11 #> $ mpg 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2,… #> $ cyl 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4,… #> $ disp 160.0, 160.0, 108.0, 258.0, 360.0, 225.0, 360.0, 146.7, 140… #> $ hp 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123, 180, 18… #> $ drat 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92,… #> $ wt 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3.1… #> $ qsec 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20.00, 22.… #> $ vs 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1,… #> $ am 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,… #> $ gear 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4,… #> $ carb 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1,… #> mpg cyl disp #> Mazda RX4 21.0 6 160.0 #> Mazda RX4 Wag 21.0 6 160.0 #> Datsun 710 22.8 4 108.0 #> Hornet 4 Drive 21.4 6 258.0 #> Hornet Sportabout 18.7 8 360.0 #> Valiant 18.1 6 225.0 #> Duster 360 14.3 8 360.0 #> Merc 240D 24.4 4 146.7 #> Merc 230 22.8 4 140.8 #> Merc 280 19.2 6 167.6 #> Merc 280C 17.8 6 167.6 #> Merc 450SE 16.4 8 275.8 #> Merc 450SL 17.3 8 275.8 #> Merc 450SLC 15.2 8 275.8 #> Cadillac Fleetwood 10.4 8 472.0 #> Lincoln Continental 10.4 8 460.0 #> Chrysler Imperial 14.7 8 440.0 #> Fiat 128 32.4 4 78.7 #> Honda Civic 30.4 4 75.7 #> Toyota Corolla 33.9 4 71.1 #> Toyota Corona 21.5 4 120.1 #> Dodge Challenger 15.5 8 318.0 #> AMC Javelin 15.2 8 304.0 #> Camaro Z28 13.3 8 350.0 #> Pontiac Firebird 19.2 8 400.0 #> Fiat X1-9 27.3 4 79.0 #> Porsche 914-2 26.0 4 120.3 #> Lotus Europa 30.4 4 95.1 #> Ford Pantera L 15.8 8 351.0 #> Ferrari Dino 19.7 6 145.0 #> Maserati Bora 15.0 8 301.0 #> Volvo 142E 21.4 4 121.0 glimpse(starwars) #> Rows: 87 #> Columns: 14 #> $ name \"Luke Skywalker\", \"C-3PO\", \"R2-D2\", \"Darth Vader\", \"L… #> $ height 172, 167, 96, 202, 
150, 178, 165, 97, 183, 182, 188, … #> $ mass 77.0, 75.0, 32.0, 136.0, 49.0, 120.0, 75.0, 32.0, 84.… #> $ hair_color \"blond\", NA, NA, \"none\", \"brown\", \"brown, grey\", \"bro… #> $ skin_color \"fair\", \"gold\", \"white, blue\", \"white\", \"light\", \"lig… #> $ eye_color \"blue\", \"yellow\", \"red\", \"yellow\", \"brown\", \"blue\", \"… #> $ birth_year 19.0, 112.0, 33.0, 41.9, 19.0, 52.0, 47.0, NA, 24.0, … #> $ sex \"male\", \"none\", \"none\", \"male\", \"female\", \"male\", \"fe… #> $ gender \"masculine\", \"masculine\", \"masculine\", \"masculine\", \"… #> $ homeworld \"Tatooine\", \"Tatooine\", \"Naboo\", \"Tatooine\", \"Alderaa… #> $ species \"Human\", \"Droid\", \"Droid\", \"Human\", \"Human\", \"Human\",… #> $ films <\"A New Hope\", \"The Empire Strikes Back\", \"Return of… #> $ vehicles <\"Snowspeeder\", \"Imperial Speeder Bike\">, <>, <>, <>… #> $ starships <\"X-wing\", \"Imperial shuttle\">, <>, <>, \"TIE Advance…"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by.html","id":null,"dir":"Reference","previous_headings":"","what":"Group by one or more variables — group_by","title":"Group by one or more variables — group_by","text":"data operations done groups defined variables. group_by() takes existing tbl converts grouped tbl operations performed \"group\". ungroup() removes grouping.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Group by one or more variables — group_by","text":"","code":"group_by(.data, ..., .add = FALSE, .drop = group_by_drop_default(.data)) ungroup(x, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Group by one or more variables — group_by","text":".data data frame, data frame extension (e.g. tibble), lazy data frame (e.g. dbplyr dtplyr). See Methods, , details. ... 
group_by(), variables computations group . Computations always done ungrouped data frame. perform computations grouped data, need use separate mutate() step group_by(). Computations allowed nest_by(). ungroup(), variables remove grouping. .add FALSE, default, group_by() override existing groups. add existing groups, use .add = TRUE. argument previously called add, prevented creating new grouping variable called add, conflicts naming conventions. .drop Drop groups formed factor levels appear data? default TRUE except .data previously grouped .drop = FALSE. See group_by_drop_default() details. x tbl()","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Group by one or more variables — group_by","text":"grouped data frame class grouped_df, unless combination ... add yields empty set grouping columns, case tibble returned.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Group by one or more variables — group_by","text":"function generics, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. Methods available currently loaded packages: group_by(): dbplyr (tbl_lazy), dplyr (data.frame) . ungroup(): dbplyr (tbl_lazy), dplyr (data.frame, grouped_df, rowwise_df) .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by.html","id":"ordering","dir":"Reference","previous_headings":"","what":"Ordering","title":"Group by one or more variables — group_by","text":"Currently, group_by() internally orders groups ascending order. results ordered output functions aggregate groups, summarise(). used grouping columns, character vectors ordered C locale performance reproducibility across R sessions. 
resulting ordering grouped operation matters dependent locale, follow grouped operation explicit call arrange() set .locale argument. example: often useful preliminary step generating content intended humans, HTML table.","code":"data %>% group_by(chr) %>% summarise(avg = mean(x)) %>% arrange(chr, .locale = \"en\")"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by.html","id":"legacy-behavior","dir":"Reference","previous_headings":"","what":"Legacy behavior","title":"Group by one or more variables — group_by","text":"Prior dplyr 1.1.0, character vector grouping columns ordered system locale. need temporarily revert behavior, can set global option dplyr.legacy_locale TRUE, used sparingly expect option removed future version dplyr. better update existing code explicitly call arrange(.locale = ) instead. Note setting dplyr.legacy_locale also force calls arrange() use system locale.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Group by one or more variables — group_by","text":"","code":"by_cyl <- mtcars %>% group_by(cyl) # grouping doesn't change how the data looks (apart from listing # how it's grouped): by_cyl #> # A tibble: 32 × 11 #> # Groups: cyl [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows # It changes how it acts with the other dplyr verbs: by_cyl %>% summarise( disp = mean(disp), hp = mean(hp) ) #> # A tibble: 3 × 3 #> cyl disp hp #> #> 1 4 105. 82.6 #> 2 6 183. 122. 
#> 3 8 353. 209. by_cyl %>% filter(disp == max(disp)) #> # A tibble: 3 × 11 #> # Groups: cyl [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 2 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 3 10.4 8 472 205 2.93 5.25 18.0 0 0 3 4 # Each call to summarise() removes a layer of grouping by_vs_am <- mtcars %>% group_by(vs, am) by_vs <- by_vs_am %>% summarise(n = n()) #> `summarise()` has grouped output by 'vs'. You can override using the #> `.groups` argument. by_vs #> # A tibble: 4 × 3 #> # Groups: vs [2] #> vs am n #> #> 1 0 0 12 #> 2 0 1 6 #> 3 1 0 7 #> 4 1 1 7 by_vs %>% summarise(n = sum(n)) #> # A tibble: 2 × 2 #> vs n #> #> 1 0 18 #> 2 1 14 # To removing grouping, use ungroup by_vs %>% ungroup() %>% summarise(n = sum(n)) #> # A tibble: 1 × 1 #> n #> #> 1 32 # By default, group_by() overrides existing grouping by_cyl %>% group_by(vs, am) %>% group_vars() #> [1] \"vs\" \"am\" # Use add = TRUE to instead append by_cyl %>% group_by(vs, am, .add = TRUE) %>% group_vars() #> [1] \"cyl\" \"vs\" \"am\" # You can group by expressions: this is a short-hand # for a mutate() followed by a group_by() mtcars %>% group_by(vsam = vs + am) #> # A tibble: 32 × 12 #> # Groups: vsam [3] #> mpg cyl disp hp drat wt qsec vs am gear carb vsam #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 1 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 1 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 2 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 0 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 0 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 1 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 1 #> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 1 #> # ℹ 22 more rows # The implicit mutate() step is always performed on the # ungrouped data. 
Here we get 3 groups: mtcars %>% group_by(vs) %>% group_by(hp_cut = cut(hp, 3)) #> # A tibble: 32 × 12 #> # Groups: hp_cut [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows #> # ℹ 1 more variable: hp_cut # If you want it to be performed by groups, # you have to use an explicit mutate() call. # Here we get 3 groups per value of vs mtcars %>% group_by(vs) %>% mutate(hp_cut = cut(hp, 3)) %>% group_by(hp_cut) #> # A tibble: 32 × 12 #> # Groups: hp_cut [6] #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 
123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows #> # ℹ 1 more variable: hp_cut # when factors are involved and .drop = FALSE, groups can be empty tbl <- tibble( x = 1:10, y = factor(rep(c(\"a\", \"c\"), each = 5), levels = c(\"a\", \"b\", \"c\")) ) tbl %>% group_by(y, .drop = FALSE) %>% group_rows() #> [3]> #> [[1]] #> [1] 1 2 3 4 5 #> #> [[2]] #> integer(0) #> #> [[3]] #> [1] 6 7 8 9 10 #>"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by_all.html","id":null,"dir":"Reference","previous_headings":"","what":"Group by a selection of variables — group_by_all","title":"Group by a selection of variables — group_by_all","text":"Scoped verbs (_if, _at, _all) superseded use pick() across() existing verb. See vignette(\"colwise\") details. scoped variants group_by() group data frame selection variables. Like group_by(), optional mutate semantics.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by_all.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Group by a selection of variables — group_by_all","text":"","code":"group_by_all( .tbl, .funs = list(), ..., .add = FALSE, .drop = group_by_drop_default(.tbl) ) group_by_at( .tbl, .vars, .funs = list(), ..., .add = FALSE, .drop = group_by_drop_default(.tbl) ) group_by_if( .tbl, .predicate, .funs = list(), ..., .add = FALSE, .drop = group_by_drop_default(.tbl) )"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by_all.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Group by a selection of variables — group_by_all","text":".tbl tbl object. .funs function fun, quosure style lambda ~ fun(.) list either form. ... Additional arguments function calls .funs. evaluated , tidy dots support. .add See group_by() .drop Drop groups formed factor levels appear data? default TRUE except .data previously grouped .drop = FALSE. See group_by_drop_default() details. 
.vars list columns generated vars(), character vector column names, numeric vector column positions, NULL. .predicate predicate function applied columns logical vector. variables .predicate returns TRUE selected. argument passed rlang::as_function() thus supports quosure-style lambda functions strings representing function names.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by_all.html","id":"grouping-variables","dir":"Reference","previous_headings":"","what":"Grouping variables","title":"Group by a selection of variables — group_by_all","text":"Existing grouping variables maintained, even included selection.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by_all.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Group by a selection of variables — group_by_all","text":"","code":"# Group a data frame by all variables: group_by_all(mtcars) #> # A tibble: 32 × 11 #> # Groups: mpg, cyl, disp, hp, drat, wt, qsec, vs, am, gear, carb [32] #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 
123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows # -> mtcars %>% group_by(pick(everything())) #> # A tibble: 32 × 11 #> # Groups: mpg, cyl, disp, hp, drat, wt, qsec, vs, am, gear, carb [32] #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows # Group by variables selected with a predicate: group_by_if(iris, is.factor) #> # A tibble: 150 × 5 #> # Groups: Species [3] #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows # -> iris %>% group_by(pick(where(is.factor))) #> # A tibble: 150 × 5 #> # Groups: Species [3] #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows # Group by variables selected by name: group_by_at(mtcars, vars(vs, am)) #> # A tibble: 32 × 11 #> # Groups: vs, am [4] #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 
2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows # -> mtcars %>% group_by(pick(vs, am)) #> # A tibble: 32 × 11 #> # Groups: vs, am [4] #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows # Like group_by(), the scoped variants have optional mutate # semantics. This provide a shortcut for group_by() + mutate(): d <- tibble(x=c(1,1,2,2), y=c(1,2,1,2)) group_by_all(d, as.factor) #> # A tibble: 4 × 2 #> # Groups: x, y [4] #> x y #> #> 1 1 1 #> 2 1 2 #> 3 2 1 #> 4 2 2 # -> d %>% group_by(across(everything(), as.factor)) #> # A tibble: 4 × 2 #> # Groups: x, y [4] #> x y #> #> 1 1 1 #> 2 1 2 #> 3 2 1 #> 4 2 2 group_by_if(iris, is.factor, as.character) #> # A tibble: 150 × 5 #> # Groups: Species [3] #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows # -> iris %>% group_by(across(where(is.factor), as.character)) #> # A tibble: 150 × 5 #> # Groups: Species [3] #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 
setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by_drop_default.html","id":null,"dir":"Reference","previous_headings":"","what":"Default value for .drop argument of group_by — group_by_drop_default","title":"Default value for .drop argument of group_by — group_by_drop_default","text":"Default value .drop argument group_by","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by_drop_default.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Default value for .drop argument of group_by — group_by_drop_default","text":"","code":"group_by_drop_default(.tbl)"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by_drop_default.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Default value for .drop argument of group_by — group_by_drop_default","text":".tbl data frame","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by_drop_default.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Default value for .drop argument of group_by — group_by_drop_default","text":"TRUE unless .tbl grouped data frame previously obtained group_by(.drop = FALSE)","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by_drop_default.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Default value for .drop argument of group_by — group_by_drop_default","text":"","code":"group_by_drop_default(iris) #> [1] TRUE iris %>% group_by(Species) %>% group_by_drop_default() #> [1] TRUE iris %>% group_by(Species, .drop = FALSE) %>% group_by_drop_default() #> [1] FALSE"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by_prepare.html","id":null,"dir":"Reference","previous_headings":"","what":"Prepare for grouping and 
other operations — distinct_prepare","title":"Prepare for grouping and other operations — distinct_prepare","text":"*_prepare() performs standard manipulation needed prior actual data processing. needed packages implement dplyr backends.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by_prepare.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Prepare for grouping and other operations — distinct_prepare","text":"","code":"distinct_prepare( .data, vars, group_vars = character(), .keep_all = FALSE, caller_env = caller_env(2), error_call = caller_env() ) group_by_prepare( .data, ..., .add = FALSE, .dots = deprecated(), add = deprecated(), error_call = caller_env() )"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by_prepare.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Prepare for grouping and other operations — distinct_prepare","text":"list data Modified tbl groups Modified groups","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_cols.html","id":null,"dir":"Reference","previous_headings":"","what":"Select grouping variables — group_cols","title":"Select grouping variables — group_cols","text":"selection helpers matches grouping variables. can used select() vars() selections.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_cols.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Select grouping variables — group_cols","text":"","code":"group_cols(vars = NULL, data = NULL)"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_cols.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Select grouping variables — group_cols","text":"vars Deprecated; please use data instead. data advanced use . 
default NULL automatically finds \"current\" data frames.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/group_cols.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Select grouping variables — group_cols","text":"","code":"gdf <- iris %>% group_by(Species) gdf %>% select(group_cols()) #> # A tibble: 150 × 1 #> # Groups: Species [3] #> Species #> #> 1 setosa #> 2 setosa #> 3 setosa #> 4 setosa #> 5 setosa #> 6 setosa #> 7 setosa #> 8 setosa #> 9 setosa #> 10 setosa #> # ℹ 140 more rows # Remove the grouping variables from mutate selections: gdf %>% mutate_at(vars(-group_cols()), `/`, 100) #> # A tibble: 150 × 5 #> # Groups: Species [3] #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 0.051 0.035 0.014 0.002 setosa #> 2 0.049 0.03 0.014 0.002 setosa #> 3 0.047 0.032 0.013 0.002 setosa #> 4 0.046 0.031 0.015 0.002 setosa #> 5 0.05 0.036 0.014 0.002 setosa #> 6 0.054 0.039 0.017 0.004 setosa #> 7 0.046 0.034 0.014 0.003 setosa #> 8 0.05 0.034 0.015 0.002 setosa #> 9 0.044 0.029 0.014 0.002 setosa #> 10 0.049 0.031 0.015 0.001 setosa #> # ℹ 140 more rows # -> No longer necessary with across() gdf %>% mutate(across(everything(), ~ . 
/ 100)) #> # A tibble: 150 × 5 #> # Groups: Species [3] #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 0.051 0.035 0.014 0.002 setosa #> 2 0.049 0.03 0.014 0.002 setosa #> 3 0.047 0.032 0.013 0.002 setosa #> 4 0.046 0.031 0.015 0.002 setosa #> 5 0.05 0.036 0.014 0.002 setosa #> 6 0.054 0.039 0.017 0.004 setosa #> 7 0.046 0.034 0.014 0.003 setosa #> 8 0.05 0.034 0.015 0.002 setosa #> 9 0.044 0.029 0.014 0.002 setosa #> 10 0.049 0.031 0.015 0.001 setosa #> # ℹ 140 more rows"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_data.html","id":null,"dir":"Reference","previous_headings":"","what":"Grouping metadata — group_data","title":"Grouping metadata — group_data","text":"collection functions accesses data grouped data frames various ways: group_data() returns data frame defines grouping structure. columns give values grouping variables. last column, always called .rows, list integer vectors gives location rows group. group_keys() returns data frame describing groups. group_rows() returns list integer vectors giving rows group contains. group_indices() returns integer vector length .data gives group row belongs . group_vars() gives names grouping variables character vector. groups() gives names grouping variables list symbols. group_size() gives size group. n_groups() gives total number groups. See context equivalent functions return values current group.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_data.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Grouping metadata — group_data","text":"","code":"group_data(.data) group_keys(.tbl, ...) group_rows(.data) group_indices(.data, ...) 
group_vars(x) groups(x) group_size(x) n_groups(x)"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_data.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Grouping metadata — group_data","text":".data, .tbl, x data frame extension (like tibble grouped tibble). ... Use ... now deprecated; please use group_by() first instead.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_data.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Grouping metadata — group_data","text":"","code":"df <- tibble(x = c(1,1,2,2)) group_vars(df) #> character(0) group_rows(df) #> [1]> #> [[1]] #> [1] 1 2 3 4 #> group_data(df) #> # A tibble: 1 × 1 #> .rows #> > #> 1 [4] group_indices(df) #> [1] 1 1 1 1 gf <- group_by(df, x) group_vars(gf) #> [1] \"x\" group_rows(gf) #> [2]> #> [[1]] #> [1] 1 2 #> #> [[2]] #> [1] 3 4 #> group_data(gf) #> # A tibble: 2 × 2 #> x .rows #> > #> 1 1 [2] #> 2 2 [2] group_indices(gf) #> [1] 1 1 2 2"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_map.html","id":null,"dir":"Reference","previous_headings":"","what":"Apply a function to each group — group_map","title":"Apply a function to each group — group_map","text":"group_map(), group_modify() group_walk() purrr-style functions can used iterate grouped tibbles.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_map.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Apply a function to each group — group_map","text":"","code":"group_map(.data, .f, ..., .keep = FALSE) group_modify(.data, .f, ..., .keep = FALSE) group_walk(.data, .f, ..., .keep = FALSE)"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_map.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Apply a function to each group — group_map","text":".data grouped tibble .f function formula apply group. function, used . 
least 2 formal arguments. formula, e.g. ~ head(.x), converted function. formula, can use . .x refer subset rows .tbl given group .y refer key, one row tibble one column per grouping variable identifies group ... Additional arguments passed .f .keep grouping variables kept .x","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_map.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Apply a function to each group — group_map","text":"group_modify() returns grouped tibble. case .f must return data frame. group_map() returns list results calling .f group. group_walk() calls .f side effects returns input .tbl, invisibly.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_map.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Apply a function to each group — group_map","text":"Use group_modify() summarize() limited, terms need return group. group_modify() good \"data frame , data frame \". limited, need use nested split workflow. group_modify() evolution (), used . conceptual group data frame exposed function .f two pieces information: subset data group, exposed .x. key, tibble exactly one row columns grouping variable, exposed .y. completeness, group_modify(), group_map group_walk() also work ungrouped data frames, case function applied entire data frame (exposed .x), .y one row tibble column, consistently group_keys().","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/group_map.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Apply a function to each group — group_map","text":"","code":"# return a list mtcars %>% group_by(cyl) %>% group_map(~ head(.x, 2L)) #> [[1]] #> # A tibble: 2 × 10 #> mpg disp hp drat wt qsec vs am gear carb #> #> 1 22.8 108 93 3.85 2.32 18.6 1 1 4 1 #> 2 24.4 147. 
62 3.69 3.19 20 1 0 4 2 #> #> [[2]] #> # A tibble: 2 × 10 #> mpg disp hp drat wt qsec vs am gear carb #> #> 1 21 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 160 110 3.9 2.88 17.0 0 1 4 4 #> #> [[3]] #> # A tibble: 2 × 10 #> mpg disp hp drat wt qsec vs am gear carb #> #> 1 18.7 360 175 3.15 3.44 17.0 0 0 3 2 #> 2 14.3 360 245 3.21 3.57 15.8 0 0 3 4 #> # return a tibble grouped by `cyl` with 2 rows per group # the grouping data is recalculated mtcars %>% group_by(cyl) %>% group_modify(~ head(.x, 2L)) #> # A tibble: 6 × 11 #> # Groups: cyl [3] #> cyl mpg disp hp drat wt qsec vs am gear carb #> #> 1 4 22.8 108 93 3.85 2.32 18.6 1 1 4 1 #> 2 4 24.4 147. 62 3.69 3.19 20 1 0 4 2 #> 3 6 21 160 110 3.9 2.62 16.5 0 1 4 4 #> 4 6 21 160 110 3.9 2.88 17.0 0 1 4 4 #> 5 8 18.7 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 8 14.3 360 245 3.21 3.57 15.8 0 0 3 4 # a list of tibbles iris %>% group_by(Species) %>% group_map(~ broom::tidy(lm(Petal.Length ~ Sepal.Length, data = .x))) #> [[1]] #> # A tibble: 2 × 5 #> term estimate std.error statistic p.value #> #> 1 (Intercept) 0.803 0.344 2.34 0.0238 #> 2 Sepal.Length 0.132 0.0685 1.92 0.0607 #> #> [[2]] #> # A tibble: 2 × 5 #> term estimate std.error statistic p.value #> #> 1 (Intercept) 0.185 0.514 0.360 7.20e- 1 #> 2 Sepal.Length 0.686 0.0863 7.95 2.59e-10 #> #> [[3]] #> # A tibble: 2 × 5 #> term estimate std.error statistic p.value #> #> 1 (Intercept) 0.610 0.417 1.46 1.50e- 1 #> 2 Sepal.Length 0.750 0.0630 11.9 6.30e-16 #> # a restructured grouped tibble iris %>% group_by(Species) %>% group_modify(~ broom::tidy(lm(Petal.Length ~ Sepal.Length, data = .x))) #> # A tibble: 6 × 6 #> # Groups: Species [3] #> Species term estimate std.error statistic p.value #> #> 1 setosa (Intercept) 0.803 0.344 2.34 2.38e- 2 #> 2 setosa Sepal.Length 0.132 0.0685 1.92 6.07e- 2 #> 3 versicolor (Intercept) 0.185 0.514 0.360 7.20e- 1 #> 4 versicolor Sepal.Length 0.686 0.0863 7.95 2.59e-10 #> 5 virginica (Intercept) 0.610 0.417 1.46 1.50e- 1 #> 6 virginica Sepal.Length 
0.750 0.0630 11.9 6.30e-16 # a list of vectors iris %>% group_by(Species) %>% group_map(~ quantile(.x$Petal.Length, probs = c(0.25, 0.5, 0.75))) #> [[1]] #> 25% 50% 75% #> 1.400 1.500 1.575 #> #> [[2]] #> 25% 50% 75% #> 4.00 4.35 4.60 #> #> [[3]] #> 25% 50% 75% #> 5.100 5.550 5.875 #> # to use group_modify() the lambda must return a data frame iris %>% group_by(Species) %>% group_modify(~ { quantile(.x$Petal.Length, probs = c(0.25, 0.5, 0.75)) %>% tibble::enframe(name = \"prob\", value = \"quantile\") }) #> # A tibble: 9 × 3 #> # Groups: Species [3] #> Species prob quantile #> #> 1 setosa 25% 1.4 #> 2 setosa 50% 1.5 #> 3 setosa 75% 1.58 #> 4 versicolor 25% 4 #> 5 versicolor 50% 4.35 #> 6 versicolor 75% 4.6 #> 7 virginica 25% 5.1 #> 8 virginica 50% 5.55 #> 9 virginica 75% 5.88 iris %>% group_by(Species) %>% group_modify(~ { .x %>% purrr::map_dfc(fivenum) %>% mutate(nms = c(\"min\", \"Q1\", \"median\", \"Q3\", \"max\")) }) #> # A tibble: 15 × 6 #> # Groups: Species [3] #> Species Sepal.Length Sepal.Width Petal.Length Petal.Width nms #> #> 1 setosa 4.3 2.3 1 0.1 min #> 2 setosa 4.8 3.2 1.4 0.2 Q1 #> 3 setosa 5 3.4 1.5 0.2 median #> 4 setosa 5.2 3.7 1.6 0.3 Q3 #> 5 setosa 5.8 4.4 1.9 0.6 max #> 6 versicolor 4.9 2 3 1 min #> 7 versicolor 5.6 2.5 4 1.2 Q1 #> 8 versicolor 5.9 2.8 4.35 1.3 median #> 9 versicolor 6.3 3 4.6 1.5 Q3 #> 10 versicolor 7 3.4 5.1 1.8 max #> 11 virginica 4.9 2.2 4.5 1.4 min #> 12 virginica 6.2 2.8 5.1 1.8 Q1 #> 13 virginica 6.5 3 5.55 2 median #> 14 virginica 6.9 3.2 5.9 2.3 Q3 #> 15 virginica 7.9 3.8 6.9 2.5 max # group_walk() is for side effects dir.create(temp <- tempfile()) iris %>% group_by(Species) %>% group_walk(~ write.csv(.x, file = file.path(temp, paste0(.y$Species, \".csv\")))) list.files(temp, pattern = \"csv$\") #> [1] \"setosa.csv\" \"versicolor.csv\" \"virginica.csv\" unlink(temp, recursive = TRUE) # group_modify() and ungrouped data frames mtcars %>% group_modify(~ head(.x, 2L)) #> mpg cyl disp hp drat wt qsec vs am gear carb #> 
Mazda RX4 21 6 160 110 3.9 2.620 16.46 0 1 4 4 #> Mazda RX4 Wag 21 6 160 110 3.9 2.875 17.02 0 1 4 4"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_nest.html","id":null,"dir":"Reference","previous_headings":"","what":"Nest a tibble using a grouping specification — group_nest","title":"Nest a tibble using a grouping specification — group_nest","text":"Nest tibble using grouping specification","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_nest.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Nest a tibble using a grouping specification — group_nest","text":"","code":"group_nest(.tbl, ..., .key = \"data\", keep = FALSE)"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_nest.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Nest a tibble using a grouping specification — group_nest","text":".tbl tbl ... Grouping specification, forwarded group_by() .key name list column keep grouping columns kept list column.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_nest.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Nest a tibble using a grouping specification — group_nest","text":"tbl one row per unique combination grouping variables. first columns grouping variables, followed list column tibbles matching rows remaining columns.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_nest.html","id":"lifecycle","dir":"Reference","previous_headings":"","what":"Lifecycle","title":"Nest a tibble using a grouping specification — group_nest","text":"group_nest() stable tidyr::nest(.=) provides similar behavior. 
may deprecated future.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_nest.html","id":"grouped-data-frames","dir":"Reference","previous_headings":"","what":"Grouped data frames","title":"Nest a tibble using a grouping specification — group_nest","text":"primary use case group_nest() already grouped data frames, typically result group_by(). case group_nest() uses first argument, grouped tibble, warns ... used.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_nest.html","id":"ungrouped-data-frames","dir":"Reference","previous_headings":"","what":"Ungrouped data frames","title":"Nest a tibble using a grouping specification — group_nest","text":"used ungrouped data frames, group_nest() forwards ... group_by() nesting, therefore ... subject data mask.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/group_nest.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Nest a tibble using a grouping specification — group_nest","text":"","code":"#----- use case 1: a grouped data frame iris %>% group_by(Species) %>% group_nest() #> # A tibble: 3 × 2 #> Species data #> > #> 1 setosa [50 × 4] #> 2 versicolor [50 × 4] #> 3 virginica [50 × 4] # this can be useful if the grouped data has been altered before nesting iris %>% group_by(Species) %>% filter(Sepal.Length > mean(Sepal.Length)) %>% group_nest() #> # A tibble: 3 × 2 #> Species data #> > #> 1 setosa [22 × 4] #> 2 versicolor [24 × 4] #> 3 virginica [22 × 4] #----- use case 2: using group_nest() on a ungrouped data frame with # a grouping specification that uses the data mask starwars %>% group_nest(species, homeworld) #> # A tibble: 57 × 3 #> species homeworld data #> > #> 1 Aleena Aleen Minor [1 × 12] #> 2 Besalisk Ojom [1 × 12] #> 3 Cerean Cerea [1 × 12] #> 4 Chagrian Champala [1 × 12] #> 5 Clawdite Zolan [1 × 12] #> 6 Droid Naboo [1 × 12] #> 7 Droid Tatooine [2 × 12] #> 8 Droid NA [3 × 12] #> 9 Dug Malastare [1 × 12] 
#> 10 Ewok Endor [1 × 12] #> # ℹ 47 more rows"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_split.html","id":null,"dir":"Reference","previous_headings":"","what":"Split data frame by groups — group_split","title":"Split data frame by groups — group_split","text":"group_split() works like base::split() : uses grouping structure group_by() therefore subject data mask name elements list based grouping works well single character grouping variable. Instead, use group_keys() access data frame defines groups. group_split() primarily designed work grouped data frames. can pass ... group split ungrouped data frame, generally useful want easy access group metadata.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_split.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Split data frame by groups — group_split","text":"","code":"group_split(.tbl, ..., .keep = TRUE)"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_split.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Split data frame by groups — group_split","text":".tbl tbl. ... .tbl ungrouped data frame, grouping specification, forwarded group_by(). .keep grouping columns kept?","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_split.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Split data frame by groups — group_split","text":"list tibbles. tibble contains rows .tbl associated group columns, including grouping variables. Note returns list_of slightly stricter simple list useful representing lists every element type.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_split.html","id":"lifecycle","dir":"Reference","previous_headings":"","what":"Lifecycle","title":"Split data frame by groups — group_split","text":"group_split() stable can achieve similar results manipulating nested column returned tidyr::nest(.=). 
also retains group keys within single data structure. group_split() may deprecated future.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/group_split.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Split data frame by groups — group_split","text":"","code":"ir <- iris %>% group_by(Species) group_split(ir) #> tbl_df< #> Sepal.Length: double #> Sepal.Width : double #> Petal.Length: double #> Petal.Width : double #> Species : factor #> > #> >[3]> #> [[1]] #> # A tibble: 50 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 40 more rows #> #> [[2]] #> # A tibble: 50 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 7 3.2 4.7 1.4 versicolor #> 2 6.4 3.2 4.5 1.5 versicolor #> 3 6.9 3.1 4.9 1.5 versicolor #> 4 5.5 2.3 4 1.3 versicolor #> 5 6.5 2.8 4.6 1.5 versicolor #> 6 5.7 2.8 4.5 1.3 versicolor #> 7 6.3 3.3 4.7 1.6 versicolor #> 8 4.9 2.4 3.3 1 versicolor #> 9 6.6 2.9 4.6 1.3 versicolor #> 10 5.2 2.7 3.9 1.4 versicolor #> # ℹ 40 more rows #> #> [[3]] #> # A tibble: 50 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 6.3 3.3 6 2.5 virginica #> 2 5.8 2.7 5.1 1.9 virginica #> 3 7.1 3 5.9 2.1 virginica #> 4 6.3 2.9 5.6 1.8 virginica #> 5 6.5 3 5.8 2.2 virginica #> 6 7.6 3 6.6 2.1 virginica #> 7 4.9 2.5 4.5 1.7 virginica #> 8 7.3 2.9 6.3 1.8 virginica #> 9 6.7 2.5 5.8 1.8 virginica #> 10 7.2 3.6 6.1 2.5 virginica #> # ℹ 40 more rows #> group_keys(ir) #> # A tibble: 3 × 1 #> Species #> #> 1 setosa #> 2 versicolor #> 3 virginica"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_trim.html","id":null,"dir":"Reference","previous_headings":"","what":"Trim 
grouping structure — group_trim","title":"Trim grouping structure — group_trim","text":"Drop unused levels factors used grouping variables, recalculates grouping structure. group_trim() particularly useful filter() intended select subset groups.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_trim.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Trim grouping structure — group_trim","text":"","code":"group_trim(.tbl, .drop = group_by_drop_default(.tbl))"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_trim.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Trim grouping structure — group_trim","text":".tbl grouped data frame .drop See group_by()","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_trim.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Trim grouping structure — group_trim","text":"grouped data frame","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/group_trim.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Trim grouping structure — group_trim","text":"","code":"iris %>% group_by(Species) %>% filter(Species == \"setosa\", .preserve = TRUE) %>% group_trim() #> # A tibble: 50 × 5 #> # Groups: Species [1] #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 40 more rows"},{"path":"https://dplyr.tidyverse.org/dev/reference/grouped_df.html","id":null,"dir":"Reference","previous_headings":"","what":"A grouped data frame. — grouped_df","title":"A grouped data frame. 
— grouped_df","text":"easiest way create grouped data frame call group_by() method data frame tbl: take care capturing unevaluated expressions . functions designed programmatic use. data analysis purposes see group_data() accessor functions retrieve various metadata grouped data frames.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/grouped_df.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"A grouped data frame. — grouped_df","text":"","code":"grouped_df(data, vars, drop = group_by_drop_default(data)) is.grouped_df(x) is_grouped_df(x)"},{"path":"https://dplyr.tidyverse.org/dev/reference/grouped_df.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"A grouped data frame. — grouped_df","text":"data tbl data frame. vars character vector. drop .drop = TRUE, empty groups dropped.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/ident.html","id":null,"dir":"Reference","previous_headings":"","what":"Flag a character vector as SQL identifiers — ident","title":"Flag a character vector as SQL identifiers — ident","text":"ident() takes unquoted strings flags identifiers. ident_q() assumes input already quoted, ensures get quoted . currently used schema.table.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/ident.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Flag a character vector as SQL identifiers — ident","text":"","code":"ident(...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/ident.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Flag a character vector as SQL identifiers — ident","text":"... 
character vector, name-value pairs","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/ident.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Flag a character vector as SQL identifiers — ident","text":"","code":"# Identifiers are escaped with \" ident(\"x\") #> x"},{"path":"https://dplyr.tidyverse.org/dev/reference/if_else.html","id":null,"dir":"Reference","previous_headings":"","what":"Vectorised if-else — if_else","title":"Vectorised if-else — if_else","text":"if_else() vectorized -else. Compared base R equivalent, ifelse(), function allows handle missing values condition missing always takes true, false, missing account determining output type .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/if_else.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Vectorised if-else — if_else","text":"","code":"if_else(condition, true, false, missing = NULL, ..., ptype = NULL, size = NULL)"},{"path":"https://dplyr.tidyverse.org/dev/reference/if_else.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Vectorised if-else — if_else","text":"condition logical vector true, false Vectors use TRUE FALSE values condition. true false recycled size condition. true, false, missing (used) cast common type. missing NULL, used value NA values condition. Follows size type rules true false. ... dots future extensions must empty. ptype optional prototype declaring desired output type. supplied, overrides common type true, false, missing. size optional size declaring desired output size. supplied, overrides size condition.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/if_else.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Vectorised if-else — if_else","text":"vector size condition type common type true, false, missing. 
condition TRUE, matching values true, FALSE, matching values false, NA, matching values missing, provided, otherwise missing value used.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/if_else.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Vectorised if-else — if_else","text":"","code":"x <- c(-5:5, NA) if_else(x < 0, NA, x) #> [1] NA NA NA NA NA 0 1 2 3 4 5 NA # Explicitly handle `NA` values in the `condition` with `missing` if_else(x < 0, \"negative\", \"positive\", missing = \"missing\") #> [1] \"negative\" \"negative\" \"negative\" \"negative\" \"negative\" \"positive\" #> [7] \"positive\" \"positive\" \"positive\" \"positive\" \"positive\" \"missing\" # Unlike `ifelse()`, `if_else()` preserves types x <- factor(sample(letters[1:5], 10, replace = TRUE)) ifelse(x %in% c(\"a\", \"b\", \"c\"), x, NA) #> [1] 2 3 NA 3 NA 1 2 3 2 NA if_else(x %in% c(\"a\", \"b\", \"c\"), x, NA) #> [1] b c c a b c b #> Levels: a b c d e # `if_else()` is often useful for creating new columns inside of `mutate()` starwars %>% mutate(category = if_else(height < 100, \"short\", \"tall\"), .keep = \"used\") #> # A tibble: 87 × 2 #> height category #> #> 1 172 tall #> 2 167 tall #> 3 96 short #> 4 202 tall #> 5 150 tall #> 6 178 tall #> 7 165 tall #> 8 97 short #> 9 183 tall #> 10 182 tall #> # ℹ 77 more rows"},{"path":"https://dplyr.tidyverse.org/dev/reference/join_by.html","id":null,"dir":"Reference","previous_headings":"","what":"Join specifications — join_by","title":"Join specifications — join_by","text":"join_by() constructs specification describes join two tables using small domain specific language. 
result can supplied argument join functions (left_join()).","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/join_by.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Join specifications — join_by","text":"","code":"join_by(...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/join_by.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Join specifications — join_by","text":"... Expressions specifying join. expression consist one following: Equality condition: == Inequality conditions: >=, >, <=, < Rolling helper: closest() Overlap helpers: (), within(), overlaps() expressions supported. need perform join computed variable, e.g. join_by(sales_date - 40 >= promo_date), need precompute store separate column. Column names specified quoted unquoted names. default, name left-hand side join condition refers left-hand table, unless overridden explicitly prefixing column name either x$ y$. single column name provided without join conditions, interpreted column name duplicated side ==, .e. x interpreted x == x.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/join_by.html","id":"join-types","dir":"Reference","previous_headings":"","what":"Join types","title":"Join specifications — join_by","text":"following types joins supported dplyr: Equality joins Inequality joins Rolling joins Overlap joins Cross joins Equality, inequality, rolling, overlap joins discussed detail . Cross joins implemented cross_join().","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/join_by.html","id":"equality-joins","dir":"Reference","previous_headings":"","what":"Equality joins","title":"Join specifications — join_by","text":"Equality joins require keys equal one pairs columns, common type join. construct equality join using join_by(), supply two column names join separated ==. Alternatively, supplying single name interpreted equality join two columns name. 
example, join_by(x) equivalent join_by(x == x).","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/join_by.html","id":"inequality-joins","dir":"Reference","previous_headings":"","what":"Inequality joins","title":"Join specifications — join_by","text":"Inequality joins match inequality, >, >=, <, <=, common time series analysis genomics. construct inequality join using join_by(), supply two column names separated one mentioned inequalities. Note inequality joins match single row x potentially large number rows y. extra careful constructing inequality join specifications!","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/join_by.html","id":"rolling-joins","dir":"Reference","previous_headings":"","what":"Rolling joins","title":"Join specifications — join_by","text":"Rolling joins variant inequality joins limit results returned inequality join condition. useful \"rolling\" closest match forward/backwards exact match. construct rolling join, wrap inequality closest(). closest(expr) expr must inequality involving one : >, >=, <, <=. example, closest(x >= y) interpreted : value x, find closest value y less equal x value. closest() always use left-hand table (x) primary table, right-hand table (y) one find closest match , regardless inequality specified. example, closest(y$>= x$b) always interpreted closest(x$b <= y$).","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/join_by.html","id":"overlap-joins","dir":"Reference","previous_headings":"","what":"Overlap joins","title":"Join specifications — join_by","text":"Overlap joins special case inequality joins involving one two columns left-hand table overlapping range defined two columns right-hand table. three helpers join_by() recognizes assist constructing overlap joins, can constructed simpler inequalities. (x, y_lower, y_upper, ..., bounds = \"[]\") value x, finds everywhere value falls [y_lower, y_upper]. Equivalent x >= y_lower, x <= y_upper default. 
bounds can one \"[]\", \"[)\", \"(]\", \"()\" alter inclusiveness lower upper bounds. changes whether >= > <= < used build inequalities shown . Dots future extensions must empty. within(x_lower, x_upper, y_lower, y_upper) range [x_lower, x_upper], finds everywhere range falls completely within [y_lower, y_upper]. Equivalent x_lower >= y_lower, x_upper <= y_upper. inequalities used build within() regardless inclusiveness supplied ranges. overlaps(x_lower, x_upper, y_lower, y_upper, ..., bounds = \"[]\") range [x_lower, x_upper], finds everywhere range overlaps [y_lower, y_upper] capacity. Equivalent x_lower <= y_upper, x_upper >= y_lower default. bounds can one \"[]\", \"[)\", \"(]\", \"()\" alter inclusiveness lower upper bounds. \"[]\" uses <= >=, 3 options use < > generate exact inequalities. Dots future extensions must empty. conditions assume ranges well-formed non-empty, .e. x_lower <= x_upper bounds treated \"[]\", x_lower < x_upper otherwise.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/join_by.html","id":"column-referencing","dir":"Reference","previous_headings":"","what":"Column referencing","title":"Join specifications — join_by","text":"specifying join conditions, join_by() assumes column names left-hand side condition refer left-hand table (x), names right-hand side condition refer right-hand table (y). Occasionally, clearer able specify right-hand table name left-hand side condition, vice versa. 
support , column names can prefixed x$ y$ explicitly specify table come .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/join_by.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Join specifications — join_by","text":"","code":"sales <- tibble( id = c(1L, 1L, 1L, 2L, 2L), sale_date = as.Date(c(\"2018-12-31\", \"2019-01-02\", \"2019-01-05\", \"2019-01-04\", \"2019-01-01\")) ) sales #> # A tibble: 5 × 2 #> id sale_date #> #> 1 1 2018-12-31 #> 2 1 2019-01-02 #> 3 1 2019-01-05 #> 4 2 2019-01-04 #> 5 2 2019-01-01 promos <- tibble( id = c(1L, 1L, 2L), promo_date = as.Date(c(\"2019-01-01\", \"2019-01-05\", \"2019-01-02\")) ) promos #> # A tibble: 3 × 2 #> id promo_date #> #> 1 1 2019-01-01 #> 2 1 2019-01-05 #> 3 2 2019-01-02 # Match `id` to `id`, and `sale_date` to `promo_date` by <- join_by(id, sale_date == promo_date) left_join(sales, promos, by) #> # A tibble: 5 × 2 #> id sale_date #> #> 1 1 2018-12-31 #> 2 1 2019-01-02 #> 3 1 2019-01-05 #> 4 2 2019-01-04 #> 5 2 2019-01-01 # For each `sale_date` within a particular `id`, # find all `promo_date`s that occurred before that particular sale by <- join_by(id, sale_date >= promo_date) left_join(sales, promos, by) #> # A tibble: 6 × 3 #> id sale_date promo_date #> #> 1 1 2018-12-31 NA #> 2 1 2019-01-02 2019-01-01 #> 3 1 2019-01-05 2019-01-01 #> 4 1 2019-01-05 2019-01-05 #> 5 2 2019-01-04 2019-01-02 #> 6 2 2019-01-01 NA # For each `sale_date` within a particular `id`, # find only the closest `promo_date` that occurred before that sale by <- join_by(id, closest(sale_date >= promo_date)) left_join(sales, promos, by) #> # A tibble: 5 × 3 #> id sale_date promo_date #> #> 1 1 2018-12-31 NA #> 2 1 2019-01-02 2019-01-01 #> 3 1 2019-01-05 2019-01-05 #> 4 2 2019-01-04 2019-01-02 #> 5 2 2019-01-01 NA # If you want to disallow exact matching in rolling joins, use `>` rather # than `>=`. 
Note that the promo on `2019-01-05` is no longer considered the # closest match for the sale on the same date. by <- join_by(id, closest(sale_date > promo_date)) left_join(sales, promos, by) #> # A tibble: 5 × 3 #> id sale_date promo_date #> #> 1 1 2018-12-31 NA #> 2 1 2019-01-02 2019-01-01 #> 3 1 2019-01-05 2019-01-01 #> 4 2 2019-01-04 2019-01-02 #> 5 2 2019-01-01 NA # Same as before, but also require that the promo had to occur at most 1 # day before the sale was made. We'll use a full join to see that id 2's # promo on `2019-01-02` is no longer matched to the sale on `2019-01-04`. sales <- mutate(sales, sale_date_lower = sale_date - 1) by <- join_by(id, closest(sale_date >= promo_date), sale_date_lower <= promo_date) full_join(sales, promos, by) #> # A tibble: 6 × 4 #> id sale_date sale_date_lower promo_date #> #> 1 1 2018-12-31 2018-12-30 NA #> 2 1 2019-01-02 2019-01-01 2019-01-01 #> 3 1 2019-01-05 2019-01-04 2019-01-05 #> 4 2 2019-01-04 2019-01-03 NA #> 5 2 2019-01-01 2018-12-31 NA #> 6 2 NA NA 2019-01-02 # --------------------------------------------------------------------------- segments <- tibble( segment_id = 1:4, chromosome = c(\"chr1\", \"chr2\", \"chr2\", \"chr1\"), start = c(140, 210, 380, 230), end = c(150, 240, 415, 280) ) segments #> # A tibble: 4 × 4 #> segment_id chromosome start end #> #> 1 1 chr1 140 150 #> 2 2 chr2 210 240 #> 3 3 chr2 380 415 #> 4 4 chr1 230 280 reference <- tibble( reference_id = 1:4, chromosome = c(\"chr1\", \"chr1\", \"chr2\", \"chr2\"), start = c(100, 200, 300, 415), end = c(150, 250, 399, 450) ) reference #> # A tibble: 4 × 4 #> reference_id chromosome start end #> #> 1 1 chr1 100 150 #> 2 2 chr1 200 250 #> 3 3 chr2 300 399 #> 4 4 chr2 415 450 # Find every time a segment `start` falls between the reference # `[start, end]` range. 
by <- join_by(chromosome, between(start, start, end)) full_join(segments, reference, by) #> # A tibble: 5 × 7 #> segment_id chromosome start.x end.x reference_id start.y end.y #> #> 1 1 chr1 140 150 1 100 150 #> 2 2 chr2 210 240 NA NA NA #> 3 3 chr2 380 415 3 300 399 #> 4 4 chr1 230 280 2 200 250 #> 5 NA chr2 NA NA 4 415 450 # If you wanted the reference columns first, supply `reference` as `x` # and `segments` as `y`, then explicitly refer to their columns using `x$` # and `y$`. by <- join_by(chromosome, between(y$start, x$start, x$end)) full_join(reference, segments, by) #> # A tibble: 5 × 7 #> reference_id chromosome start.x end.x segment_id start.y end.y #> #> 1 1 chr1 100 150 1 140 150 #> 2 2 chr1 200 250 4 230 280 #> 3 3 chr2 300 399 3 380 415 #> 4 4 chr2 415 450 NA NA NA #> 5 NA chr2 NA NA 2 210 240 # Find every time a segment falls completely within a reference. # Sometimes using `x$` and `y$` makes your intentions clearer, even if they # match the default behavior. by <- join_by(chromosome, within(x$start, x$end, y$start, y$end)) inner_join(segments, reference, by) #> # A tibble: 1 × 7 #> segment_id chromosome start.x end.x reference_id start.y end.y #> #> 1 1 chr1 140 150 1 100 150 # Find every time a segment overlaps a reference in any way. by <- join_by(chromosome, overlaps(x$start, x$end, y$start, y$end)) full_join(segments, reference, by) #> # A tibble: 5 × 7 #> segment_id chromosome start.x end.x reference_id start.y end.y #> #> 1 1 chr1 140 150 1 100 150 #> 2 2 chr2 210 240 NA NA NA #> 3 3 chr2 380 415 3 300 399 #> 4 3 chr2 380 415 4 415 450 #> 5 4 chr1 230 280 2 200 250 # It is common to have right-open ranges with bounds like `[)`, which would # mean an end value of `415` would no longer overlap a start value of `415`. # Setting `bounds` allows you to compute overlaps with those kinds of ranges. 
by <- join_by(chromosome, overlaps(x$start, x$end, y$start, y$end, bounds = \"[)\")) full_join(segments, reference, by) #> # A tibble: 5 × 7 #> segment_id chromosome start.x end.x reference_id start.y end.y #> #> 1 1 chr1 140 150 1 100 150 #> 2 2 chr2 210 240 NA NA NA #> 3 3 chr2 380 415 3 300 399 #> 4 4 chr1 230 280 2 200 250 #> 5 NA chr2 NA NA 4 415 450"},{"path":"https://dplyr.tidyverse.org/dev/reference/last_dplyr_warnings.html","id":null,"dir":"Reference","previous_headings":"","what":"Show warnings from the last command — last_dplyr_warnings","title":"Show warnings from the last command — last_dplyr_warnings","text":"Warnings occur inside dplyr verb like mutate() caught stashed away instead emitted console. prevents rowwise grouped data frames flooding console warnings. see original warnings, use last_dplyr_warnings().","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/last_dplyr_warnings.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Show warnings from the last command — last_dplyr_warnings","text":"","code":"last_dplyr_warnings(n = 5)"},{"path":"https://dplyr.tidyverse.org/dev/reference/last_dplyr_warnings.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Show warnings from the last command — last_dplyr_warnings","text":"n Passed head() first n warnings displayed.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/lead-lag.html","id":null,"dir":"Reference","previous_headings":"","what":"Compute lagged or leading values — lead-lag","title":"Compute lagged or leading values — lead-lag","text":"Find \"previous\" (lag()) \"next\" (lead()) values vector. 
Useful comparing values behind ahead current values.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/lead-lag.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Compute lagged or leading values — lead-lag","text":"","code":"lag(x, n = 1L, default = NULL, order_by = NULL, ...) lead(x, n = 1L, default = NULL, order_by = NULL, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/lead-lag.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Compute lagged or leading values — lead-lag","text":"x vector n Positive integer length 1, giving number positions lag lead default value used pad x back original size lag lead applied. default, NULL, pads missing value. supplied, must vector size 1, cast type x. order_by optional secondary vector defines ordering use applying lag lead x. supplied, must size x. ... used.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/lead-lag.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Compute lagged or leading values — lead-lag","text":"vector type size x.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/lead-lag.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Compute lagged or leading values — lead-lag","text":"","code":"lag(1:5) #> [1] NA 1 2 3 4 lead(1:5) #> [1] 2 3 4 5 NA x <- 1:5 tibble(behind = lag(x), x, ahead = lead(x)) #> # A tibble: 5 × 3 #> behind x ahead #> #> 1 NA 1 2 #> 2 1 2 3 #> 3 2 3 4 #> 4 3 4 5 #> 5 4 5 NA # If you want to look more rows behind or ahead, use `n` lag(1:5, n = 1) #> [1] NA 1 2 3 4 lag(1:5, n = 2) #> [1] NA NA 1 2 3 lead(1:5, n = 1) #> [1] 2 3 4 5 NA lead(1:5, n = 2) #> [1] 3 4 5 NA NA # If you want to define a value to pad with, use `default` lag(1:5) #> [1] NA 1 2 3 4 lag(1:5, default = 0) #> [1] 0 1 2 3 4 lead(1:5) #> [1] 2 3 4 5 NA lead(1:5, default = 6) #> [1] 2 3 4 5 6 # If the data are not already 
ordered, use `order_by` scrambled <- slice_sample( tibble(year = 2000:2005, value = (0:5) ^ 2), prop = 1 ) wrong <- mutate(scrambled, previous_year_value = lag(value)) arrange(wrong, year) #> # A tibble: 6 × 3 #> year value previous_year_value #> #> 1 2000 0 25 #> 2 2001 1 4 #> 3 2002 4 0 #> 4 2003 9 16 #> 5 2004 16 NA #> 6 2005 25 9 right <- mutate(scrambled, previous_year_value = lag(value, order_by = year)) arrange(right, year) #> # A tibble: 6 × 3 #> year value previous_year_value #> #> 1 2000 0 NA #> 2 2001 1 0 #> 3 2002 4 1 #> 4 2003 9 4 #> 5 2004 16 9 #> 6 2005 25 16"},{"path":"https://dplyr.tidyverse.org/dev/reference/make_tbl.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a ","title":"Create a ","text":"tbl() standard constructor tbls. .tbl() coerces, .tbl() tests.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/make_tbl.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a ","text":"","code":"make_tbl(subclass, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/make_tbl.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a ","text":"subclass name subclass. \"tbl\" abstract base class, must supply value. tbl_ automatically prepended class name ... tbl(), fields used class. .tbl(), arguments passed methods.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate-joins.html","id":null,"dir":"Reference","previous_headings":"","what":"Mutating joins — mutate-joins","title":"Mutating joins — mutate-joins","text":"Mutating joins add columns y x, matching observations based keys. four mutating joins: inner join, three outer joins.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate-joins.html","id":"inner-join","dir":"Reference","previous_headings":"","what":"Inner join","title":"Mutating joins — mutate-joins","text":"inner_join() keeps observations x matching key y. 
important property inner join unmatched rows either input included result. means generally inner joins appropriate analyses, easy lose observations.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate-joins.html","id":"outer-joins","dir":"Reference","previous_headings":"","what":"Outer joins","title":"Mutating joins — mutate-joins","text":"three outer joins keep observations appear least one data frames: left_join() keeps observations x. right_join() keeps observations y. full_join() keeps observations x y.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate-joins.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Mutating joins — mutate-joins","text":"","code":"inner_join( x, y, by = NULL, copy = FALSE, suffix = c(\".x\", \".y\"), ..., keep = NULL ) # S3 method for data.frame inner_join( x, y, by = NULL, copy = FALSE, suffix = c(\".x\", \".y\"), ..., keep = NULL, na_matches = c(\"na\", \"never\"), multiple = \"all\", unmatched = \"drop\", relationship = NULL ) left_join( x, y, by = NULL, copy = FALSE, suffix = c(\".x\", \".y\"), ..., keep = NULL ) # S3 method for data.frame left_join( x, y, by = NULL, copy = FALSE, suffix = c(\".x\", \".y\"), ..., keep = NULL, na_matches = c(\"na\", \"never\"), multiple = \"all\", unmatched = \"drop\", relationship = NULL ) right_join( x, y, by = NULL, copy = FALSE, suffix = c(\".x\", \".y\"), ..., keep = NULL ) # S3 method for data.frame right_join( x, y, by = NULL, copy = FALSE, suffix = c(\".x\", \".y\"), ..., keep = NULL, na_matches = c(\"na\", \"never\"), multiple = \"all\", unmatched = \"drop\", relationship = NULL ) full_join( x, y, by = NULL, copy = FALSE, suffix = c(\".x\", \".y\"), ..., keep = NULL ) # S3 method for data.frame full_join( x, y, by = NULL, copy = FALSE, suffix = c(\".x\", \".y\"), ..., keep = NULL, na_matches = c(\"na\", \"never\"), multiple = \"all\", relationship = NULL 
)"},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate-joins.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Mutating joins — mutate-joins","text":"x, y pair data frames, data frame extensions (e.g. tibble), lazy data frames (e.g. dbplyr dtplyr). See Methods, , details. join specification created join_by(), character vector variables join . NULL, default, *_join() perform natural join, using variables common across x y. message lists variables can check correct; suppress message supplying explicitly. join different variables x y, use join_by() specification. example, join_by(== b) match x$y$b. join multiple variables, use join_by() specification multiple expressions. example, join_by(== b, c == d) match x$y$b x$c y$d. column names x y, can shorten listing variable names, like join_by(, c). join_by() can also used perform inequality, rolling, overlap joins. See documentation ?join_by details types joins. simple equality joins, can alternatively specify character vector variable names join . example, = c(\"\", \"b\") joins x$y$x$b y$b. variable names differ x y, use named character vector like = c(\"x_a\" = \"y_a\", \"x_b\" = \"y_b\"). perform cross-join, generating combinations x y, see cross_join(). copy x y data source, copy TRUE, y copied src x. allows join tables across srcs, potentially expensive operation must opt . suffix non-joined duplicate variables x y, suffixes added output disambiguate . character vector length 2. ... parameters passed onto methods. keep join keys x y preserved output? NULL, default, joins equality retain keys x, joins inequality retain keys inputs. TRUE, keys inputs retained. FALSE, keys x retained. right full joins, data key columns corresponding rows exist y merged key columns x. used joining inequality conditions. na_matches two NA two NaN values match? \"na\", default, treats two NA two NaN values equal, like %%, match(), merge(). 
\"never\" treats two NA two NaN values different, never match together values. similar joins database sources base::merge(incomparables = NA). multiple Handling rows x multiple matches y. row x: \"\", default, returns every match detected y. behavior SQL. \"\" returns one match detected y, guarantees match returned. often faster \"first\" \"last\" just need detect least one match. \"first\" returns first match detected y. \"last\" returns last match detected y. unmatched unmatched keys result dropped rows handled? \"drop\" drops unmatched keys result. \"error\" throws error unmatched keys detected. unmatched intended protect accidentally dropping rows join. checks unmatched keys input potentially drop rows. left joins, checks y. right joins, checks x. inner joins, checks x y. case, unmatched also allowed character vector length 2 specify behavior x y independently. relationship Handling expected relationship keys x y. expectations chosen list invalidated, error thrown. NULL, default, expect relationship x y. However, equality joins check many--many relationship (typically unexpected) warn one occurs, encouraging either take closer look inputs make relationship explicit specifying \"many--many\". See Many--many relationships section details. \"one--one\" expects: row x matches 1 row y. row y matches 1 row x. \"one--many\" expects: row y matches 1 row x. \"many--one\" expects: row x matches 1 row y. \"many--many\" perform relationship checks, provided allow explicit relationship know exists. relationship handle cases zero matches. , see unmatched.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate-joins.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Mutating joins — mutate-joins","text":"object type x (including groups). order rows columns x preserved much possible. output following properties: rows affect join type. inner_join() returns matched x rows. left_join() returns x rows. 
right_join() returns matched x rows, followed unmatched y rows. full_join() returns x rows, followed unmatched y rows. Output columns include columns x non-key columns y. keep = TRUE, key columns y included well. non-key columns x y name, suffixes added disambiguate. keep = TRUE key columns x y name, suffixes added disambiguate well. keep = FALSE, output columns included coerced common type x y.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate-joins.html","id":"many-to-many-relationships","dir":"Reference","previous_headings":"","what":"Many-to-many relationships","title":"Mutating joins — mutate-joins","text":"default, dplyr guards many--many relationships equality joins throwing warning. occur following true: row x matches multiple rows y. row y matches multiple rows x. typically surprising, joins involve relationship one--one, one--many, many--one, often result improperly specified join. Many--many relationships particularly problematic can result Cartesian explosion number rows returned join. many--many relationship expected, silence warning explicitly setting relationship = \"many--many\". production code, best preemptively set relationship whatever relationship expect exist keys x y, forces error occur immediately data align expectations. Inequality joins typically result many--many relationships nature, warn default, still take extra care specifying inequality join, also capability return large number rows. Rolling joins warn many--many relationships either, many rolling joins follow many--one relationship, often useful set relationship = \"many--one\" enforce . 
Note SQL, database providers let specify many--many relationship two tables, instead requiring create third junction table results two one--many relationships instead.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate-joins.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Mutating joins — mutate-joins","text":"functions generics, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. Methods available currently loaded packages: inner_join(): dbplyr (tbl_lazy), dplyr (data.frame) . left_join(): dbplyr (tbl_lazy), dplyr (data.frame) . right_join(): dbplyr (tbl_lazy), dplyr (data.frame) . full_join(): dbplyr (tbl_lazy), dplyr (data.frame) .","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate-joins.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Mutating joins — mutate-joins","text":"","code":"band_members %>% inner_join(band_instruments) #> Joining with `by = join_by(name)` #> # A tibble: 2 × 3 #> name band plays #> #> 1 John Beatles guitar #> 2 Paul Beatles bass band_members %>% left_join(band_instruments) #> Joining with `by = join_by(name)` #> # A tibble: 3 × 3 #> name band plays #> #> 1 Mick Stones NA #> 2 John Beatles guitar #> 3 Paul Beatles bass band_members %>% right_join(band_instruments) #> Joining with `by = join_by(name)` #> # A tibble: 3 × 3 #> name band plays #> #> 1 John Beatles guitar #> 2 Paul Beatles bass #> 3 Keith NA guitar band_members %>% full_join(band_instruments) #> Joining with `by = join_by(name)` #> # A tibble: 4 × 3 #> name band plays #> #> 1 Mick Stones NA #> 2 John Beatles guitar #> 3 Paul Beatles bass #> 4 Keith NA guitar # To suppress the message about joining variables, supply `by` band_members %>% inner_join(band_instruments, by = join_by(name)) #> # A tibble: 2 × 3 #> name band plays #> #> 1 John Beatles guitar #> 2 
Paul Beatles bass # This is good practice in production code # Use an equality expression if the join variables have different names band_members %>% full_join(band_instruments2, by = join_by(name == artist)) #> # A tibble: 4 × 3 #> name band plays #> #> 1 Mick Stones NA #> 2 John Beatles guitar #> 3 Paul Beatles bass #> 4 Keith NA guitar # By default, the join keys from `x` and `y` are coalesced in the output; use # `keep = TRUE` to keep the join keys from both `x` and `y` band_members %>% full_join(band_instruments2, by = join_by(name == artist), keep = TRUE) #> # A tibble: 4 × 4 #> name band artist plays #> #> 1 Mick Stones NA NA #> 2 John Beatles John guitar #> 3 Paul Beatles Paul bass #> 4 NA NA Keith guitar # If a row in `x` matches multiple rows in `y`, all the rows in `y` will be # returned once for each matching row in `x`. df1 <- tibble(x = 1:3) df2 <- tibble(x = c(1, 1, 2), y = c(\"first\", \"second\", \"third\")) df1 %>% left_join(df2) #> Joining with `by = join_by(x)` #> # A tibble: 4 × 2 #> x y #> #> 1 1 first #> 2 1 second #> 3 2 third #> 4 3 NA # If a row in `y` also matches multiple rows in `x`, this is known as a # many-to-many relationship, which is typically a result of an improperly # specified join or some kind of messy data. In this case, a warning is # thrown by default: df3 <- tibble(x = c(1, 1, 1, 3)) df3 %>% left_join(df2) #> Joining with `by = join_by(x)` #> Warning: Detected an unexpected many-to-many relationship between `x` and `y`. #> ℹ Row 1 of `x` matches multiple rows in `y`. #> ℹ Row 1 of `y` matches multiple rows in `x`. #> ℹ If a many-to-many relationship is expected, set `relationship = #> \"many-to-many\"` to silence this warning. 
#> # A tibble: 7 × 2 #> x y #> #> 1 1 first #> 2 1 second #> 3 1 first #> 4 1 second #> 5 1 first #> 6 1 second #> 7 3 NA # In the rare case where a many-to-many relationship is expected, set # `relationship = \"many-to-many\"` to silence this warning df3 %>% left_join(df2, relationship = \"many-to-many\") #> Joining with `by = join_by(x)` #> # A tibble: 7 × 2 #> x y #> #> 1 1 first #> 2 1 second #> 3 1 first #> 4 1 second #> 5 1 first #> 6 1 second #> 7 3 NA # Use `join_by()` with a condition other than `==` to perform an inequality # join. Here we match on every instance where `df1$x > df2$x`. df1 %>% left_join(df2, join_by(x > x)) #> # A tibble: 6 × 3 #> x.x x.y y #> #> 1 1 NA NA #> 2 2 1 first #> 3 2 1 second #> 4 3 1 first #> 5 3 1 second #> 6 3 2 third # By default, NAs match other NAs so that there are two # rows in the output of this join: df1 <- data.frame(x = c(1, NA), y = 2) df2 <- data.frame(x = c(1, NA), z = 3) left_join(df1, df2) #> Joining with `by = join_by(x)` #> x y z #> 1 1 2 3 #> 2 NA 2 3 # You can optionally request that NAs don't match, giving a # a result that more closely resembles SQL joins left_join(df1, df2, na_matches = \"never\") #> Joining with `by = join_by(x)` #> x y z #> 1 1 2 3 #> 2 NA 2 NA"},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate.html","id":null,"dir":"Reference","previous_headings":"","what":"Create, modify, and delete columns — mutate","title":"Create, modify, and delete columns — mutate","text":"mutate() creates new columns functions existing variables. can also modify (name existing column) delete columns (setting value NULL).","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create, modify, and delete columns — mutate","text":"","code":"mutate(.data, ...) 
# S3 method for data.frame mutate( .data, ..., .by = NULL, .keep = c(\"all\", \"used\", \"unused\", \"none\"), .before = NULL, .after = NULL )"},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create, modify, and delete columns — mutate","text":".data data frame, data frame extension (e.g. tibble), lazy data frame (e.g. dbplyr dtplyr). See Methods, , details. ... Name-value pairs. name gives name column output. value can : vector length 1, recycled correct length. vector length current group (whole data frame ungrouped). NULL, remove column. data frame tibble, create multiple columns output. . Optionally, selection columns group just operation, functioning alternative group_by(). details examples, see ?dplyr_by. .keep Control columns .data retained output. Grouping columns columns created ... always kept. \"\" retains columns .data. default. \"used\" retains columns used ... create new columns. useful checking work, displays inputs outputs side--side. \"unused\" retains columns used ... create new columns. useful generate new columns, longer need columns used generate . \"none\" retain extra columns .data. grouping variables columns created ... kept. ., . Optionally, control new columns appear (default add right hand side). See relocate() details.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create, modify, and delete columns — mutate","text":"object type .data. output following properties: Columns .data preserved according .keep argument. Existing columns modified ... always returned original location. New columns created ... placed according ..arguments. number rows affected. Columns given value NULL removed. Groups recomputed grouping variable mutated. 
Data frame attributes preserved.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate.html","id":"useful-mutate-functions","dir":"Reference","previous_headings":"","what":"Useful mutate functions","title":"Create, modify, and delete columns — mutate","text":"+, -, log(), etc., usual mathematical meanings lead(), lag() dense_rank(), min_rank(), percent_rank(), row_number(), cume_dist(), ntile() cumsum(), cummean(), cummin(), cummax(), cumany(), cumall() na_if(), coalesce() if_else(), recode(), case_when()","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate.html","id":"grouped-tibbles","dir":"Reference","previous_headings":"","what":"Grouped tibbles","title":"Create, modify, and delete columns — mutate","text":"mutating expressions computed within groups, may yield different results grouped tibbles. case soon aggregating, lagging, ranking function involved. Compare ungrouped mutate: grouped equivalent: former normalises mass global average whereas latter normalises averages within species levels.","code":"starwars %>% select(name, mass, species) %>% mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) starwars %>% select(name, mass, species) %>% group_by(species) %>% mutate(mass_norm = mass / mean(mass, na.rm = TRUE))"},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Create, modify, and delete columns — mutate","text":"function generic, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. 
Methods available currently loaded packages: dbplyr (tbl_lazy), dplyr (data.frame) .","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Create, modify, and delete columns — mutate","text":"","code":"# Newly created variables are available immediately starwars %>% select(name, mass) %>% mutate( mass2 = mass * 2, mass2_squared = mass2 * mass2 ) #> # A tibble: 87 × 4 #> name mass mass2 mass2_squared #> #> 1 Luke Skywalker 77 154 23716 #> 2 C-3PO 75 150 22500 #> 3 R2-D2 32 64 4096 #> 4 Darth Vader 136 272 73984 #> 5 Leia Organa 49 98 9604 #> 6 Owen Lars 120 240 57600 #> 7 Beru Whitesun Lars 75 150 22500 #> 8 R5-D4 32 64 4096 #> 9 Biggs Darklighter 84 168 28224 #> 10 Obi-Wan Kenobi 77 154 23716 #> # ℹ 77 more rows # As well as adding new variables, you can use mutate() to # remove variables and modify existing variables. starwars %>% select(name, height, mass, homeworld) %>% mutate( mass = NULL, height = height * 0.0328084 # convert to feet ) #> # A tibble: 87 × 3 #> name height homeworld #> #> 1 Luke Skywalker 5.64 Tatooine #> 2 C-3PO 5.48 Tatooine #> 3 R2-D2 3.15 Naboo #> 4 Darth Vader 6.63 Tatooine #> 5 Leia Organa 4.92 Alderaan #> 6 Owen Lars 5.84 Tatooine #> 7 Beru Whitesun Lars 5.41 Tatooine #> 8 R5-D4 3.18 Tatooine #> 9 Biggs Darklighter 6.00 Tatooine #> 10 Obi-Wan Kenobi 5.97 Stewjon #> # ℹ 77 more rows # Use across() with mutate() to apply a transformation # to multiple columns in a tibble. 
starwars %>% select(name, homeworld, species) %>% mutate(across(!name, as.factor)) #> # A tibble: 87 × 3 #> name homeworld species #> #> 1 Luke Skywalker Tatooine Human #> 2 C-3PO Tatooine Droid #> 3 R2-D2 Naboo Droid #> 4 Darth Vader Tatooine Human #> 5 Leia Organa Alderaan Human #> 6 Owen Lars Tatooine Human #> 7 Beru Whitesun Lars Tatooine Human #> 8 R5-D4 Tatooine Droid #> 9 Biggs Darklighter Tatooine Human #> 10 Obi-Wan Kenobi Stewjon Human #> # ℹ 77 more rows # see more in ?across # Window functions are useful for grouped mutates: starwars %>% select(name, mass, homeworld) %>% group_by(homeworld) %>% mutate(rank = min_rank(desc(mass))) #> # A tibble: 87 × 4 #> # Groups: homeworld [49] #> name mass homeworld rank #> #> 1 Luke Skywalker 77 Tatooine 5 #> 2 C-3PO 75 Tatooine 6 #> 3 R2-D2 32 Naboo 6 #> 4 Darth Vader 136 Tatooine 1 #> 5 Leia Organa 49 Alderaan 2 #> 6 Owen Lars 120 Tatooine 2 #> 7 Beru Whitesun Lars 75 Tatooine 6 #> 8 R5-D4 32 Tatooine 8 #> 9 Biggs Darklighter 84 Tatooine 3 #> 10 Obi-Wan Kenobi 77 Stewjon 1 #> # ℹ 77 more rows # see `vignette(\"window-functions\")` for more details # By default, new columns are placed on the far right. df <- tibble(x = 1, y = 2) df %>% mutate(z = x + y) #> # A tibble: 1 × 3 #> x y z #> #> 1 1 2 3 df %>% mutate(z = x + y, .before = 1) #> # A tibble: 1 × 3 #> z x y #> #> 1 3 1 2 df %>% mutate(z = x + y, .after = x) #> # A tibble: 1 × 3 #> x z y #> #> 1 1 3 2 # By default, mutate() keeps all columns from the input data. 
df <- tibble(x = 1, y = 2, a = \"a\", b = \"b\") df %>% mutate(z = x + y, .keep = \"all\") # the default #> # A tibble: 1 × 5 #> x y a b z #> #> 1 1 2 a b 3 df %>% mutate(z = x + y, .keep = \"used\") #> # A tibble: 1 × 3 #> x y z #> #> 1 1 2 3 df %>% mutate(z = x + y, .keep = \"unused\") #> # A tibble: 1 × 3 #> a b z #> #> 1 a b 3 df %>% mutate(z = x + y, .keep = \"none\") #> # A tibble: 1 × 1 #> z #> #> 1 3 # Grouping ---------------------------------------- # The mutate operation may yield different results on grouped # tibbles because the expressions are computed within groups. # The following normalises `mass` by the global average: starwars %>% select(name, mass, species) %>% mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) #> # A tibble: 87 × 4 #> name mass species mass_norm #> #> 1 Luke Skywalker 77 Human 0.791 #> 2 C-3PO 75 Droid 0.771 #> 3 R2-D2 32 Droid 0.329 #> 4 Darth Vader 136 Human 1.40 #> 5 Leia Organa 49 Human 0.504 #> 6 Owen Lars 120 Human 1.23 #> 7 Beru Whitesun Lars 75 Human 0.771 #> 8 R5-D4 32 Droid 0.329 #> 9 Biggs Darklighter 84 Human 0.863 #> 10 Obi-Wan Kenobi 77 Human 0.791 #> # ℹ 77 more rows # Whereas this normalises `mass` by the averages within species # levels: starwars %>% select(name, mass, species) %>% group_by(species) %>% mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) #> # A tibble: 87 × 4 #> # Groups: species [38] #> name mass species mass_norm #> #> 1 Luke Skywalker 77 Human 0.947 #> 2 C-3PO 75 Droid 1.08 #> 3 R2-D2 32 Droid 0.459 #> 4 Darth Vader 136 Human 1.67 #> 5 Leia Organa 49 Human 0.603 #> 6 Owen Lars 120 Human 1.48 #> 7 Beru Whitesun Lars 75 Human 0.922 #> 8 R5-D4 32 Droid 0.459 #> 9 Biggs Darklighter 84 Human 1.03 #> 10 Obi-Wan Kenobi 77 Human 0.947 #> # ℹ 77 more rows # Indirection ---------------------------------------- # Refer to column names stored as strings with the `.data` pronoun: vars <- c(\"mass\", \"height\") mutate(starwars, prod = .data[[vars[[1]]]] * .data[[vars[[2]]]]) #> # A tibble: 87 × 15 #> 
name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Sky… 172 77 blond fair blue 19 male #> 2 C-3PO 167 75 NA gold yellow 112 none #> 3 R2-D2 96 32 NA white, bl… red 33 none #> 4 Darth Va… 202 136 none white yellow 41.9 male #> 5 Leia Org… 150 49 brown light brown 19 fema… #> 6 Owen Lars 178 120 brown, gr… light blue 52 male #> 7 Beru Whi… 165 75 brown light blue 47 fema… #> 8 R5-D4 97 32 NA white, red red NA none #> 9 Biggs Da… 183 84 black light brown 24 male #> 10 Obi-Wan … 182 77 auburn, w… fair blue-gray 57 male #> # ℹ 77 more rows #> # ℹ 7 more variables: gender , homeworld , species , #> # films , vehicles , starships , prod # Learn more in ?rlang::args_data_masking"},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate_all.html","id":null,"dir":"Reference","previous_headings":"","what":"Mutate multiple columns — mutate_all","title":"Mutate multiple columns — mutate_all","text":"Scoped verbs (_if, _at, _all) superseded use pick() across() existing verb. See vignette(\"colwise\") details. scoped variants mutate() transmute() make easy apply transformation multiple variables. three variants: _all affects every variable _at affects variables selected character vector vars() _if affects variables selected predicate function:","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate_all.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Mutate multiple columns — mutate_all","text":"","code":"mutate_all(.tbl, .funs, ...) mutate_if(.tbl, .predicate, .funs, ...) mutate_at(.tbl, .vars, .funs, ..., .cols = NULL) transmute_all(.tbl, .funs, ...) transmute_if(.tbl, .predicate, .funs, ...) transmute_at(.tbl, .vars, .funs, ..., .cols = NULL)"},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate_all.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Mutate multiple columns — mutate_all","text":".tbl tbl object. 
.funs function fun, quosure style lambda ~ fun(.) list either form. ... Additional arguments function calls .funs. evaluated , tidy dots support. .predicate predicate function applied columns logical vector. variables .predicate returns TRUE selected. argument passed rlang::as_function() thus supports quosure-style lambda functions strings representing function names. .vars list columns generated vars(), character vector column names, numeric vector column positions, NULL. .cols argument renamed .vars fit dplyr's terminology deprecated.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate_all.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Mutate multiple columns — mutate_all","text":"data frame. default, newly created columns shortest names needed uniquely identify output. force inclusion name, even needed, name input (see examples details).","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate_all.html","id":"grouping-variables","dir":"Reference","previous_headings":"","what":"Grouping variables","title":"Mutate multiple columns — mutate_all","text":"applied grouped tibble, operations applied grouping variables. behaviour depends whether selection implicit (selections) explicit (selections). Grouping variables covered explicit selections mutate_at() transmute_at() always error. Add -group_cols() vars() selection avoid : remove group_vars() character vector column names: Grouping variables covered implicit selections ignored mutate_all(), transmute_all(), mutate_if(), transmute_if().","code":"data %>% mutate_at(vars(-group_cols(), ...), myoperation) nms <- setdiff(nms, group_vars(data)) data %>% mutate_at(vars, myoperation)"},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate_all.html","id":"naming","dir":"Reference","previous_headings":"","what":"Naming","title":"Mutate multiple columns — mutate_all","text":"names new columns derived names input variables names functions. 
one unnamed function (.e. .funs unnamed list length one), names input variables used name new columns; _at functions, one unnamed variable (.e., .vars form vars(a_single_column)) .funs length greater one, names functions used name new columns; otherwise, new names created concatenating names input variables names functions, separated underscore \"_\". .funs argument can named unnamed list. function unnamed name derived automatically, name form \"fn#\" used. Similarly, vars() accepts named unnamed arguments. variable .vars named, new column name created. Name collisions new columns disambiguated using unique suffix.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate_all.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Mutate multiple columns — mutate_all","text":"","code":"iris <- as_tibble(iris) # All variants can be passed functions and additional arguments, # purrr-style. The _at() variants directly support strings. 
Here # we'll scale the variables `height` and `mass`: scale2 <- function(x, na.rm = FALSE) (x - mean(x, na.rm = na.rm)) / sd(x, na.rm) starwars %>% mutate_at(c(\"height\", \"mass\"), scale2) #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Sky… NA NA blond fair blue 19 male #> 2 C-3PO NA NA NA gold yellow 112 none #> 3 R2-D2 NA NA NA white, bl… red 33 none #> 4 Darth Va… NA NA none white yellow 41.9 male #> 5 Leia Org… NA NA brown light brown 19 fema… #> 6 Owen Lars NA NA brown, gr… light blue 52 male #> 7 Beru Whi… NA NA brown light blue 47 fema… #> 8 R5-D4 NA NA NA white, red red NA none #> 9 Biggs Da… NA NA black light brown 24 male #> 10 Obi-Wan … NA NA auburn, w… fair blue-gray 57 male #> # ℹ 77 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships # -> starwars %>% mutate(across(c(\"height\", \"mass\"), scale2)) #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Sky… NA NA blond fair blue 19 male #> 2 C-3PO NA NA NA gold yellow 112 none #> 3 R2-D2 NA NA NA white, bl… red 33 none #> 4 Darth Va… NA NA none white yellow 41.9 male #> 5 Leia Org… NA NA brown light brown 19 fema… #> 6 Owen Lars NA NA brown, gr… light blue 52 male #> 7 Beru Whi… NA NA brown light blue 47 fema… #> 8 R5-D4 NA NA NA white, red red NA none #> 9 Biggs Da… NA NA black light brown 24 male #> 10 Obi-Wan … NA NA auburn, w… fair blue-gray 57 male #> # ℹ 77 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships # You can pass additional arguments to the function: starwars %>% mutate_at(c(\"height\", \"mass\"), scale2, na.rm = TRUE) #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke … -0.0749 -0.120 blond fair blue 19 male #> 2 C-3PO -0.219 -0.132 NA gold yellow 112 none #> 3 R2-D2 -2.26 -0.385 NA white, bl… red 33 none #> 4 Darth… 0.788 0.228 none 
white yellow 41.9 male #> 5 Leia … -0.708 -0.285 brown light brown 19 fema… #> 6 Owen … 0.0976 0.134 brown, gr… light blue 52 male #> 7 Beru … -0.276 -0.132 brown light blue 47 fema… #> 8 R5-D4 -2.23 -0.385 NA white, red red NA none #> 9 Biggs… 0.241 -0.0786 black light brown 24 male #> 10 Obi-W… 0.213 -0.120 auburn, w… fair blue-gray 57 male #> # ℹ 77 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships starwars %>% mutate_at(c(\"height\", \"mass\"), ~scale2(., na.rm = TRUE)) #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke … -0.0749 -0.120 blond fair blue 19 male #> 2 C-3PO -0.219 -0.132 NA gold yellow 112 none #> 3 R2-D2 -2.26 -0.385 NA white, bl… red 33 none #> 4 Darth… 0.788 0.228 none white yellow 41.9 male #> 5 Leia … -0.708 -0.285 brown light brown 19 fema… #> 6 Owen … 0.0976 0.134 brown, gr… light blue 52 male #> 7 Beru … -0.276 -0.132 brown light blue 47 fema… #> 8 R5-D4 -2.23 -0.385 NA white, red red NA none #> 9 Biggs… 0.241 -0.0786 black light brown 24 male #> 10 Obi-W… 0.213 -0.120 auburn, w… fair blue-gray 57 male #> # ℹ 77 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships # -> starwars %>% mutate(across(c(\"height\", \"mass\"), ~ scale2(.x, na.rm = TRUE))) #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke … -0.0749 -0.120 blond fair blue 19 male #> 2 C-3PO -0.219 -0.132 NA gold yellow 112 none #> 3 R2-D2 -2.26 -0.385 NA white, bl… red 33 none #> 4 Darth… 0.788 0.228 none white yellow 41.9 male #> 5 Leia … -0.708 -0.285 brown light brown 19 fema… #> 6 Owen … 0.0976 0.134 brown, gr… light blue 52 male #> 7 Beru … -0.276 -0.132 brown light blue 47 fema… #> 8 R5-D4 -2.23 -0.385 NA white, red red NA none #> 9 Biggs… 0.241 -0.0786 black light brown 24 male #> 10 Obi-W… 0.213 -0.120 auburn, w… fair blue-gray 57 male #> # ℹ 77 more rows #> # ℹ 6 more 
variables: gender , homeworld , species , #> # films , vehicles , starships # You can also supply selection helpers to _at() functions but you have # to quote them with vars(): iris %>% mutate_at(vars(matches(\"Sepal\")), log) #> # A tibble: 150 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 1.63 1.25 1.4 0.2 setosa #> 2 1.59 1.10 1.4 0.2 setosa #> 3 1.55 1.16 1.3 0.2 setosa #> 4 1.53 1.13 1.5 0.2 setosa #> 5 1.61 1.28 1.4 0.2 setosa #> 6 1.69 1.36 1.7 0.4 setosa #> 7 1.53 1.22 1.4 0.3 setosa #> 8 1.61 1.22 1.5 0.2 setosa #> 9 1.48 1.06 1.4 0.2 setosa #> 10 1.59 1.13 1.5 0.1 setosa #> # ℹ 140 more rows iris %>% mutate(across(matches(\"Sepal\"), log)) #> # A tibble: 150 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 1.63 1.25 1.4 0.2 setosa #> 2 1.59 1.10 1.4 0.2 setosa #> 3 1.55 1.16 1.3 0.2 setosa #> 4 1.53 1.13 1.5 0.2 setosa #> 5 1.61 1.28 1.4 0.2 setosa #> 6 1.69 1.36 1.7 0.4 setosa #> 7 1.53 1.22 1.4 0.3 setosa #> 8 1.61 1.22 1.5 0.2 setosa #> 9 1.48 1.06 1.4 0.2 setosa #> 10 1.59 1.13 1.5 0.1 setosa #> # ℹ 140 more rows # The _if() variants apply a predicate function (a function that # returns TRUE or FALSE) to determine the relevant subset of # columns. 
Here we divide all the numeric columns by 100: starwars %>% mutate_if(is.numeric, scale2, na.rm = TRUE) #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke … -0.0749 -0.120 blond fair blue -0.443 male #> 2 C-3PO -0.219 -0.132 NA gold yellow 0.158 none #> 3 R2-D2 -2.26 -0.385 NA white, bl… red -0.353 none #> 4 Darth… 0.788 0.228 none white yellow -0.295 male #> 5 Leia … -0.708 -0.285 brown light brown -0.443 fema… #> 6 Owen … 0.0976 0.134 brown, gr… light blue -0.230 male #> 7 Beru … -0.276 -0.132 brown light blue -0.262 fema… #> 8 R5-D4 -2.23 -0.385 NA white, red red NA none #> 9 Biggs… 0.241 -0.0786 black light brown -0.411 male #> 10 Obi-W… 0.213 -0.120 auburn, w… fair blue-gray -0.198 male #> # ℹ 77 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships starwars %>% mutate(across(where(is.numeric), ~ scale2(.x, na.rm = TRUE))) #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke … -0.0749 -0.120 blond fair blue -0.443 male #> 2 C-3PO -0.219 -0.132 NA gold yellow 0.158 none #> 3 R2-D2 -2.26 -0.385 NA white, bl… red -0.353 none #> 4 Darth… 0.788 0.228 none white yellow -0.295 male #> 5 Leia … -0.708 -0.285 brown light brown -0.443 fema… #> 6 Owen … 0.0976 0.134 brown, gr… light blue -0.230 male #> 7 Beru … -0.276 -0.132 brown light blue -0.262 fema… #> 8 R5-D4 -2.23 -0.385 NA white, red red NA none #> 9 Biggs… 0.241 -0.0786 black light brown -0.411 male #> 10 Obi-W… 0.213 -0.120 auburn, w… fair blue-gray -0.198 male #> # ℹ 77 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships # mutate_if() is particularly useful for transforming variables from # one type to another iris %>% mutate_if(is.factor, as.character) #> # A tibble: 150 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 
setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows iris %>% mutate_if(is.double, as.integer) #> # A tibble: 150 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5 3 1 0 setosa #> 2 4 3 1 0 setosa #> 3 4 3 1 0 setosa #> 4 4 3 1 0 setosa #> 5 5 3 1 0 setosa #> 6 5 3 1 0 setosa #> 7 4 3 1 0 setosa #> 8 5 3 1 0 setosa #> 9 4 2 1 0 setosa #> 10 4 3 1 0 setosa #> # ℹ 140 more rows # -> iris %>% mutate(across(where(is.factor), as.character)) #> # A tibble: 150 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows iris %>% mutate(across(where(is.double), as.integer)) #> # A tibble: 150 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5 3 1 0 setosa #> 2 4 3 1 0 setosa #> 3 4 3 1 0 setosa #> 4 4 3 1 0 setosa #> 5 5 3 1 0 setosa #> 6 5 3 1 0 setosa #> 7 4 3 1 0 setosa #> 8 5 3 1 0 setosa #> 9 4 2 1 0 setosa #> 10 4 3 1 0 setosa #> # ℹ 140 more rows # Multiple transformations ---------------------------------------- # If you want to apply multiple transformations, pass a list of # functions. 
When there are multiple functions, they create new # variables instead of modifying the variables in place: iris %>% mutate_if(is.numeric, list(scale2, log)) #> # A tibble: 150 × 13 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows #> # ℹ 8 more variables: Sepal.Length_fn1 , Sepal.Width_fn1 , #> # Petal.Length_fn1 , Petal.Width_fn1 , #> # Sepal.Length_fn2 , Sepal.Width_fn2 , #> # Petal.Length_fn2 , Petal.Width_fn2 iris %>% mutate_if(is.numeric, list(~scale2(.), ~log(.))) #> # A tibble: 150 × 13 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows #> # ℹ 8 more variables: Sepal.Length_scale2 , #> # Sepal.Width_scale2 , Petal.Length_scale2 , #> # Petal.Width_scale2 , Sepal.Length_log , #> # Sepal.Width_log , Petal.Length_log , Petal.Width_log iris %>% mutate_if(is.numeric, list(scale = scale2, log = log)) #> # A tibble: 150 × 13 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows #> # ℹ 8 more variables: Sepal.Length_scale , Sepal.Width_scale , #> # Petal.Length_scale , Petal.Width_scale , #> # Sepal.Length_log , Sepal.Width_log , #> # Petal.Length_log , 
Petal.Width_log # -> iris %>% as_tibble() %>% mutate(across(where(is.numeric), list(scale = scale2, log = log))) #> # A tibble: 150 × 13 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows #> # ℹ 8 more variables: Sepal.Length_scale , Sepal.Length_log , #> # Sepal.Width_scale , Sepal.Width_log , #> # Petal.Length_scale , Petal.Length_log , #> # Petal.Width_scale , Petal.Width_log # When there's only one function in the list, it modifies existing # variables in place. Give it a name to instead create new variables: iris %>% mutate_if(is.numeric, list(scale2)) #> # A tibble: 150 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 -0.898 1.02 -1.34 -1.31 setosa #> 2 -1.14 -0.132 -1.34 -1.31 setosa #> 3 -1.38 0.327 -1.39 -1.31 setosa #> 4 -1.50 0.0979 -1.28 -1.31 setosa #> 5 -1.02 1.25 -1.34 -1.31 setosa #> 6 -0.535 1.93 -1.17 -1.05 setosa #> 7 -1.50 0.786 -1.34 -1.18 setosa #> 8 -1.02 0.786 -1.28 -1.31 setosa #> 9 -1.74 -0.361 -1.34 -1.31 setosa #> 10 -1.14 0.0979 -1.28 -1.44 setosa #> # ℹ 140 more rows iris %>% mutate_if(is.numeric, list(scale = scale2)) #> # A tibble: 150 × 9 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows #> # ℹ 4 more variables: Sepal.Length_scale , Sepal.Width_scale , #> # Petal.Length_scale , Petal.Width_scale 
"},{"path":"https://dplyr.tidyverse.org/dev/reference/n_distinct.html","id":null,"dir":"Reference","previous_headings":"","what":"Count unique combinations — n_distinct","title":"Count unique combinations — n_distinct","text":"n_distinct() counts number unique/distinct combinations set one vectors. faster concise equivalent nrow(unique(data.frame(...))).","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/n_distinct.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Count unique combinations — n_distinct","text":"","code":"n_distinct(..., na.rm = FALSE)"},{"path":"https://dplyr.tidyverse.org/dev/reference/n_distinct.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Count unique combinations — n_distinct","text":"... Unnamed vectors. multiple vectors supplied, length. na.rm TRUE, exclude missing observations count. multiple vectors ..., observation excluded values missing.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/n_distinct.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Count unique combinations — n_distinct","text":"single number.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/n_distinct.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Count unique combinations — n_distinct","text":"","code":"x <- c(1, 1, 2, 2, 2) n_distinct(x) #> [1] 2 y <- c(3, 3, NA, 3, 3) n_distinct(y) #> [1] 2 n_distinct(y, na.rm = TRUE) #> [1] 1 # Pairs (1, 3), (2, 3), and (2, NA) are distinct n_distinct(x, y) #> [1] 3 # (2, NA) is dropped, leaving 2 distinct combinations n_distinct(x, y, na.rm = TRUE) #> [1] 2 # Also works with data frames n_distinct(data.frame(x, y)) #> [1] 3"},{"path":"https://dplyr.tidyverse.org/dev/reference/na_if.html","id":null,"dir":"Reference","previous_headings":"","what":"Convert values to NA — na_if","title":"Convert values to NA — na_if","text":"translation 
SQL command NULLIF. useful want convert annoying value NA.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/na_if.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Convert values to NA — na_if","text":"","code":"na_if(x, y)"},{"path":"https://dplyr.tidyverse.org/dev/reference/na_if.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Convert values to NA — na_if","text":"x Vector modify y Value vector compare . x y equal, value x replaced NA. y cast type x comparison. y recycled size x comparison. means y can vector size x, time single value.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/na_if.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Convert values to NA — na_if","text":"modified version x replaces values equal y NA.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/na_if.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Convert values to NA — na_if","text":"","code":"na_if(1:5, 5:1) #> [1] 1 2 NA 4 5 x <- c(1, -1, 0, 10) 100 / x #> [1] 100 -100 Inf 10 100 / na_if(x, 0) #> [1] 100 -100 NA 10 y <- c(\"abc\", \"def\", \"\", \"ghi\") na_if(y, \"\") #> [1] \"abc\" \"def\" NA \"ghi\" # `na_if()` allows you to replace `NaN` with `NA`, # even though `NaN == NaN` returns `NA` z <- c(1, NaN, NA, 2, NaN) na_if(z, NaN) #> [1] 1 NA NA 2 NA # `na_if()` is particularly useful inside `mutate()`, # and is meant for use with vectors rather than entire data frames starwars %>% select(name, eye_color) %>% mutate(eye_color = na_if(eye_color, \"unknown\")) #> # A tibble: 87 × 2 #> name eye_color #> #> 1 Luke Skywalker blue #> 2 C-3PO yellow #> 3 R2-D2 red #> 4 Darth Vader yellow #> 5 Leia Organa brown #> 6 Owen Lars blue #> 7 Beru Whitesun Lars blue #> 8 R5-D4 red #> 9 Biggs Darklighter brown #> 10 Obi-Wan Kenobi blue-gray #> # ℹ 77 more rows # `na_if()` can also be 
used with `mutate()` and `across()` # to alter multiple columns starwars %>% mutate(across(where(is.character), ~na_if(., \"unknown\"))) #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Sky… 172 77 blond fair blue 19 male #> 2 C-3PO 167 75 NA gold yellow 112 none #> 3 R2-D2 96 32 NA white, bl… red 33 none #> 4 Darth Va… 202 136 none white yellow 41.9 male #> 5 Leia Org… 150 49 brown light brown 19 fema… #> 6 Owen Lars 178 120 brown, gr… light blue 52 male #> 7 Beru Whi… 165 75 brown light blue 47 fema… #> 8 R5-D4 97 32 NA white, red red NA none #> 9 Biggs Da… 183 84 black light brown 24 male #> 10 Obi-Wan … 182 77 auburn, w… fair blue-gray 57 male #> # ℹ 77 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships "},{"path":"https://dplyr.tidyverse.org/dev/reference/near.html","id":null,"dir":"Reference","previous_headings":"","what":"Compare two numeric vectors — near","title":"Compare two numeric vectors — near","text":"safe way comparing two vectors floating point numbers (pairwise) equal. 
safer using ==, built tolerance","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/near.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Compare two numeric vectors — near","text":"","code":"near(x, y, tol = .Machine$double.eps^0.5)"},{"path":"https://dplyr.tidyverse.org/dev/reference/near.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Compare two numeric vectors — near","text":"x, y Numeric vectors compare tol Tolerance comparison.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/near.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Compare two numeric vectors — near","text":"","code":"sqrt(2) ^ 2 == 2 #> [1] FALSE near(sqrt(2) ^ 2, 2) #> [1] TRUE"},{"path":"https://dplyr.tidyverse.org/dev/reference/nest_by.html","id":null,"dir":"Reference","previous_headings":"","what":"Nest by one or more variables — nest_by","title":"Nest by one or more variables — nest_by","text":"nest_by() closely related group_by(). However, instead storing group structure metadata, made explicit data, giving group key single row along list-column data frames contain data. nest_by() returns rowwise data frame, makes operations grouped data particularly elegant. See vignette(\"rowwise\") details.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/nest_by.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Nest by one or more variables — nest_by","text":"","code":"nest_by(.data, ..., .key = \"data\", .keep = FALSE)"},{"path":"https://dplyr.tidyverse.org/dev/reference/nest_by.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Nest by one or more variables — nest_by","text":".data data frame, data frame extension (e.g. tibble), lazy data frame (e.g. dbplyr dtplyr). See Methods, , details. ... group_by(), variables computations group . 
Computations always done ungrouped data frame. perform computations grouped data, need use separate mutate() step group_by(). Computations allowed nest_by(). ungroup(), variables remove grouping. .key Name list column .keep grouping columns kept list column.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/nest_by.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Nest by one or more variables — nest_by","text":"rowwise data frame. output following properties: rows come underlying group_keys(). columns grouping keys plus one list-column data frames. Data frame attributes preserved, nest_by() fundamentally creates new data frame. tbl one row per unique combination grouping variables. first columns grouping variables, followed list column tibbles matching rows remaining columns.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/nest_by.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Nest by one or more variables — nest_by","text":"Note df %>% nest_by(x, y) roughly equivalent want unnest nested data frame, can either use tidyr::unnest() take advantage reframe()s multi-row behaviour:","code":"df %>% group_by(x, y) %>% summarise(data = list(pick(everything()))) %>% rowwise() nested %>% reframe(data)"},{"path":"https://dplyr.tidyverse.org/dev/reference/nest_by.html","id":"lifecycle","dir":"Reference","previous_headings":"","what":"Lifecycle","title":"Nest by one or more variables — nest_by","text":"nest_by() stable tidyr::nest(.=) provides similar behavior. may deprecated future.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/nest_by.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Nest by one or more variables — nest_by","text":"function generic, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. 
following methods currently available loaded packages: dplyr (data.frame, grouped_df) .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/nest_by.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Nest by one or more variables — nest_by","text":"","code":"# After nesting, you get one row per group iris %>% nest_by(Species) #> # A tibble: 3 × 2 #> # Rowwise: Species #> Species data #> > #> 1 setosa [50 × 4] #> 2 versicolor [50 × 4] #> 3 virginica [50 × 4] starwars %>% nest_by(species) #> # A tibble: 38 × 2 #> # Rowwise: species #> species data #> > #> 1 Aleena [1 × 13] #> 2 Besalisk [1 × 13] #> 3 Cerean [1 × 13] #> 4 Chagrian [1 × 13] #> 5 Clawdite [1 × 13] #> 6 Droid [6 × 13] #> 7 Dug [1 × 13] #> 8 Ewok [1 × 13] #> 9 Geonosian [1 × 13] #> 10 Gungan [3 × 13] #> # ℹ 28 more rows # The output is grouped by row, which makes modelling particularly easy models <- mtcars %>% nest_by(cyl) %>% mutate(model = list(lm(mpg ~ wt, data = data))) models #> # A tibble: 3 × 3 #> # Rowwise: cyl #> cyl data model #> > #> 1 4 [11 × 10] #> 2 6 [7 × 10] #> 3 8 [14 × 10] models %>% summarise(rsq = summary(model)$r.squared) #> `summarise()` has grouped output by 'cyl'. You can override using the #> `.groups` argument. #> # A tibble: 3 × 2 #> # Groups: cyl [3] #> cyl rsq #> #> 1 4 0.509 #> 2 6 0.465 #> 3 8 0.423 # This is particularly elegant with the broom functions models %>% summarise(broom::glance(model)) #> `summarise()` has grouped output by 'cyl'. You can override using the #> `.groups` argument. 
#> # A tibble: 3 × 13 #> # Groups: cyl [3] #> cyl r.squared adj.r.squared sigma statistic p.value df logLik AIC #> #> 1 4 0.509 0.454 3.33 9.32 0.0137 1 -27.7 61.5 #> 2 6 0.465 0.357 1.17 4.34 0.0918 1 -9.83 25.7 #> 3 8 0.423 0.375 2.02 8.80 0.0118 1 -28.7 63.3 #> # ℹ 4 more variables: BIC , deviance , df.residual , #> # nobs models %>% reframe(broom::tidy(model)) #> # A tibble: 6 × 6 #> cyl term estimate std.error statistic p.value #> #> 1 4 (Intercept) 39.6 4.35 9.10 0.00000777 #> 2 4 wt -5.65 1.85 -3.05 0.0137 #> 3 6 (Intercept) 28.4 4.18 6.79 0.00105 #> 4 6 wt -2.78 1.33 -2.08 0.0918 #> 5 8 (Intercept) 23.9 3.01 7.94 0.00000405 #> 6 8 wt -2.19 0.739 -2.97 0.0118 # Note that you can also `reframe()` to unnest the data models %>% reframe(data) #> # A tibble: 32 × 11 #> cyl mpg disp hp drat wt qsec vs am gear carb #> #> 1 4 22.8 108 93 3.85 2.32 18.6 1 1 4 1 #> 2 4 24.4 147. 62 3.69 3.19 20 1 0 4 2 #> 3 4 22.8 141. 95 3.92 3.15 22.9 1 0 4 2 #> 4 4 32.4 78.7 66 4.08 2.2 19.5 1 1 4 1 #> 5 4 30.4 75.7 52 4.93 1.62 18.5 1 1 4 2 #> 6 4 33.9 71.1 65 4.22 1.84 19.9 1 1 4 1 #> 7 4 21.5 120. 97 3.7 2.46 20.0 1 0 3 1 #> 8 4 27.3 79 66 4.08 1.94 18.9 1 1 4 1 #> 9 4 26 120. 91 4.43 2.14 16.7 0 1 5 2 #> 10 4 30.4 95.1 113 3.77 1.51 16.9 1 1 5 2 #> # ℹ 22 more rows"},{"path":"https://dplyr.tidyverse.org/dev/reference/nest_join.html","id":null,"dir":"Reference","previous_headings":"","what":"Nest join — nest_join","title":"Nest join — nest_join","text":"nest join leaves x almost unchanged, except adds new list-column, element contains rows y match corresponding row x.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/nest_join.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Nest join — nest_join","text":"","code":"nest_join(x, y, by = NULL, copy = FALSE, keep = NULL, name = NULL, ...) 
# S3 method for data.frame nest_join( x, y, by = NULL, copy = FALSE, keep = NULL, name = NULL, ..., na_matches = c(\"na\", \"never\"), unmatched = \"drop\" )"},{"path":"https://dplyr.tidyverse.org/dev/reference/nest_join.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Nest join — nest_join","text":"x, y pair data frames, data frame extensions (e.g. tibble), lazy data frames (e.g. dbplyr dtplyr). See Methods, , details. join specification created join_by(), character vector variables join . NULL, default, *_join() perform natural join, using variables common across x y. message lists variables can check correct; suppress message supplying explicitly. join different variables x y, use join_by() specification. example, join_by(== b) match x$y$b. join multiple variables, use join_by() specification multiple expressions. example, join_by(== b, c == d) match x$y$b x$c y$d. column names x y, can shorten listing variable names, like join_by(, c). join_by() can also used perform inequality, rolling, overlap joins. See documentation ?join_by details types joins. simple equality joins, can alternatively specify character vector variable names join . example, = c(\"\", \"b\") joins x$y$x$b y$b. variable names differ x y, use named character vector like = c(\"x_a\" = \"y_a\", \"x_b\" = \"y_b\"). perform cross-join, generating combinations x y, see cross_join(). copy x y data source, copy TRUE, y copied src x. allows join tables across srcs, potentially expensive operation must opt . keep new list-column contain join keys? default preserve join keys inequality joins. name name list-column created join. NULL, default, name y used. ... parameters passed onto methods. na_matches two NA two NaN values match? \"na\", default, treats two NA two NaN values equal, like %%, match(), merge(). \"never\" treats two NA two NaN values different, never match together values. similar joins database sources base::merge(incomparables = NA). 
unmatched unmatched keys result dropped rows handled? \"drop\" drops unmatched keys result. \"error\" throws error unmatched keys detected. unmatched intended protect accidentally dropping rows join. checks unmatched keys input potentially drop rows. left joins, checks y. right joins, checks x. inner joins, checks x y. case, unmatched also allowed character vector length 2 specify behavior x y independently.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/nest_join.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Nest join — nest_join","text":"output: type x (including groups). exactly number rows x. Contains columns x order values. modified (slightly) keep = FALSE, columns listed coerced common type across x y. Gains one new column called {name} far right, list column containing data frames type y.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/nest_join.html","id":"relationship-to-other-joins","dir":"Reference","previous_headings":"","what":"Relationship to other joins","title":"Nest join — nest_join","text":"can recreate many joins result nest join: inner_join() nest_join() plus tidyr::unnest(). left_join() nest_join() plus tidyr::unnest(keep_empty = TRUE). semi_join() nest_join() plus filter() check every element data least one row. anti_join() nest_join() plus filter() check every element zero rows.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/nest_join.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Nest join — nest_join","text":"function generic, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. 
following methods currently available loaded packages: dplyr (data.frame) .","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/nest_join.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Nest join — nest_join","text":"","code":"df1 <- tibble(x = 1:3) df2 <- tibble(x = c(2, 3, 3), y = c(\"a\", \"b\", \"c\")) out <- nest_join(df1, df2) #> Joining with `by = join_by(x)` out #> # A tibble: 3 × 2 #> x df2 #> #> 1 1 #> 2 2 #> 3 3 out$df2 #> [[1]] #> # A tibble: 0 × 1 #> # ℹ 1 variable: y #> #> [[2]] #> # A tibble: 1 × 1 #> y #> #> 1 a #> #> [[3]] #> # A tibble: 2 × 1 #> y #> #> 1 b #> 2 c #>"},{"path":"https://dplyr.tidyverse.org/dev/reference/new_grouped_df.html","id":null,"dir":"Reference","previous_headings":"","what":"Low-level construction and validation for the grouped_df and rowwise_df classes — new_grouped_df","title":"Low-level construction and validation for the grouped_df and rowwise_df classes — new_grouped_df","text":"new_grouped_df() new_rowwise_df() constructors designed high-performance check types, values. means caller's responsibility create valid values, hence expert use . 
validate_grouped_df() validate_rowwise_df() validate attributes grouped_df rowwise_df.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/new_grouped_df.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Low-level construction and validation for the grouped_df and rowwise_df classes — new_grouped_df","text":"","code":"new_grouped_df(x, groups, ..., class = character()) validate_grouped_df(x, check_bounds = FALSE) new_rowwise_df(data, group_data = NULL, ..., class = character()) validate_rowwise_df(x)"},{"path":"https://dplyr.tidyverse.org/dev/reference/new_grouped_df.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Low-level construction and validation for the grouped_df and rowwise_df classes — new_grouped_df","text":"x data frame groups grouped structure, groups data frame. last column called .rows list 1 based integer vectors 1 number rows .data. ... additional attributes class additional class, prepended canonical classes. 
check_bounds whether check indices bounds problems grouped_df objects","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/new_grouped_df.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Low-level construction and validation for the grouped_df and rowwise_df classes — new_grouped_df","text":"","code":"# 5 bootstrap samples tbl <- new_grouped_df( tibble(x = rnorm(10)), groups = tibble(\".rows\" := replicate(5, sample(1:10, replace = TRUE), simplify = FALSE)) ) # mean of each bootstrap sample summarise(tbl, x = mean(x)) #> # A tibble: 5 × 1 #> x #> #> 1 0.181 #> 2 -0.0442 #> 3 0.450 #> 4 -0.730 #> 5 -0.369"},{"path":"https://dplyr.tidyverse.org/dev/reference/nth.html","id":null,"dir":"Reference","previous_headings":"","what":"Extract the first, last, or nth value from a vector — nth","title":"Extract the first, last, or nth value from a vector — nth","text":"useful helpers extracting single value vector. guaranteed return meaningful value, even input shorter expected. can also provide optional secondary vector defines ordering.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/nth.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Extract the first, last, or nth value from a vector — nth","text":"","code":"nth(x, n, order_by = NULL, default = NULL, na_rm = FALSE) first(x, order_by = NULL, default = NULL, na_rm = FALSE) last(x, order_by = NULL, default = NULL, na_rm = FALSE)"},{"path":"https://dplyr.tidyverse.org/dev/reference/nth.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Extract the first, last, or nth value from a vector — nth","text":"x vector n nth(), single integer specifying position. Negative integers index end (.e. -1L return last value vector). order_by optional vector size x used determine order. default default value use position exist x. NULL, default, missing value used. 
supplied, must single value, cast type x. x list , default allowed value. type size restrictions case. na_rm missing values x removed extracting value?","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/nth.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Extract the first, last, or nth value from a vector — nth","text":"x list, single element list. Otherwise, vector type x size 1.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/nth.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Extract the first, last, or nth value from a vector — nth","text":"vector types, first(x), last(x), nth(x, n) work like x[[1]], x[[length(x)], x[[n]], respectively. primary exception data frames, instead retrieve rows, .e. x[1, ], x[nrow(x), ], x[n, ]. consistent tidyverse/vctrs principle treats data frames vector rows, rather vector columns.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/nth.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Extract the first, last, or nth value from a vector — nth","text":"","code":"x <- 1:10 y <- 10:1 first(x) #> [1] 1 last(y) #> [1] 1 nth(x, 1) #> [1] 1 nth(x, 5) #> [1] 5 nth(x, -2) #> [1] 9 # `first()` and `last()` are often useful in `summarise()` df <- tibble(x = x, y = y) df %>% summarise( across(x:y, first, .names = \"{col}_first\"), y_last = last(y) ) #> # A tibble: 1 × 3 #> x_first y_first y_last #> #> 1 1 10 1 # Selecting a position that is out of bounds returns a default value nth(x, 11) #> [1] NA nth(x, 0) #> [1] NA # This out of bounds behavior also applies to empty vectors first(integer()) #> [1] NA # You can customize the default value with `default` nth(x, 11, default = -1L) #> [1] -1 first(integer(), default = 0L) #> [1] 0 # `order_by` provides optional ordering last(x) #> [1] 10 last(x, order_by = y) #> [1] 1 # `na_rm` removes missing values before extracting the value z <- 
c(NA, NA, 1, 3, NA, 5, NA) first(z) #> [1] NA first(z, na_rm = TRUE) #> [1] 1 last(z, na_rm = TRUE) #> [1] 5 nth(z, 3, na_rm = TRUE) #> [1] 5 # For data frames, these select entire rows df <- tibble(a = 1:5, b = 6:10) first(df) #> # A tibble: 1 × 2 #> a b #> #> 1 1 6 nth(df, 4) #> # A tibble: 1 × 2 #> a b #> #> 1 4 9"},{"path":"https://dplyr.tidyverse.org/dev/reference/ntile.html","id":null,"dir":"Reference","previous_headings":"","what":"Bucket a numeric vector into n groups — ntile","title":"Bucket a numeric vector into n groups — ntile","text":"ntile() sort rough rank, breaks input vector n buckets. length(x) integer multiple n, size buckets differ one, larger buckets coming first. Unlike ranking functions, ntile() ignores ties: create evenly sized buckets even value x ends different buckets.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/ntile.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Bucket a numeric vector into n groups — ntile","text":"","code":"ntile(x = row_number(), n)"},{"path":"https://dplyr.tidyverse.org/dev/reference/ntile.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Bucket a numeric vector into n groups — ntile","text":"x vector rank default, smallest values get smallest ranks. Use desc() reverse direction largest values get smallest ranks. Missing values given rank NA. Use coalesce(x, Inf) coalesce(x, -Inf) want treat largest smallest values respectively. rank multiple columns , supply data frame. 
n Number groups bucket ","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/ntile.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Bucket a numeric vector into n groups — ntile","text":"","code":"x <- c(5, 1, 3, 2, 2, NA) ntile(x, 2) #> [1] 2 1 2 1 1 NA ntile(x, 4) #> [1] 4 1 3 1 2 NA # If the bucket sizes are uneven, the larger buckets come first ntile(1:8, 3) #> [1] 1 1 1 2 2 2 3 3 # Ties are ignored ntile(rep(1, 8), 3) #> [1] 1 1 1 2 2 2 3 3"},{"path":"https://dplyr.tidyverse.org/dev/reference/order_by.html","id":null,"dir":"Reference","previous_headings":"","what":"A helper function for ordering window function output — order_by","title":"A helper function for ordering window function output — order_by","text":"function makes possible control ordering window functions R specific ordering parameter. translated SQL modify order clause function.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/order_by.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"A helper function for ordering window function output — order_by","text":"","code":"order_by(order_by, call)"},{"path":"https://dplyr.tidyverse.org/dev/reference/order_by.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"A helper function for ordering window function output — order_by","text":"order_by vector order_by call function call window function, first argument vector operated ","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/order_by.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"A helper function for ordering window function output — order_by","text":"function works changing call instead call with_order() appropriate arguments.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/order_by.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"A 
helper function for ordering window function output — order_by","text":"","code":"order_by(10:1, cumsum(1:10)) #> [1] 55 54 52 49 45 40 34 27 19 10 x <- 10:1 y <- 1:10 order_by(x, cumsum(y)) #> [1] 55 54 52 49 45 40 34 27 19 10 df <- data.frame(year = 2000:2005, value = (0:5) ^ 2) scrambled <- df[sample(nrow(df)), ] wrong <- mutate(scrambled, running = cumsum(value)) arrange(wrong, year) #> year value running #> 1 2000 0 34 #> 2 2001 1 51 #> 3 2002 4 55 #> 4 2003 9 34 #> 5 2004 16 50 #> 6 2005 25 25 right <- mutate(scrambled, running = order_by(year, cumsum(value))) arrange(right, year) #> year value running #> 1 2000 0 0 #> 2 2001 1 1 #> 3 2002 4 5 #> 4 2003 9 14 #> 5 2004 16 30 #> 6 2005 25 55"},{"path":"https://dplyr.tidyverse.org/dev/reference/percent_rank.html","id":null,"dir":"Reference","previous_headings":"","what":"Proportional ranking functions — percent_rank","title":"Proportional ranking functions — percent_rank","text":"two ranking functions implement two slightly different ways compute percentile. x_i x: cume_dist(x) counts total number values less equal x_i, divides number observations. percent_rank(x) counts total number values less x_i, divides number observations minus 1. cases, missing values ignored counting number observations.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/percent_rank.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Proportional ranking functions — percent_rank","text":"","code":"percent_rank(x) cume_dist(x)"},{"path":"https://dplyr.tidyverse.org/dev/reference/percent_rank.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Proportional ranking functions — percent_rank","text":"x vector rank default, smallest values get smallest ranks. Use desc() reverse direction largest values get smallest ranks. Missing values given rank NA. Use coalesce(x, Inf) coalesce(x, -Inf) want treat largest smallest values respectively. 
rank multiple columns , supply data frame.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/percent_rank.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Proportional ranking functions — percent_rank","text":"numeric vector containing proportion.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/percent_rank.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Proportional ranking functions — percent_rank","text":"","code":"x <- c(5, 1, 3, 2, 2) cume_dist(x) #> [1] 1.0 0.2 0.8 0.6 0.6 percent_rank(x) #> [1] 1.00 0.00 0.75 0.25 0.25 # You can understand what's going on by computing it by hand sapply(x, function(xi) sum(x <= xi) / length(x)) #> [1] 1.0 0.2 0.8 0.6 0.6 sapply(x, function(xi) sum(x < xi) / (length(x) - 1)) #> [1] 1.00 0.00 0.75 0.25 0.25 # The real computations are a little more complex in order to # correctly deal with missing values"},{"path":"https://dplyr.tidyverse.org/dev/reference/pick.html","id":null,"dir":"Reference","previous_headings":"","what":"Select a subset of columns — pick","title":"Select a subset of columns — pick","text":"pick() provides way easily select subset columns data using select() semantics inside \"data-masking\" function like mutate() summarise(). pick() returns data frame containing selected columns current group. pick() complementary across(): pick(), typically apply function full data frame. across(), typically apply function column.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/pick.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Select a subset of columns — pick","text":"","code":"pick(...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/pick.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Select a subset of columns — pick","text":"... Columns pick. 
pick grouping columns already automatically handled verb (.e. summarise() mutate()).","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/pick.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Select a subset of columns — pick","text":"tibble containing selected columns current group.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/pick.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Select a subset of columns — pick","text":"Theoretically, pick() intended replaceable equivalent call tibble(). example, pick(, c) replaced tibble(= , c = c), pick(everything()) data frame cols , b, c replaced tibble(= , b = b, c = c). pick() specially handles case empty selection returning 1 row, 0 column tibble, exact replacement like:","code":"size <- vctrs::vec_size_common(..., .absent = 1L) out <- vctrs::vec_recycle_common(..., .size = size) tibble::new_tibble(out, nrow = size)"},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/pick.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Select a subset of columns — pick","text":"","code":"df <- tibble( x = c(3, 2, 2, 2, 1), y = c(0, 2, 1, 1, 4), z1 = c(\"a\", \"a\", \"a\", \"b\", \"a\"), z2 = c(\"c\", \"d\", \"d\", \"a\", \"c\") ) df #> # A tibble: 5 × 4 #> x y z1 z2 #> #> 1 3 0 a c #> 2 2 2 a d #> 3 2 1 a d #> 4 2 1 b a #> 5 1 4 a c # `pick()` provides a way to select a subset of your columns using # tidyselect. It returns a data frame. df %>% mutate(cols = pick(x, y)) #> # A tibble: 5 × 5 #> x y z1 z2 cols$x $y #> #> 1 3 0 a c 3 0 #> 2 2 2 a d 2 2 #> 3 2 1 a d 2 1 #> 4 2 1 b a 2 1 #> 5 1 4 a c 1 4 # This is useful for functions that take data frames as inputs. # For example, you can compute a joint rank between `x` and `y`. 
df %>% mutate(rank = dense_rank(pick(x, y))) #> # A tibble: 5 × 5 #> x y z1 z2 rank #> #> 1 3 0 a c 4 #> 2 2 2 a d 3 #> 3 2 1 a d 2 #> 4 2 1 b a 2 #> 5 1 4 a c 1 # `pick()` is also useful as a bridge between data-masking functions (like # `mutate()` or `group_by()`) and functions with tidy-select behavior (like # `select()`). For example, you can use `pick()` to create a wrapper around # `group_by()` that takes a tidy-selection of columns to group on. For more # bridge patterns, see # https://rlang.r-lib.org/reference/topic-data-mask-programming.html#bridge-patterns. my_group_by <- function(data, cols) { group_by(data, pick({{ cols }})) } df %>% my_group_by(c(x, starts_with(\"z\"))) #> # A tibble: 5 × 4 #> # Groups: x, z1, z2 [4] #> x y z1 z2 #> #> 1 3 0 a c #> 2 2 2 a d #> 3 2 1 a d #> 4 2 1 b a #> 5 1 4 a c # Or you can use it to dynamically select columns to `count()` by df %>% count(pick(starts_with(\"z\"))) #> # A tibble: 3 × 3 #> z1 z2 n #> #> 1 a c 2 #> 2 a d 2 #> 3 b a 1"},{"path":"https://dplyr.tidyverse.org/dev/reference/progress_estimated.html","id":null,"dir":"Reference","previous_headings":"","what":"Progress bar with estimated time. — progress_estimated","title":"Progress bar with estimated time. — progress_estimated","text":"progress bar deprecated since providing progress bars responsibility dplyr. Instead, might try powerful progress package. reference class represents text progress bar displayed estimated time remaining. finished, displays total duration. automatic progress bar can disabled setting option dplyr.show_progress FALSE.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/progress_estimated.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Progress bar with estimated time. 
— progress_estimated","text":"","code":"progress_estimated(n, min_time = 0)"},{"path":"https://dplyr.tidyverse.org/dev/reference/progress_estimated.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Progress bar with estimated time. — progress_estimated","text":"n Total number items min_time Progress bar wait least min_time seconds elapsed displaying results.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/progress_estimated.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Progress bar with estimated time. — progress_estimated","text":"ref class methods tick(), print(), pause(), stop().","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/progress_estimated.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Progress bar with estimated time. — progress_estimated","text":"","code":"p <- progress_estimated(3) #> Warning: `progress_estimated()` was deprecated in dplyr 1.0.0. p$tick() p$tick() p$tick() p <- progress_estimated(3) for (i in 1:3) p$pause(0.1)$tick()$print() p <- progress_estimated(3) p$tick()$print()$ pause(1)$stop() # If min_time is set, progress bar not shown until that many # seconds have elapsed p <- progress_estimated(3, min_time = 3) for (i in 1:3) p$pause(0.1)$tick()$print() if (FALSE) { p <- progress_estimated(10, min_time = 3) for (i in 1:10) p$pause(0.5)$tick()$print() }"},{"path":"https://dplyr.tidyverse.org/dev/reference/pull.html","id":null,"dir":"Reference","previous_headings":"","what":"Extract a single column — pull","title":"Extract a single column — pull","text":"pull() similar $. 
mostly useful looks little nicer pipes, also works remote data frames, can optionally name output.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/pull.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Extract a single column — pull","text":"","code":"pull(.data, var = -1, name = NULL, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/pull.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Extract a single column — pull","text":".data data frame, data frame extension (e.g. tibble), lazy data frame (e.g. dbplyr dtplyr). See Methods, , details. var variable specified : literal variable name positive integer, giving position counting left negative integer, giving position counting right. default returns last column (assumption column created recently). argument taken expression supports quasiquotation (can unquote column names column locations). name optional parameter specifies column used names named vector. Specified similar manner var. ... use methods.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/pull.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Extract a single column — pull","text":"vector size .data.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/pull.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Extract a single column — pull","text":"function generic, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. 
following methods currently available loaded packages: dbplyr (tbl_sql), dplyr (data.frame) .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/pull.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Extract a single column — pull","text":"","code":"mtcars %>% pull(-1) #> [1] 4 4 1 1 2 1 4 2 2 4 4 3 3 3 4 4 4 1 2 1 1 2 2 4 2 1 2 2 4 6 8 2 mtcars %>% pull(1) #> [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 #> [15] 10.4 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4 #> [29] 15.8 19.7 15.0 21.4 mtcars %>% pull(cyl) #> [1] 6 6 4 6 8 6 8 4 4 6 6 8 8 8 8 8 8 4 4 4 4 8 8 8 8 4 4 4 8 6 8 4 # Also works for remote sources df <- dbplyr::memdb_frame(x = 1:10, y = 10:1, .name = \"pull-ex\") df %>% mutate(z = x * y) %>% pull() #> [1] 10 18 24 28 30 30 28 24 18 10 # Pull a named vector starwars %>% pull(height, name) #> Luke Skywalker C-3PO R2-D2 #> 172 167 96 #> Darth Vader Leia Organa Owen Lars #> 202 150 178 #> Beru Whitesun Lars R5-D4 Biggs Darklighter #> 165 97 183 #> Obi-Wan Kenobi Anakin Skywalker Wilhuff Tarkin #> 182 188 180 #> Chewbacca Han Solo Greedo #> 228 180 173 #> Jabba Desilijic Tiure Wedge Antilles Jek Tono Porkins #> 175 170 180 #> Yoda Palpatine Boba Fett #> 66 170 183 #> IG-88 Bossk Lando Calrissian #> 200 190 177 #> Lobot Ackbar Mon Mothma #> 175 180 150 #> Arvel Crynyd Wicket Systri Warrick Nien Nunb #> NA 88 160 #> Qui-Gon Jinn Nute Gunray Finis Valorum #> 193 191 170 #> Padmé Amidala Jar Jar Binks Roos Tarpals #> 185 196 224 #> Rugor Nass Ric Olié Watto #> 206 183 137 #> Sebulba Quarsh Panaka Shmi Skywalker #> 112 183 163 #> Darth Maul Bib Fortuna Ayla Secura #> 175 180 178 #> Ratts Tyerel Dud Bolt Gasgano #> 79 94 122 #> Ben Quadinaros Mace Windu Ki-Adi-Mundi #> 163 188 198 #> Kit Fisto Eeth Koth Adi Gallia #> 196 171 184 #> Saesee Tiin Yarael Poof Plo Koon #> 188 264 188 #> Mas Amedda Gregar Typho Cordé #> 196 185 157 #> Cliegg Lars Poggle the Lesser 
Luminara Unduli #> 183 183 170 #> Barriss Offee Dormé Dooku #> 166 165 193 #> Bail Prestor Organa Jango Fett Zam Wesell #> 191 183 168 #> Dexter Jettster Lama Su Taun We #> 198 229 213 #> Jocasta Nu R4-P17 Wat Tambor #> 167 96 193 #> San Hill Shaak Ti Grievous #> 191 178 216 #> Tarfful Raymus Antilles Sly Moore #> 234 188 178 #> Tion Medon Finn Rey #> 206 NA NA #> Poe Dameron BB8 Captain Phasma #> NA NA NA"},{"path":"https://dplyr.tidyverse.org/dev/reference/recode.html","id":null,"dir":"Reference","previous_headings":"","what":"Recode values — recode","title":"Recode values — recode","text":"recode() superseded favor case_match(), handles important cases recode() elegant interface. recode_factor() also superseded, however, direct replacement currently available eventually live forcats. creating new variables based logical vectors, use if_else(). even complicated criteria, use case_when(). recode() vectorised version switch(): can replace numeric values based position name, character factor values name. S3 generic: dplyr provides methods numeric, character, factors. can use recode() directly factors; preserve existing order levels changing values. Alternatively, can use recode_factor(), change order levels match order replacements.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/recode.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Recode values — recode","text":"","code":"recode(.x, ..., .default = NULL, .missing = NULL) recode_factor(.x, ..., .default = NULL, .missing = NULL, .ordered = FALSE)"},{"path":"https://dplyr.tidyverse.org/dev/reference/recode.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Recode values — recode","text":".x vector modify ... Replacements. character factor .x, named replacement based name. numeric .x, can named . named, replacement done based position .e. .x represents positions look replacements. See examples. 
named, argument names current values replaced, argument values new (replacement) values. replacements must type, must either length one length .x. .default supplied, values otherwise matched given value. supplied replacements type original values .x, unmatched values changed. supplied replacements compatible, unmatched values replaced NA. .default must either length 1 length .x. .missing supplied, missing values .x replaced value. Must either length 1 length .x. .ordered TRUE, recode_factor() creates ordered factor.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/recode.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Recode values — recode","text":"vector length .x, type first ..., .default, .missing. recode_factor() returns factor whose levels order .... levels .default .missing come last.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/recode.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Recode values — recode","text":"","code":"char_vec <- sample(c(\"a\", \"b\", \"c\"), 10, replace = TRUE) # `recode()` is superseded by `case_match()` recode(char_vec, a = \"Apple\", b = \"Banana\") #> [1] \"Apple\" \"Apple\" \"Banana\" \"Banana\" \"Banana\" \"Banana\" \"Banana\" #> [8] \"Apple\" \"Apple\" \"c\" case_match(char_vec, \"a\" ~ \"Apple\", \"b\" ~ \"Banana\", .default = char_vec) #> [1] \"Apple\" \"Apple\" \"Banana\" \"Banana\" \"Banana\" \"Banana\" \"Banana\" #> [8] \"Apple\" \"Apple\" \"c\" # With `case_match()`, you don't need typed missings like `NA_character_` recode(char_vec, a = \"Apple\", b = \"Banana\", .default = NA_character_) #> [1] \"Apple\" \"Apple\" \"Banana\" \"Banana\" \"Banana\" \"Banana\" \"Banana\" #> [8] \"Apple\" \"Apple\" NA case_match(char_vec, \"a\" ~ \"Apple\", \"b\" ~ \"Banana\", .default = NA) #> [1] \"Apple\" \"Apple\" \"Banana\" \"Banana\" \"Banana\" \"Banana\" \"Banana\" #> [8] \"Apple\" \"Apple\" NA # Throws an 
error as `NA` is logical, not character. try(recode(char_vec, a = \"Apple\", b = \"Banana\", .default = NA)) #> Error in recode(char_vec, a = \"Apple\", b = \"Banana\", .default = NA) : #> `.default` must be a character vector, not `NA`. # `case_match()` is easier to use with numeric vectors, because you don't # need to turn the numeric values into names num_vec <- c(1:4, NA) recode(num_vec, `2` = 20L, `4` = 40L) #> [1] 1 20 3 40 NA case_match(num_vec, 2 ~ 20, 4 ~ 40, .default = num_vec) #> [1] 1 20 3 40 NA # `case_match()` doesn't have the ability to match by position like # `recode()` does with numeric vectors recode(num_vec, \"a\", \"b\", \"c\", \"d\") #> [1] \"a\" \"b\" \"c\" \"d\" NA recode(c(1,5,3), \"a\", \"b\", \"c\", \"d\", .default = \"nothing\") #> [1] \"a\" \"nothing\" \"c\" # For `case_match()`, incompatible types are an error rather than a warning recode(num_vec, `2` = \"b\", `4` = \"d\") #> Warning: Unreplaced values treated as NA as `.x` is not compatible. #> Please specify replacements exhaustively or supply `.default`. #> [1] NA \"b\" NA \"d\" NA try(case_match(num_vec, 2 ~ \"b\", 4 ~ \"d\", .default = num_vec)) #> Error in case_match(num_vec, 2 ~ \"b\", 4 ~ \"d\", .default = num_vec) : #> Can't combine `..1 (right)` and `.default` . # The factor method of `recode()` can generally be replaced with # `forcats::fct_recode()` factor_vec <- factor(c(\"a\", \"b\", \"c\")) recode(factor_vec, a = \"Apple\") #> [1] Apple b c #> Levels: Apple b c # `recode_factor()` does not currently have a direct replacement, but we # plan to add one to forcats. In the meantime, you can use the `.ptype` # argument to `case_match()`. 
recode_factor( num_vec, `1` = \"z\", `2` = \"y\", `3` = \"x\", .default = \"D\", .missing = \"M\" ) #> [1] z y x D M #> Levels: z y x D M case_match( num_vec, 1 ~ \"z\", 2 ~ \"y\", 3 ~ \"x\", NA ~ \"M\", .default = \"D\", .ptype = factor(levels = c(\"z\", \"y\", \"x\", \"D\", \"M\")) ) #> [1] z y x D M #> Levels: z y x D M"},{"path":"https://dplyr.tidyverse.org/dev/reference/reexports.html","id":null,"dir":"Reference","previous_headings":"","what":"Objects exported from other packages — reexports","title":"Objects exported from other packages — reexports","text":"objects imported packages. Follow links see documentation. magrittr %>% pillar type_sum tibble add_row, as_data_frame, as_tibble, data_frame, lst, tibble, tribble, view tidyselect all_of, any_of, contains, ends_with, everything, last_col, matches, num_range, one_of, starts_with, ","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/reframe.html","id":null,"dir":"Reference","previous_headings":"","what":"Transform each group to an arbitrary number of rows — reframe","title":"Transform each group to an arbitrary number of rows — reframe","text":"summarise() requires argument returns single value, mutate() requires argument returns number rows input, reframe() general workhorse requirements number rows returned per group. reframe() creates new data frame applying functions columns existing data frame. similar summarise(), two big differences: reframe() can return arbitrary number rows per group, summarise() reduces group single row. reframe() always returns ungrouped data frame, summarise() might return grouped rowwise data frame, depending scenario. 
expect use summarise() much often reframe(), reframe() can particularly helpful need apply complex function return single summary value.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/reframe.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Transform each group to an arbitrary number of rows — reframe","text":"","code":"reframe(.data, ..., .by = NULL)"},{"path":"https://dplyr.tidyverse.org/dev/reference/reframe.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Transform each group to an arbitrary number of rows — reframe","text":".data data frame, data frame extension (e.g. tibble), lazy data frame (e.g. dbplyr dtplyr). See Methods, , details. ... Name-value pairs functions. name name variable result. value can vector length. Unnamed data frame values add multiple columns single expression. . Optionally, selection columns group just operation, functioning alternative group_by(). details examples, see ?dplyr_by.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/reframe.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Transform each group to an arbitrary number of rows — reframe","text":".data tibble, tibble. Otherwise, data.frame. rows originate underlying grouping keys. columns combination grouping keys expressions provide. output always ungrouped. 
Data frame attributes preserved, reframe() fundamentally creates new data frame.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/reframe.html","id":"connection-to-tibble","dir":"Reference","previous_headings":"","what":"Connection to tibble","title":"Transform each group to an arbitrary number of rows — reframe","text":"reframe() theoretically connected two functions tibble, tibble::enframe() tibble::deframe(): enframe(): vector -> data frame deframe(): data frame -> vector reframe(): data frame -> data frame","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/reframe.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Transform each group to an arbitrary number of rows — reframe","text":"function generic, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. following methods currently available loaded packages: dbplyr (tbl_lazy), dplyr (data.frame) .","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/reframe.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Transform each group to an arbitrary number of rows — reframe","text":"","code":"table <- c(\"a\", \"b\", \"d\", \"f\") df <- tibble( g = c(1, 1, 1, 2, 2, 2, 2), x = c(\"e\", \"a\", \"b\", \"c\", \"f\", \"d\", \"a\") ) # `reframe()` allows you to apply functions that return # an arbitrary number of rows df %>% reframe(x = intersect(x, table)) #> # A tibble: 4 × 1 #> x #> #> 1 a #> 2 b #> 3 f #> 4 d # Functions are applied per group, and each group can return a # different number of rows. 
df %>% reframe(x = intersect(x, table), .by = g) #> # A tibble: 5 × 2 #> g x #> #> 1 1 a #> 2 1 b #> 3 2 f #> 4 2 d #> 5 2 a # The output is always ungrouped, even when using `group_by()` df %>% group_by(g) %>% reframe(x = intersect(x, table)) #> # A tibble: 5 × 2 #> g x #> #> 1 1 a #> 2 1 b #> 3 2 f #> 4 2 d #> 5 2 a # You can add multiple columns at once using a single expression by returning # a data frame. quantile_df <- function(x, probs = c(0.25, 0.5, 0.75)) { tibble( val = quantile(x, probs, na.rm = TRUE), quant = probs ) } x <- c(10, 15, 18, 12) quantile_df(x) #> # A tibble: 3 × 2 #> val quant #> #> 1 11.5 0.25 #> 2 13.5 0.5 #> 3 15.8 0.75 starwars %>% reframe(quantile_df(height)) #> # A tibble: 3 × 2 #> val quant #> #> 1 167 0.25 #> 2 180 0.5 #> 3 191 0.75 starwars %>% reframe(quantile_df(height), .by = homeworld) #> # A tibble: 147 × 3 #> homeworld val quant #> #> 1 Tatooine 166. 0.25 #> 2 Tatooine 175 0.5 #> 3 Tatooine 183 0.75 #> 4 Naboo 168. 0.25 #> 5 Naboo 183 0.5 #> 6 Naboo 190. 0.75 #> 7 Alderaan 169 0.25 #> 8 Alderaan 188 0.5 #> 9 Alderaan 190. 0.75 #> 10 Stewjon 182 0.25 #> # ℹ 137 more rows starwars %>% reframe( across(c(height, mass), quantile_df, .unpack = TRUE), .by = homeworld ) #> # A tibble: 147 × 5 #> homeworld height_val height_quant mass_val mass_quant #> #> 1 Tatooine 166. 0.25 75 0.25 #> 2 Tatooine 175 0.5 80.5 0.5 #> 3 Tatooine 183 0.75 93 0.75 #> 4 Naboo 168. 0.25 50.2 0.25 #> 5 Naboo 183 0.5 70.5 0.5 #> 6 Naboo 190. 0.75 80.2 0.75 #> 7 Alderaan 169 0.25 56.5 0.25 #> 8 Alderaan 188 0.5 64 0.5 #> 9 Alderaan 190. 
0.75 71.5 0.75 #> 10 Stewjon 182 0.25 77 0.25 #> # ℹ 137 more rows"},{"path":"https://dplyr.tidyverse.org/dev/reference/relocate.html","id":null,"dir":"Reference","previous_headings":"","what":"Change column order — relocate","title":"Change column order — relocate","text":"Use relocate() change column positions, using syntax select() make easy move blocks columns .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/relocate.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Change column order — relocate","text":"","code":"relocate(.data, ..., .before = NULL, .after = NULL)"},{"path":"https://dplyr.tidyverse.org/dev/reference/relocate.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Change column order — relocate","text":".data data frame, data frame extension (e.g. tibble), lazy data frame (e.g. dbplyr dtplyr). See Methods, , details. ... Columns move. ., . Destination columns selected .... Supplying neither move columns left-hand side; specifying error.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/relocate.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Change column order — relocate","text":"object type .data. output following properties: Rows affected. columns appear output, (usually) different place possibly renamed. Data frame attributes preserved. Groups affected.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/relocate.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Change column order — relocate","text":"function generic, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. 
following methods currently available loaded packages: dbplyr (tbl_lazy), dplyr (data.frame) .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/relocate.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Change column order — relocate","text":"","code":"df <- tibble(a = 1, b = 1, c = 1, d = \"a\", e = \"a\", f = \"a\") df %>% relocate(f) #> # A tibble: 1 × 6 #> f a b c d e #> #> 1 a 1 1 1 a a df %>% relocate(a, .after = c) #> # A tibble: 1 × 6 #> b c a d e f #> #> 1 1 1 1 a a a df %>% relocate(f, .before = b) #> # A tibble: 1 × 6 #> a f b c d e #> #> 1 1 a 1 1 a a df %>% relocate(a, .after = last_col()) #> # A tibble: 1 × 6 #> b c d e f a #> #> 1 1 1 a a a 1 # relocated columns can change name df %>% relocate(ff = f) #> # A tibble: 1 × 6 #> ff a b c d e #> #> 1 a 1 1 1 a a # Can also select variables based on their type df %>% relocate(where(is.character)) #> # A tibble: 1 × 6 #> d e f a b c #> #> 1 a a a 1 1 1 df %>% relocate(where(is.numeric), .after = last_col()) #> # A tibble: 1 × 6 #> d e f a b c #> #> 1 a a a 1 1 1 # Or with any other select helper df %>% relocate(any_of(c(\"a\", \"e\", \"i\", \"o\", \"u\"))) #> # A tibble: 1 × 6 #> a e b c d f #> #> 1 1 a 1 1 a a # When .before or .after refers to multiple variables they will be # moved to be immediately before/after the selected variables. 
df2 <- tibble(a = 1, b = \"a\", c = 1, d = \"a\") df2 %>% relocate(where(is.numeric), .after = where(is.character)) #> # A tibble: 1 × 4 #> b d a c #> #> 1 a a 1 1 df2 %>% relocate(where(is.numeric), .before = where(is.character)) #> # A tibble: 1 × 4 #> a c b d #> #> 1 1 1 a a"},{"path":"https://dplyr.tidyverse.org/dev/reference/rename.html","id":null,"dir":"Reference","previous_headings":"","what":"Rename columns — rename","title":"Rename columns — rename","text":"rename() changes names individual variables using new_name = old_name syntax; rename_with() renames columns using function.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/rename.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Rename columns — rename","text":"","code":"rename(.data, ...) rename_with(.data, .fn, .cols = everything(), ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/rename.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Rename columns — rename","text":".data data frame, data frame extension (e.g. tibble), lazy data frame (e.g. dbplyr dtplyr). See Methods, , details. ... rename(): Use new_name = old_name rename selected variables. rename_with(): additional arguments passed onto .fn. .fn function used transform selected .cols. return character vector length input. .cols Columns rename; defaults columns.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/rename.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Rename columns — rename","text":"object type .data. output following properties: Rows affected. Column names changed; column order preserved. Data frame attributes preserved. 
Groups updated reflect new names.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/rename.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Rename columns — rename","text":"function generic, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. following methods currently available loaded packages: dbplyr (tbl_lazy), dplyr (data.frame) .","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/rename.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Rename columns — rename","text":"","code":"iris <- as_tibble(iris) # so it prints a little nicer rename(iris, petal_length = Petal.Length) #> # A tibble: 150 × 5 #> Sepal.Length Sepal.Width petal_length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows # Rename using a named vector and `all_of()` lookup <- c(pl = \"Petal.Length\", sl = \"Sepal.Length\") rename(iris, all_of(lookup)) #> # A tibble: 150 × 5 #> sl Sepal.Width pl Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows # If your named vector might contain names that don't exist in the data, # use `any_of()` instead lookup <- c(lookup, new = \"unknown\") try(rename(iris, all_of(lookup))) #> Error in rename(iris, all_of(lookup)) : #> ℹ In argument: `all_of(lookup)`. #> Caused by error in `all_of()`: #> ! Can't subset elements that don't exist. 
#> ✖ Element `unknown` doesn't exist. rename(iris, any_of(lookup)) #> # A tibble: 150 × 5 #> sl Sepal.Width pl Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows rename_with(iris, toupper) #> # A tibble: 150 × 5 #> SEPAL.LENGTH SEPAL.WIDTH PETAL.LENGTH PETAL.WIDTH SPECIES #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows rename_with(iris, toupper, starts_with(\"Petal\")) #> # A tibble: 150 × 5 #> Sepal.Length Sepal.Width PETAL.LENGTH PETAL.WIDTH Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows rename_with(iris, ~ tolower(gsub(\".\", \"_\", .x, fixed = TRUE))) #> # A tibble: 150 × 5 #> sepal_length sepal_width petal_length petal_width species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows # If your renaming function uses `paste0()`, make sure to set # `recycle0 = TRUE` to ensure that empty selections are recycled correctly try(rename_with( iris, ~ paste0(\"prefix_\", .x), starts_with(\"nonexistent\") )) #> Error in rename_with(iris, ~paste0(\"prefix_\", .x), 
starts_with(\"nonexistent\")) : #> `.fn` must return a vector of length 0, not 1. rename_with( iris, ~ paste0(\"prefix_\", .x, recycle0 = TRUE), starts_with(\"nonexistent\") ) #> # A tibble: 150 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows"},{"path":"https://dplyr.tidyverse.org/dev/reference/row_number.html","id":null,"dir":"Reference","previous_headings":"","what":"Integer ranking functions — row_number","title":"Integer ranking functions — row_number","text":"Three ranking functions inspired SQL2003. differ primarily handle ties: row_number() gives every input unique rank, c(10, 20, 20, 30) get ranks c(1, 2, 3, 4). equivalent rank(ties.method = \"first\"). min_rank() gives every tie (smallest) value c(10, 20, 20, 30) gets ranks c(1, 2, 2, 4). way ranks usually computed sports equivalent rank(ties.method = \"min\"). dense_rank() works like min_rank(), leave gaps, c(10, 20, 20, 30) gets ranks c(1, 2, 2, 3).","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/row_number.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Integer ranking functions — row_number","text":"","code":"row_number(x) min_rank(x) dense_rank(x)"},{"path":"https://dplyr.tidyverse.org/dev/reference/row_number.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Integer ranking functions — row_number","text":"x vector rank default, smallest values get smallest ranks. Use desc() reverse direction largest values get smallest ranks. Missing values given rank NA. Use coalesce(x, Inf) coalesce(x, -Inf) want treat largest smallest values respectively. 
rank multiple columns , supply data frame.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/row_number.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Integer ranking functions — row_number","text":"integer vector.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/row_number.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Integer ranking functions — row_number","text":"","code":"x <- c(5, 1, 3, 2, 2, NA) row_number(x) #> [1] 5 1 4 2 3 NA min_rank(x) #> [1] 5 1 4 2 2 NA dense_rank(x) #> [1] 4 1 3 2 2 NA # Ranking functions can be used in `filter()` to select top/bottom rows df <- data.frame( grp = c(1, 1, 1, 2, 2, 2, 3, 3, 3), x = c(3, 2, 1, 1, 2, 2, 1, 1, 1), y = c(1, 3, 2, 3, 2, 2, 4, 1, 2), id = 1:9 ) # Always gives exactly 1 row per group df %>% group_by(grp) %>% filter(row_number(x) == 1) #> # A tibble: 3 × 4 #> # Groups: grp [3] #> grp x y id #> #> 1 1 1 2 3 #> 2 2 1 3 4 #> 3 3 1 4 7 # May give more than 1 row if ties df %>% group_by(grp) %>% filter(min_rank(x) == 1) #> # A tibble: 5 × 4 #> # Groups: grp [3] #> grp x y id #> #> 1 1 1 2 3 #> 2 2 1 3 4 #> 3 3 1 4 7 #> 4 3 1 1 8 #> 5 3 1 2 9 # Rank by multiple columns (to break ties) by selecting them with `pick()` df %>% group_by(grp) %>% filter(min_rank(pick(x, y)) == 1) #> # A tibble: 3 × 4 #> # Groups: grp [3] #> grp x y id #> #> 1 1 1 2 3 #> 2 2 1 3 4 #> 3 3 1 1 8 # See slice_min() and slice_max() for another way to tackle the same problem # You can use row_number() without an argument to refer to the \"current\" # row number. 
df %>% group_by(grp) %>% filter(row_number() == 1) #> # A tibble: 3 × 4 #> # Groups: grp [3] #> grp x y id #> #> 1 1 3 1 1 #> 2 2 1 3 4 #> 3 3 1 4 7 # It's easiest to see what this does with mutate(): df %>% group_by(grp) %>% mutate(grp_id = row_number()) #> # A tibble: 9 × 5 #> # Groups: grp [3] #> grp x y id grp_id #> #> 1 1 3 1 1 1 #> 2 1 2 3 2 2 #> 3 1 1 2 3 3 #> 4 2 1 3 4 1 #> 5 2 2 2 5 2 #> 6 2 2 2 6 3 #> 7 3 1 4 7 1 #> 8 3 1 1 8 2 #> 9 3 1 2 9 3"},{"path":"https://dplyr.tidyverse.org/dev/reference/rows.html","id":null,"dir":"Reference","previous_headings":"","what":"Manipulate individual rows — rows","title":"Manipulate individual rows — rows","text":"functions provide framework modifying rows table using second table data. two tables matched set key variables whose values typically uniquely identify row. functions inspired SQL's INSERT, UPDATE, DELETE, can optionally modify in_place selected backends. rows_insert() adds new rows (like INSERT). default, key values y must exist x. rows_append() works like rows_insert() ignores keys. rows_update() modifies existing rows (like UPDATE). Key values y must unique, , default, key values y must exist x. rows_patch() works like rows_update() overwrites NA values. rows_upsert() inserts updates depending whether key value y already exists x. Key values y must unique. rows_delete() deletes rows (like DELETE). 
default, key values y must exist x.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/rows.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Manipulate individual rows — rows","text":"","code":"rows_insert( x, y, by = NULL, ..., conflict = c(\"error\", \"ignore\"), copy = FALSE, in_place = FALSE ) rows_append(x, y, ..., copy = FALSE, in_place = FALSE) rows_update( x, y, by = NULL, ..., unmatched = c(\"error\", \"ignore\"), copy = FALSE, in_place = FALSE ) rows_patch( x, y, by = NULL, ..., unmatched = c(\"error\", \"ignore\"), copy = FALSE, in_place = FALSE ) rows_upsert(x, y, by = NULL, ..., copy = FALSE, in_place = FALSE) rows_delete( x, y, by = NULL, ..., unmatched = c(\"error\", \"ignore\"), copy = FALSE, in_place = FALSE )"},{"path":"https://dplyr.tidyverse.org/dev/reference/rows.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Manipulate individual rows — rows","text":"x, y pair data frames data frame extensions (e.g. tibble). y must columns x subset. unnamed character vector giving key columns. key columns must exist x y. Keys typically uniquely identify row, enforced key values y rows_update(), rows_patch(), rows_upsert() used. default, use first column y, since first column reasonable place put identifier variable. ... parameters passed onto methods. conflict rows_insert(), keys y conflict keys x handled? conflict arises key y already exists x. One : \"error\", default, error keys y conflict keys x. \"ignore\" ignore rows y keys conflict keys x. copy x y data source, copy TRUE, y copied src x. allows join tables across srcs, potentially expensive operation must opt . in_place x modified place? argument relevant mutable backends (e.g. databases, data.tables). TRUE, modified version x returned invisibly; FALSE, new object representing resulting changes returned. unmatched rows_update(), rows_patch(), rows_delete(), keys y unmatched keys x handled? 
One : \"error\", default, error keys y unmatched keys x. \"ignore\" ignore rows y keys unmatched keys x.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/rows.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Manipulate individual rows — rows","text":"object type x. order rows columns x preserved much possible. output following properties: rows_update() rows_patch() preserve number rows; rows_insert(), rows_append(), rows_upsert() return existing rows potentially new rows; rows_delete() returns subset rows. Columns added, removed, relocated, though data may updated. Groups taken x. Data frame attributes taken x. in_place = TRUE, result returned invisibly.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/rows.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Manipulate individual rows — rows","text":"function generics, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. Methods available currently loaded packages: rows_insert(): dbplyr (tbl_lazy), dplyr (data.frame) . rows_append(): dbplyr (tbl_lazy), dplyr (data.frame) . rows_update(): dbplyr (tbl_lazy), dplyr (data.frame) . rows_patch(): dbplyr (tbl_lazy), dplyr (data.frame) . rows_upsert(): dbplyr (tbl_lazy), dplyr (data.frame) . 
rows_delete(): dbplyr (tbl_lazy), dplyr (data.frame) .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/rows.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Manipulate individual rows — rows","text":"","code":"data <- tibble(a = 1:3, b = letters[c(1:2, NA)], c = 0.5 + 0:2) data #> # A tibble: 3 × 3 #> a b c #> #> 1 1 a 0.5 #> 2 2 b 1.5 #> 3 3 NA 2.5 # Insert rows_insert(data, tibble(a = 4, b = \"z\")) #> Matching, by = \"a\" #> # A tibble: 4 × 3 #> a b c #> #> 1 1 a 0.5 #> 2 2 b 1.5 #> 3 3 NA 2.5 #> 4 4 z NA # By default, if a key in `y` matches a key in `x`, then it can't be inserted # and will throw an error. Alternatively, you can ignore rows in `y` # containing keys that conflict with keys in `x` with `conflict = \"ignore\"`, # or you can use `rows_append()` to ignore keys entirely. try(rows_insert(data, tibble(a = 3, b = \"z\"))) #> Matching, by = \"a\" #> Error in rows_insert(data, tibble(a = 3, b = \"z\")) : #> `y` can't contain keys that already exist in `x`. #> ℹ The following rows in `y` have keys that already exist in `x`: `c(1)`. #> ℹ Use `conflict = \"ignore\"` if you want to ignore these `y` rows. 
rows_insert(data, tibble(a = 3, b = \"z\"), conflict = \"ignore\") #> Matching, by = \"a\" #> # A tibble: 3 × 3 #> a b c #> #> 1 1 a 0.5 #> 2 2 b 1.5 #> 3 3 NA 2.5 rows_append(data, tibble(a = 3, b = \"z\")) #> # A tibble: 4 × 3 #> a b c #> #> 1 1 a 0.5 #> 2 2 b 1.5 #> 3 3 NA 2.5 #> 4 3 z NA # Update rows_update(data, tibble(a = 2:3, b = \"z\")) #> Matching, by = \"a\" #> # A tibble: 3 × 3 #> a b c #> #> 1 1 a 0.5 #> 2 2 z 1.5 #> 3 3 z 2.5 rows_update(data, tibble(b = \"z\", a = 2:3), by = \"a\") #> # A tibble: 3 × 3 #> a b c #> #> 1 1 a 0.5 #> 2 2 z 1.5 #> 3 3 z 2.5 # Variants: patch and upsert rows_patch(data, tibble(a = 2:3, b = \"z\")) #> Matching, by = \"a\" #> # A tibble: 3 × 3 #> a b c #> #> 1 1 a 0.5 #> 2 2 b 1.5 #> 3 3 z 2.5 rows_upsert(data, tibble(a = 2:4, b = \"z\")) #> Matching, by = \"a\" #> # A tibble: 4 × 3 #> a b c #> #> 1 1 a 0.5 #> 2 2 z 1.5 #> 3 3 z 2.5 #> 4 4 z NA # Delete and truncate rows_delete(data, tibble(a = 2:3)) #> Matching, by = \"a\" #> # A tibble: 1 × 3 #> a b c #> #> 1 1 a 0.5 rows_delete(data, tibble(a = 2:3, b = \"b\")) #> Matching, by = \"a\" #> Ignoring extra `y` columns: b #> # A tibble: 1 × 3 #> a b c #> #> 1 1 a 0.5 # By default, for update, patch, and delete it is an error if a key in `y` # doesn't exist in `x`. You can ignore rows in `y` that have unmatched keys # with `unmatched = \"ignore\"`. y <- tibble(a = 3:4, b = \"z\") try(rows_update(data, y, by = \"a\")) #> Error in rows_update(data, y, by = \"a\") : #> `y` must contain keys that already exist in `x`. #> ℹ The following rows in `y` have keys that don't exist in `x`: `c(2)`. #> ℹ Use `unmatched = \"ignore\"` if you want to ignore these `y` rows. 
rows_update(data, y, by = \"a\", unmatched = \"ignore\") #> # A tibble: 3 × 3 #> a b c #> #> 1 1 a 0.5 #> 2 2 b 1.5 #> 3 3 z 2.5 rows_patch(data, y, by = \"a\", unmatched = \"ignore\") #> # A tibble: 3 × 3 #> a b c #> #> 1 1 a 0.5 #> 2 2 b 1.5 #> 3 3 z 2.5 rows_delete(data, y, by = \"a\", unmatched = \"ignore\") #> Ignoring extra `y` columns: b #> # A tibble: 2 × 3 #> a b c #> #> 1 1 a 0.5 #> 2 2 b 1.5"},{"path":"https://dplyr.tidyverse.org/dev/reference/rowwise.html","id":null,"dir":"Reference","previous_headings":"","what":"Group input by rows — rowwise","title":"Group input by rows — rowwise","text":"rowwise() allows compute data frame row---time. useful vectorised function exist. dplyr verbs preserve row-wise grouping. exception summarise(), return grouped_df. can explicitly ungroup ungroup() as_tibble(), convert grouped_df group_by().","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/rowwise.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Group input by rows — rowwise","text":"","code":"rowwise(data, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/rowwise.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Group input by rows — rowwise","text":"data Input data frame. ... Variables preserved calling summarise(). typically set variables whose combination uniquely identify row. NB: unlike group_by() can create new variables instead can select multiple variables (e.g.) everything().","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/rowwise.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Group input by rows — rowwise","text":"row-wise data frame class rowwise_df. 
Note rowwise_df implicitly grouped row, grouped_df.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/rowwise.html","id":"list-columns","dir":"Reference","previous_headings":"","what":"List-columns","title":"Group input by rows — rowwise","text":"rowwise exactly one row per group offers small convenience working list-columns. Normally, summarise() mutate() extract groups worth data [. index list way, get back another list. working rowwise tibble, dplyr use [[ instead [ make life little easier.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/rowwise.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Group input by rows — rowwise","text":"","code":"df <- tibble(x = runif(6), y = runif(6), z = runif(6)) # Compute the mean of x, y, z in each row df %>% rowwise() %>% mutate(m = mean(c(x, y, z))) #> # A tibble: 6 × 4 #> # Rowwise: #> x y z m #> #> 1 0.386 0.929 0.723 0.679 #> 2 0.237 0.645 0.696 0.526 #> 3 0.520 0.783 0.871 0.724 #> 4 0.641 0.0587 0.709 0.470 #> 5 0.830 0.358 0.355 0.514 #> 6 0.919 0.479 0.807 0.735 # use c_across() to more easily select many variables df %>% rowwise() %>% mutate(m = mean(c_across(x:z))) #> # A tibble: 6 × 4 #> # Rowwise: #> x y z m #> #> 1 0.386 0.929 0.723 0.679 #> 2 0.237 0.645 0.696 0.526 #> 3 0.520 0.783 0.871 0.724 #> 4 0.641 0.0587 0.709 0.470 #> 5 0.830 0.358 0.355 0.514 #> 6 0.919 0.479 0.807 0.735 # Compute the minimum of x and y in each row df %>% rowwise() %>% mutate(m = min(c(x, y, z))) #> # A tibble: 6 × 4 #> # Rowwise: #> x y z m #> #> 1 0.386 0.929 0.723 0.386 #> 2 0.237 0.645 0.696 0.237 #> 3 0.520 0.783 0.871 0.520 #> 4 0.641 0.0587 0.709 0.0587 #> 5 0.830 0.358 0.355 0.355 #> 6 0.919 0.479 0.807 0.479 # In this case you can use an existing vectorised function: df %>% mutate(m = pmin(x, y, z)) #> # A tibble: 6 × 4 #> x y z m #> #> 1 0.386 0.929 0.723 0.386 #> 2 0.237 0.645 0.696 0.237 #> 3 0.520 0.783 0.871 0.520 #> 4 0.641 0.0587 0.709 
0.0587 #> 5 0.830 0.358 0.355 0.355 #> 6 0.919 0.479 0.807 0.479 # Where these functions exist they'll be much faster than rowwise # so be on the lookout for them. # rowwise() is also useful when doing simulations params <- tribble( ~sim, ~n, ~mean, ~sd, 1, 1, 1, 1, 2, 2, 2, 4, 3, 3, -1, 2 ) # Here I supply variables to preserve after the computation params %>% rowwise(sim) %>% reframe(z = rnorm(n, mean, sd)) #> # A tibble: 6 × 2 #> sim z #> #> 1 1 0.622 #> 2 2 7.32 #> 3 2 -1.09 #> 4 3 0.0376 #> 5 3 -0.787 #> 6 3 0.163 # If you want one row per simulation, put the results in a list() params %>% rowwise(sim) %>% summarise(z = list(rnorm(n, mean, sd)), .groups = \"keep\") #> # A tibble: 3 × 2 #> # Groups: sim [3] #> sim z #> #> 1 1 #> 2 2 #> 3 3 "},{"path":"https://dplyr.tidyverse.org/dev/reference/same_src.html","id":null,"dir":"Reference","previous_headings":"","what":"Figure out if two sources are the same (or two tbl have the same source) — same_src","title":"Figure out if two sources are the same (or two tbl have the same source) — same_src","text":"Figure two sources (two tbl source)","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/same_src.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Figure out if two sources are the same (or two tbl have the same source) — same_src","text":"","code":"same_src(x, y)"},{"path":"https://dplyr.tidyverse.org/dev/reference/same_src.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Figure out if two sources are the same (or two tbl have the same source) — same_src","text":"x, y src tbls test","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/same_src.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Figure out if two sources are the same (or two tbl have the same source) — same_src","text":"logical 
flag","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/sample_n.html","id":null,"dir":"Reference","previous_headings":"","what":"Sample n rows from a table — sample_n","title":"Sample n rows from a table — sample_n","text":"sample_n() sample_frac() superseded favour slice_sample(). deprecated near future, retirement means perform critical bug fixes, recommend moving newer alternative. functions superseded realised convenient two mutually exclusive arguments one function, rather two separate functions. also made clean smaller design issues sample_n()/sample_frac: connection slice() obvious. name first argument, tbl, inconsistent single table verbs use .data. size argument uses tidy evaluation, surprising undocumented. easier remove deprecated .env argument. ... suboptimal position.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/sample_n.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Sample n rows from a table — sample_n","text":"","code":"sample_n(tbl, size, replace = FALSE, weight = NULL, .env = NULL, ...) sample_frac(tbl, size = 1, replace = FALSE, weight = NULL, .env = NULL, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/sample_n.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Sample n rows from a table — sample_n","text":"tbl data.frame. size sample_n(), number rows select. sample_frac(), fraction rows select. tbl grouped, size applies group. replace Sample without replacement? weight Sampling weights. must evaluate vector non-negative numbers length input. Weights automatically standardised sum 1. .env DEPRECATED. ... 
ignored","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/sample_n.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Sample n rows from a table — sample_n","text":"","code":"df <- tibble(x = 1:5, w = c(0.1, 0.1, 0.1, 2, 2)) # sample_n() -> slice_sample() ---------------------------------------------- # Was: sample_n(df, 3) #> # A tibble: 3 × 2 #> x w #> #> 1 3 0.1 #> 2 4 2 #> 3 2 0.1 sample_n(df, 10, replace = TRUE) #> # A tibble: 10 × 2 #> x w #> #> 1 3 0.1 #> 2 2 0.1 #> 3 2 0.1 #> 4 5 2 #> 5 1 0.1 #> 6 2 0.1 #> 7 1 0.1 #> 8 2 0.1 #> 9 4 2 #> 10 4 2 sample_n(df, 3, weight = w) #> # A tibble: 3 × 2 #> x w #> #> 1 5 2 #> 2 4 2 #> 3 1 0.1 # Now: slice_sample(df, n = 3) #> # A tibble: 3 × 2 #> x w #> #> 1 5 2 #> 2 3 0.1 #> 3 1 0.1 slice_sample(df, n = 10, replace = TRUE) #> # A tibble: 10 × 2 #> x w #> #> 1 3 0.1 #> 2 4 2 #> 3 2 0.1 #> 4 1 0.1 #> 5 1 0.1 #> 6 3 0.1 #> 7 4 2 #> 8 1 0.1 #> 9 2 0.1 #> 10 5 2 slice_sample(df, n = 3, weight_by = w) #> # A tibble: 3 × 2 #> x w #> #> 1 4 2 #> 2 5 2 #> 3 2 0.1 # Note that sample_n() would error if n was bigger than the group size # slice_sample() will just use the available rows for consistency with # the other slice helpers like slice_head() try(sample_n(df, 10)) #> Error in sample_n(df, 10) : Can't compute indices. #> Caused by error: #> ! `size` must be less than or equal to 5 (size of data). #> ℹ set `replace = TRUE` to use sampling with replacement. 
slice_sample(df, n = 10) #> # A tibble: 5 × 2 #> x w #> #> 1 2 0.1 #> 2 3 0.1 #> 3 5 2 #> 4 1 0.1 #> 5 4 2 # sample_frac() -> slice_sample() ------------------------------------------- # Was: sample_frac(df, 0.25) #> # A tibble: 1 × 2 #> x w #> #> 1 1 0.1 sample_frac(df, 2, replace = TRUE) #> # A tibble: 10 × 2 #> x w #> #> 1 3 0.1 #> 2 1 0.1 #> 3 5 2 #> 4 4 2 #> 5 1 0.1 #> 6 5 2 #> 7 3 0.1 #> 8 1 0.1 #> 9 2 0.1 #> 10 3 0.1 # Now: slice_sample(df, prop = 0.25) #> # A tibble: 1 × 2 #> x w #> #> 1 3 0.1 slice_sample(df, prop = 2, replace = TRUE) #> # A tibble: 10 × 2 #> x w #> #> 1 2 0.1 #> 2 3 0.1 #> 3 5 2 #> 4 2 0.1 #> 5 2 0.1 #> 6 5 2 #> 7 1 0.1 #> 8 2 0.1 #> 9 1 0.1 #> 10 4 2"},{"path":"https://dplyr.tidyverse.org/dev/reference/scoped.html","id":null,"dir":"Reference","previous_headings":"","what":"Operate on a selection of variables — scoped","title":"Operate on a selection of variables — scoped","text":"Scoped verbs (_if, _at, _all) superseded use pick() across() existing verb. See vignette(\"colwise\") details. variants suffixed _if, _at _all apply expression (sometimes several) variables within specified subset. subset can contain variables (_all variants), vars() selection (_at variants), variables selected predicate (_if variants). verbs scoped variants : mutate(), transmute() summarise(). See summarise_all(). filter(). See filter_all(). group_by(). See group_by_all(). rename() select(). See select_all(). arrange(). See arrange_all() three kinds scoped variants. differ scope variable selection operations applied: Verbs suffixed _all() apply operation variables. Verbs suffixed _at() apply operation subset variables specified quoting function vars(). quoting function accepts tidyselect::vars_select() helpers like starts_with(). Instead vars() selection, can also supply integerish vector column positions character vector column names. Verbs suffixed _if() apply operation subset variables predicate function returns TRUE. 
Instead predicate function, can also supply logical vector.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/scoped.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Operate on a selection of variables — scoped","text":".tbl tbl object. .funs function fun, quosure style lambda ~ fun(.) list either form. .vars list columns generated vars(), character vector column names, numeric vector column positions, NULL. .predicate predicate function applied columns logical vector. variables .predicate returns TRUE selected. argument passed rlang::as_function() thus supports quosure-style lambda functions strings representing function names. ... Additional arguments function calls .funs. evaluated , tidy dots support.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/scoped.html","id":"grouping-variables","dir":"Reference","previous_headings":"","what":"Grouping variables","title":"Operate on a selection of variables — scoped","text":"operations also apply grouping variables part selection. includes: arrange_all(), arrange_at(), arrange_if() distinct_all(), distinct_at(), distinct_if() filter_all(), filter_at(), filter_if() group_by_all(), group_by_at(), group_by_if() select_all(), select_at(), select_if() case summarising mutating variants operations applied grouping variables. behaviour depends whether selection implicit (selections) explicit (selections). Grouping variables covered explicit selections (summarise_at(), mutate_at(), transmute_at()) always error. implicit selections, grouping variables always ignored. case, level verbosity depends kind operation: Summarising operations (summarise_all() summarise_if()) ignore grouping variables silently obvious operations applied grouping variables. hand obvious case mutating operations (mutate_all(), mutate_if(), transmute_all(), transmute_if()). 
reason, issue message indicating grouping variables ignored.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/se-deprecated.html","id":null,"dir":"Reference","previous_headings":"","what":"Deprecated SE versions of main verbs. — se-deprecated","title":"Deprecated SE versions of main verbs. — se-deprecated","text":"dplyr used offer twin versions verb suffixed underscore. versions standard evaluation (SE) semantics: rather taking arguments code, like NSE verbs, took arguments value. purpose make possible program dplyr. However, dplyr now uses tidy evaluation semantics. NSE verbs still capture arguments, can now unquote parts arguments. offers full programmability NSE verbs. Thus, underscored versions now superfluous. Unquoting triggers immediate evaluation operand inlines result within captured expression. result can value expression evaluated later rest argument. See vignette(\"programming\") information.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/se-deprecated.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Deprecated SE versions of main verbs. 
— se-deprecated","text":"","code":"add_count_(x, vars, wt = NULL, sort = FALSE) add_tally_(x, wt, sort = FALSE) arrange_(.data, ..., .dots = list()) count_(x, vars, wt = NULL, sort = FALSE, .drop = group_by_drop_default(x)) distinct_(.data, ..., .dots, .keep_all = FALSE) do_(.data, ..., .dots = list()) filter_(.data, ..., .dots = list()) funs_(dots, args = list(), env = base_env()) group_by_(.data, ..., .dots = list(), add = FALSE) group_indices_(.data, ..., .dots = list()) mutate_(.data, ..., .dots = list()) tally_(x, wt, sort = FALSE) transmute_(.data, ..., .dots = list()) rename_(.data, ..., .dots = list()) rename_vars_(vars, args) select_(.data, ..., .dots = list()) select_vars_(vars, args, include = chr(), exclude = chr()) slice_(.data, ..., .dots = list()) summarise_(.data, ..., .dots = list()) summarize_(.data, ..., .dots = list())"},{"path":"https://dplyr.tidyverse.org/dev/reference/se-deprecated.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Deprecated SE versions of main verbs. — se-deprecated","text":"x tbl() vars Various meanings depending verb. wt Frequency weights. Can NULL variable: NULL (default), counts number rows group. variable, computes sum(wt) group. sort TRUE, show largest groups top. .data data frame. .drop Drop groups formed factor levels appear data? default TRUE except .data previously grouped .drop = FALSE. See group_by_drop_default() details. .keep_all TRUE, keep variables .data. combination ... distinct, keeps first row values. dots, .dots, ... Pair/values expressions coercible lazy objects. args Various meanings depending verb. env environment functions evaluated. add FALSE, default, group_by() override existing groups. add existing groups, use .add = TRUE. argument previously called add, prevented creating new grouping variable called add, conflicts naming conventions. 
include, exclude Character vector column names always include/exclude.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/select.html","id":null,"dir":"Reference","previous_headings":"","what":"Keep or drop columns using their names and types — select","title":"Keep or drop columns using their names and types — select","text":"Select (optionally rename) variables data frame, using concise mini-language makes easy refer variables based name (e.g. :f selects columns left f right) type (e.g. (.numeric) selects numeric columns).","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/select.html","id":"overview-of-selection-features","dir":"Reference","previous_headings":"","what":"Overview of selection features","title":"Keep or drop columns using their names and types — select","text":"Tidyverse selections implement dialect R operators make easy select variables: : selecting range consecutive variables. ! taking complement set variables. & | selecting intersection union two sets variables. c() combining selections. addition, can use selection helpers. helpers select specific columns: everything(): Matches variables. last_col(): Select last variable, possibly offset. group_cols(): Select grouping columns. helpers select variables matching patterns names: starts_with(): Starts prefix. ends_with(): Ends suffix. contains(): Contains literal string. matches(): Matches regular expression. num_range(): Matches numerical range like x01, x02, x03. variables stored character vector: all_of(): Matches variable names character vector. names must present, otherwise --bounds error thrown. any_of(): all_of(), except error thrown names exist. 
using predicate function: (): Applies function variables selects function returns TRUE.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/select.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Keep or drop columns using their names and types — select","text":"","code":"select(.data, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/select.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Keep or drop columns using their names and types — select","text":".data data frame, data frame extension (e.g. tibble), lazy data frame (e.g. dbplyr dtplyr). See Methods, , details. ... One unquoted expressions separated commas. Variable names can used positions data frame, expressions like x:y can used select range variables.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/select.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Keep or drop columns using their names and types — select","text":"object type .data. output following properties: Rows affected. Output columns subset input columns, potentially different order. Columns renamed new_name = old_name form used. Data frame attributes preserved. Groups maintained; select grouping variables.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/select.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Keep or drop columns using their names and types — select","text":"function generic, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. 
following methods currently available loaded packages: dbplyr (tbl_lazy), dplyr (data.frame) .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/select.html","id":"examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Keep or drop columns using their names and types — select","text":"show usage basic selection operators. See specific help pages learn helpers like starts_with(). selection language can used functions like dplyr::select() tidyr::pivot_longer(). first attach tidyverse: Select variables name: Select multiple variables separating commas. Note order columns determined order inputs: Functions like tidyr::pivot_longer() take variables dots. case use c() select multiple variables:","code":"library(tidyverse) # For better printing iris <- as_tibble(iris) starwars %>% select(height) #> # A tibble: 87 x 1 #> height #> #> 1 172 #> 2 167 #> 3 96 #> 4 202 #> # i 83 more rows iris %>% pivot_longer(Sepal.Length) #> # A tibble: 150 x 6 #> Sepal.Width Petal.Length Petal.Width Species name value #> #> 1 3.5 1.4 0.2 setosa Sepal.Length 5.1 #> 2 3 1.4 0.2 setosa Sepal.Length 4.9 #> 3 3.2 1.3 0.2 setosa Sepal.Length 4.7 #> 4 3.1 1.5 0.2 setosa Sepal.Length 4.6 #> # i 146 more rows starwars %>% select(homeworld, height, mass) #> # A tibble: 87 x 3 #> homeworld height mass #> #> 1 Tatooine 172 77 #> 2 Tatooine 167 75 #> 3 Naboo 96 32 #> 4 Tatooine 202 136 #> # i 83 more rows iris %>% pivot_longer(c(Sepal.Length, Petal.Length)) #> # A tibble: 300 x 5 #> Sepal.Width Petal.Width Species name value #> #> 1 3.5 0.2 setosa Sepal.Length 5.1 #> 2 3.5 0.2 setosa Petal.Length 1.4 #> 3 3 0.2 setosa Sepal.Length 4.9 #> 4 3 0.2 setosa Petal.Length 1.4 #> # i 296 more rows"},{"path":"https://dplyr.tidyverse.org/dev/reference/select.html","id":"operators-","dir":"Reference","previous_headings":"","what":"Operators:","title":"Keep or drop columns using their names and types — select","text":": operator selects range consecutive variables: ! 
operator negates selection: & | take intersection union two selections: take difference two selections, combine & ! operators:","code":"starwars %>% select(name:mass) #> # A tibble: 87 x 3 #> name height mass #> #> 1 Luke Skywalker 172 77 #> 2 C-3PO 167 75 #> 3 R2-D2 96 32 #> 4 Darth Vader 202 136 #> # i 83 more rows starwars %>% select(!(name:mass)) #> # A tibble: 87 x 11 #> hair_color skin_color eye_color birth_year sex gender homeworld species #> #> 1 blond fair blue 19 male masculine Tatooine Human #> 2 gold yellow 112 none masculine Tatooine Droid #> 3 white, blue red 33 none masculine Naboo Droid #> 4 none white yellow 41.9 male masculine Tatooine Human #> # i 83 more rows #> # i 3 more variables: films , vehicles , starships iris %>% select(!c(Sepal.Length, Petal.Length)) #> # A tibble: 150 x 3 #> Sepal.Width Petal.Width Species #> #> 1 3.5 0.2 setosa #> 2 3 0.2 setosa #> 3 3.2 0.2 setosa #> 4 3.1 0.2 setosa #> # i 146 more rows iris %>% select(!ends_with(\"Width\")) #> # A tibble: 150 x 3 #> Sepal.Length Petal.Length Species #> #> 1 5.1 1.4 setosa #> 2 4.9 1.4 setosa #> 3 4.7 1.3 setosa #> 4 4.6 1.5 setosa #> # i 146 more rows iris %>% select(starts_with(\"Petal\") & ends_with(\"Width\")) #> # A tibble: 150 x 1 #> Petal.Width #> #> 1 0.2 #> 2 0.2 #> 3 0.2 #> 4 0.2 #> # i 146 more rows iris %>% select(starts_with(\"Petal\") | ends_with(\"Width\")) #> # A tibble: 150 x 3 #> Petal.Length Petal.Width Sepal.Width #> #> 1 1.4 0.2 3.5 #> 2 1.4 0.2 3 #> 3 1.3 0.2 3.2 #> 4 1.5 0.2 3.1 #> # i 146 more rows iris %>% select(starts_with(\"Petal\") & !ends_with(\"Width\")) #> # A tibble: 150 x 1 #> Petal.Length #> #> 1 1.4 #> 2 1.4 #> 3 1.3 #> 4 1.5 #> # i 146 more rows"},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/select_all.html","id":null,"dir":"Reference","previous_headings":"","what":"Select and rename a selection of variables — select_all","title":"Select and rename a selection of variables — select_all","text":"rename_if(), rename_at(), 
rename_all() superseded rename_with(). matching select statements superseded combination select() + rename_with(). predicate functions passed arguments select() rename_with() must wrapped (). functions superseded mutate_if() friends superseded across(). select_if() rename_if() already use tidy selection replaced across() instead need new function.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/select_all.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Select and rename a selection of variables — select_all","text":"","code":"select_all(.tbl, .funs = list(), ...) rename_all(.tbl, .funs = list(), ...) select_if(.tbl, .predicate, .funs = list(), ...) rename_if(.tbl, .predicate, .funs = list(), ...) select_at(.tbl, .vars, .funs = list(), ...) rename_at(.tbl, .vars, .funs = list(), ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/select_all.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Select and rename a selection of variables — select_all","text":".tbl tbl object. .funs function fun, purrr style lambda ~ fun(.) list either form. ... Additional arguments function calls .funs. evaluated , tidy dots support. .predicate predicate function applied columns logical vector. variables .predicate returns TRUE selected. argument passed rlang::as_function() thus supports quosure-style lambda functions strings representing function names. 
.vars list columns generated vars(), character vector column names, numeric vector column positions, NULL.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/select_all.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Select and rename a selection of variables — select_all","text":"","code":"mtcars <- as_tibble(mtcars) # for nicer printing mtcars %>% rename_all(toupper) #> # A tibble: 32 × 11 #> MPG CYL DISP HP DRAT WT QSEC VS AM GEAR CARB #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows # -> mtcars %>% rename_with(toupper) #> # A tibble: 32 × 11 #> MPG CYL DISP HP DRAT WT QSEC VS AM GEAR CARB #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 
123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows # NB: the transformation comes first in rename_with is_whole <- function(x) all(floor(x) == x) mtcars %>% rename_if(is_whole, toupper) #> # A tibble: 32 × 11 #> mpg CYL disp HP drat wt qsec VS AM GEAR CARB #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows # -> mtcars %>% rename_with(toupper, where(is_whole)) #> # A tibble: 32 × 11 #> mpg CYL disp HP drat wt qsec VS AM GEAR CARB #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows mtcars %>% rename_at(vars(mpg:hp), toupper) #> # A tibble: 32 × 11 #> MPG CYL DISP HP drat wt qsec vs am gear carb #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 
123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows # -> mtcars %>% rename_with(toupper, mpg:hp) #> # A tibble: 32 × 11 #> MPG CYL DISP HP drat wt qsec vs am gear carb #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows # You now must select() and then rename mtcars %>% select_all(toupper) #> # A tibble: 32 × 11 #> MPG CYL DISP HP DRAT WT QSEC VS AM GEAR CARB #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows # -> mtcars %>% rename_with(toupper) #> # A tibble: 32 × 11 #> MPG CYL DISP HP DRAT WT QSEC VS AM GEAR CARB #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 
123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows # Selection drops unselected variables: mtcars %>% select_if(is_whole, toupper) #> # A tibble: 32 × 6 #> CYL HP VS AM GEAR CARB #> #> 1 6 110 0 1 4 4 #> 2 6 110 0 1 4 4 #> 3 4 93 1 1 4 1 #> 4 6 110 1 0 3 1 #> 5 8 175 0 0 3 2 #> 6 6 105 1 0 3 1 #> 7 8 245 0 0 3 4 #> 8 4 62 1 0 4 2 #> 9 4 95 1 0 4 2 #> 10 6 123 1 0 4 4 #> # ℹ 22 more rows # -> mtcars %>% select(where(is_whole)) %>% rename_with(toupper) #> # A tibble: 32 × 6 #> CYL HP VS AM GEAR CARB #> #> 1 6 110 0 1 4 4 #> 2 6 110 0 1 4 4 #> 3 4 93 1 1 4 1 #> 4 6 110 1 0 3 1 #> 5 8 175 0 0 3 2 #> 6 6 105 1 0 3 1 #> 7 8 245 0 0 3 4 #> 8 4 62 1 0 4 2 #> 9 4 95 1 0 4 2 #> 10 6 123 1 0 4 4 #> # ℹ 22 more rows mtcars %>% select_at(vars(-contains(\"ar\"), starts_with(\"c\")), toupper) #> # A tibble: 32 × 10 #> MPG CYL DISP HP DRAT WT QSEC VS AM CARB #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 2 #> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 #> # ℹ 22 more rows # -> mtcars %>% select(!contains(\"ar\") | starts_with(\"c\")) %>% rename_with(toupper) #> # A tibble: 32 × 10 #> MPG CYL DISP HP DRAT WT QSEC VS AM CARB #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 2 #> 10 19.2 6 168. 
123 3.92 3.44 18.3 1 0 4 #> # ℹ 22 more rows"},{"path":"https://dplyr.tidyverse.org/dev/reference/setops.html","id":null,"dir":"Reference","previous_headings":"","what":"Set operations — setops","title":"Set operations — setops","text":"Perform set operations using rows data frame. intersect(x, y) finds rows x y. union(x, y) finds rows either x y, excluding duplicates. union_all(x, y) finds rows either x y, including duplicates. setdiff(x, y) finds rows x y. symdiff(x, y) computes symmetric difference, .e. rows x y rows y x. setequal(x, y) returns TRUE x y contain rows (ignoring order). Note intersect(), union(), setdiff(), symdiff() remove duplicates x y.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/setops.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Set operations — setops","text":"","code":"intersect(x, y, ...) union(x, y, ...) union_all(x, y, ...) setdiff(x, y, ...) setequal(x, y, ...) symdiff(x, y, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/setops.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Set operations — setops","text":"x, y Pair compatible data frames. pair data frames compatible column names (possibly different orders) compatible types. ... dots future extensions must empty.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/setops.html","id":"base-functions","dir":"Reference","previous_headings":"","what":"Base functions","title":"Set operations — setops","text":"intersect(), union(), setdiff(), setequal() override base functions name order make generic. 
existing behaviour vectors preserved providing default methods call base functions.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/setops.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Set operations — setops","text":"","code":"df1 <- tibble(x = 1:3) df2 <- tibble(x = 3:5) intersect(df1, df2) #> # A tibble: 1 × 1 #> x #> #> 1 3 union(df1, df2) #> # A tibble: 5 × 1 #> x #> #> 1 1 #> 2 2 #> 3 3 #> 4 4 #> 5 5 union_all(df1, df2) #> # A tibble: 6 × 1 #> x #> #> 1 1 #> 2 2 #> 3 3 #> 4 3 #> 5 4 #> 6 5 setdiff(df1, df2) #> # A tibble: 2 × 1 #> x #> #> 1 1 #> 2 2 setdiff(df2, df1) #> # A tibble: 2 × 1 #> x #> #> 1 4 #> 2 5 symdiff(df1, df2) #> # A tibble: 4 × 1 #> x #> #> 1 1 #> 2 2 #> 3 4 #> 4 5 setequal(df1, df2) #> [1] FALSE setequal(df1, df1[3:1, ]) #> [1] TRUE # Note that the following functions remove pre-existing duplicates: df1 <- tibble(x = c(1:3, 3, 3)) df2 <- tibble(x = c(3:5, 5)) intersect(df1, df2) #> # A tibble: 1 × 1 #> x #> #> 1 3 union(df1, df2) #> # A tibble: 5 × 1 #> x #> #> 1 1 #> 2 2 #> 3 3 #> 4 4 #> 5 5 setdiff(df1, df2) #> # A tibble: 2 × 1 #> x #> #> 1 1 #> 2 2 symdiff(df1, df2) #> # A tibble: 4 × 1 #> x #> #> 1 1 #> 2 2 #> 3 4 #> 4 5"},{"path":"https://dplyr.tidyverse.org/dev/reference/slice.html","id":null,"dir":"Reference","previous_headings":"","what":"Subset rows using their positions — slice","title":"Subset rows using their positions — slice","text":"slice() lets index rows (integer) locations. allows select, remove, duplicate rows. accompanied number helpers common use cases: slice_head() slice_tail() select first last rows. slice_sample() randomly selects rows. slice_min() slice_max() select rows smallest largest values variable. .data grouped_df, operation performed group, (e.g.) 
slice_head(df, n = 5) select first five rows group.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/slice.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Subset rows using their positions — slice","text":"","code":"slice(.data, ..., .by = NULL, .preserve = FALSE) slice_head(.data, ..., n, prop, by = NULL) slice_tail(.data, ..., n, prop, by = NULL) slice_min( .data, order_by, ..., n, prop, by = NULL, with_ties = TRUE, na_rm = FALSE ) slice_max( .data, order_by, ..., n, prop, by = NULL, with_ties = TRUE, na_rm = FALSE ) slice_sample(.data, ..., n, prop, by = NULL, weight_by = NULL, replace = FALSE)"},{"path":"https://dplyr.tidyverse.org/dev/reference/slice.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Subset rows using their positions — slice","text":".data data frame, data frame extension (e.g. tibble), lazy data frame (e.g. dbplyr dtplyr). See Methods, , details. ... slice(): Integer row values. Provide either positive values keep, negative values drop. values provided must either positive negative. Indices beyond number rows input silently ignored. slice_*(), arguments passed methods. ., Optionally, selection columns group just operation, functioning alternative group_by(). details examples, see ?dplyr_by. .preserve Relevant .data input grouped. .preserve = FALSE (default), grouping structure recalculated based resulting data, otherwise grouping kept . n, prop Provide either n, number rows, prop, proportion rows select. neither supplied, n = 1 used. n greater number rows group (prop > 1), result silently truncated group size. prop rounded towards zero generate integer number rows. negative value n prop subtracted group size. example, n = -2 group 5 rows select 5 - 2 = 3 rows; prop = -0.25 8 rows select 8 * (1 - 0.25) = 6 rows. order_by Variable function variables order . order multiple variables, wrap data frame tibble. with_ties ties kept together? 
default, TRUE, may return rows request. Use FALSE ignore ties, return first n rows. na_rm missing values order_by removed result? FALSE, NA values sorted end (like arrange()), included insufficient non-missing values reach n/prop. weight_by Sampling weights. must evaluate vector non-negative numbers length input. Weights automatically standardised sum 1. replace sampling performed (TRUE) without (FALSE, default) replacement.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/slice.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Subset rows using their positions — slice","text":"object type .data. output following properties: row may appear 0, 1, many times output. Columns modified. Groups modified. Data frame attributes preserved.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/slice.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Subset rows using their positions — slice","text":"Slice work relational databases intrinsic notion row order. want perform equivalent operation, use filter() row_number().","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/slice.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Subset rows using their positions — slice","text":"function generics, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. Methods available currently loaded packages: slice(): dbplyr (tbl_lazy), dplyr (data.frame) . slice_head(): dbplyr (tbl_lazy), dplyr (data.frame) . slice_tail(): dbplyr (tbl_lazy), dplyr (data.frame) . slice_min(): dbplyr (tbl_lazy), dplyr (data.frame) . slice_max(): dbplyr (tbl_lazy), dplyr (data.frame) . 
slice_sample(): dbplyr (tbl_lazy), dplyr (data.frame) .","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/slice.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Subset rows using their positions — slice","text":"","code":"# Similar to head(mtcars, 1): mtcars %>% slice(1L) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Mazda RX4 21 6 160 110 3.9 2.62 16.46 0 1 4 4 # Similar to tail(mtcars, 1): mtcars %>% slice(n()) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Volvo 142E 21.4 4 121 109 4.11 2.78 18.6 1 1 4 2 mtcars %>% slice(5:n()) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 #> Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 #> Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 #> Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 #> Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 #> Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 #> Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 #> Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 #> Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 #> Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 #> Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 #> Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 #> Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 #> Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 #> Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 #> Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 #> Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 #> AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 #> Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 #> Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 #> Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 #> Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 #> Lotus Europa 
30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 #> Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 #> Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 #> Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8 #> Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2 # Rows can be dropped with negative indices: slice(mtcars, -(1:4)) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 #> Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 #> Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 #> Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 #> Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 #> Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 #> Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 #> Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 #> Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 #> Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 #> Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 #> Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 #> Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 #> Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 #> Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 #> Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 #> Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 #> AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 #> Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 #> Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 #> Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 #> Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 #> Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 #> Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 #> Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 #> Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8 #> Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2 
# First and last rows based on existing order mtcars %>% slice_head(n = 5) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 #> Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 #> Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 #> Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 #> Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 mtcars %>% slice_tail(n = 5) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.9 1 1 5 2 #> Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.5 0 1 5 4 #> Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.5 0 1 5 6 #> Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.6 0 1 5 8 #> Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.6 1 1 4 2 # Rows with minimum and maximum values of a variable mtcars %>% slice_min(mpg, n = 5) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Cadillac Fleetwood 10.4 8 472 205 2.93 5.250 17.98 0 0 3 4 #> Lincoln Continental 10.4 8 460 215 3.00 5.424 17.82 0 0 3 4 #> Camaro Z28 13.3 8 350 245 3.73 3.840 15.41 0 0 3 4 #> Duster 360 14.3 8 360 245 3.21 3.570 15.84 0 0 3 4 #> Chrysler Imperial 14.7 8 440 230 3.23 5.345 17.42 0 0 3 4 mtcars %>% slice_max(mpg, n = 5) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 #> Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 #> Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 #> Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 # slice_min() and slice_max() may return more rows than requested # in the presence of ties. 
mtcars %>% slice_min(cyl, n = 1) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 #> Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 #> Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 #> Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 #> Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 #> Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 #> Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 #> Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 #> Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 #> Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2 # Use with_ties = FALSE to return exactly n matches mtcars %>% slice_min(cyl, n = 1, with_ties = FALSE) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Datsun 710 22.8 4 108 93 3.85 2.32 18.61 1 1 4 1 # Or use additional variables to break the tie: mtcars %>% slice_min(tibble(cyl, mpg), n = 1) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Volvo 142E 21.4 4 121 109 4.11 2.78 18.6 1 1 4 2 # slice_sample() allows you to random select with or without replacement mtcars %>% slice_sample(n = 5) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Maserati Bora 15.0 8 301 335 3.54 3.570 14.60 0 1 5 8 #> Duster 360 14.3 8 360 245 3.21 3.570 15.84 0 0 3 4 #> Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 #> Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 #> Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 mtcars %>% slice_sample(n = 5, replace = TRUE) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 #> Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 #> Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 #> Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 # you can optionally weight by a variable - this code weights by the # physical weight of the 
cars, so heavy cars are more likely to get # selected mtcars %>% slice_sample(weight_by = wt, n = 5) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Lincoln Continental 10.4 8 460 215 3.00 5.424 17.82 0 0 3 4 #> Dodge Challenger 15.5 8 318 150 2.76 3.520 16.87 0 0 3 2 #> Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 #> Chrysler Imperial 14.7 8 440 230 3.23 5.345 17.42 0 0 3 4 #> Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 # Group wise operation ---------------------------------------- df <- tibble( group = rep(c(\"a\", \"b\", \"c\"), c(1, 2, 4)), x = runif(7) ) # All slice helpers operate per group, silently truncating to the group # size, so the following code works without error df %>% group_by(group) %>% slice_head(n = 2) #> # A tibble: 5 × 2 #> # Groups: group [3] #> group x #> #> 1 a 0.634 #> 2 b 0.771 #> 3 b 0.502 #> 4 c 0.711 #> 5 c 0.0919 # When specifying the proportion of rows to include non-integer sizes # are rounded down, so group a gets 0 rows df %>% group_by(group) %>% slice_head(prop = 0.5) #> # A tibble: 3 × 2 #> # Groups: group [2] #> group x #> #> 1 b 0.771 #> 2 c 0.711 #> 3 c 0.0919 # Filter equivalents -------------------------------------------- # slice() expressions can often be written to use `filter()` and # `row_number()`, which can also be translated to SQL. For many databases, # you'll need to supply an explicit variable to use to compute the row number. 
filter(mtcars, row_number() == 1L) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Mazda RX4 21 6 160 110 3.9 2.62 16.46 0 1 4 4 filter(mtcars, row_number() == n()) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Volvo 142E 21.4 4 121 109 4.11 2.78 18.6 1 1 4 2 filter(mtcars, between(row_number(), 5, n())) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 #> Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 #> Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 #> Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 #> Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 #> Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 #> Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 #> Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 #> Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 #> Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 #> Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 #> Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 #> Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 #> Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 #> Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 #> Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 #> Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 #> AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 #> Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 #> Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 #> Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 #> Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 #> Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 #> Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 #> Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 #> Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8 #> Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 
2"},{"path":"https://dplyr.tidyverse.org/dev/reference/sql.html","id":null,"dir":"Reference","previous_headings":"","what":"SQL escaping. — sql","title":"SQL escaping. — sql","text":"functions critical writing functions translate R functions sql functions. Typically conversion function escape inputs return sql object.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/sql.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"SQL escaping. — sql","text":"","code":"sql(...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/sql.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"SQL escaping. — sql","text":"... Character vectors combined single SQL expression.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/src.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a ","title":"Create a ","text":"src() standard constructor srcs .src() tests.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/src.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a ","text":"","code":"src(subclass, ...) is.src(x)"},{"path":"https://dplyr.tidyverse.org/dev/reference/src.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a ","text":"subclass name subclass. \"src\" abstract base class, must supply value. src_ automatically prepended class name ... fields used object. dots evaluated explicit splicing. x object test \"src\"-ness.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/src_dbi.html","id":null,"dir":"Reference","previous_headings":"","what":"Source for database backends — src_dbi","title":"Source for database backends — src_dbi","text":"functions deprecated; instead please use tbl() directly DBIConnection. 
See https://dbplyr.tidyverse.org/ details.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/src_dbi.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Source for database backends — src_dbi","text":"","code":"src_mysql( dbname, host = NULL, port = 0L, username = \"root\", password = \"\", ... ) src_postgres( dbname = NULL, host = NULL, port = NULL, user = NULL, password = NULL, ... ) src_sqlite(path, create = FALSE)"},{"path":"https://dplyr.tidyverse.org/dev/reference/src_dbi.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Source for database backends — src_dbi","text":"dbname Database name host, port Host name port number database ... src, arguments passed underlying database connector, DBI::dbConnect(). tbl, included compatibility generic, otherwise ignored. user, username, password User name password. Generally, avoid saving username password scripts easy accidentally expose valuable credentials. Instead, retrieve environment variables, use database specific credential scores. example, MySQL can set .cnf described RMySQL::MySQL(). path Path SQLite database. can use special path \":memory:\" create temporary memory database. create FALSE, path must already exist. 
TRUE, create new SQLite3 database path path exist connect existing database path exist.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/src_dbi.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Source for database backends — src_dbi","text":"S3 object class src_dbi, src_sql, src.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/src_dbi.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Source for database backends — src_dbi","text":"","code":"con <- DBI::dbConnect(RSQLite::SQLite(), \":memory:\") copy_to(con, mtcars) # To retrieve a single table from a source, use `tbl()` mtcars <- con %>% tbl(\"mtcars\") mtcars #> # Source: table<`mtcars`> [?? x 11] #> # Database: sqlite 3.45.2 [:memory:] #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> # ℹ more rows # You can also use pass raw SQL if you want a more sophisticated query con %>% tbl(sql(\"SELECT * FROM mtcars WHERE cyl == 8\")) #> # Source: SQL [?? x 11] #> # Database: sqlite 3.45.2 [:memory:] #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 2 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 3 16.4 8 276. 180 3.07 4.07 17.4 0 0 3 3 #> 4 17.3 8 276. 180 3.07 3.73 17.6 0 0 3 3 #> 5 15.2 8 276. 
180 3.07 3.78 18 0 0 3 3 #> 6 10.4 8 472 205 2.93 5.25 18.0 0 0 3 4 #> 7 10.4 8 460 215 3 5.42 17.8 0 0 3 4 #> 8 14.7 8 440 230 3.23 5.34 17.4 0 0 3 4 #> 9 15.5 8 318 150 2.76 3.52 16.9 0 0 3 2 #> 10 15.2 8 304 150 3.15 3.44 17.3 0 0 3 2 #> # ℹ more rows"},{"path":"https://dplyr.tidyverse.org/dev/reference/src_local.html","id":null,"dir":"Reference","previous_headings":"","what":"A local source — src_local","title":"A local source — src_local","text":"function deprecated since existed support style testing dplyr backends turned useful.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/src_local.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"A local source — src_local","text":"","code":"src_local(tbl, pkg = NULL, env = NULL) src_df(pkg = NULL, env = NULL)"},{"path":"https://dplyr.tidyverse.org/dev/reference/src_local.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"A local source — src_local","text":"tbl name function used generate tbl objects pkg, env Either name package environment object look objects.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/src_tbls.html","id":null,"dir":"Reference","previous_headings":"","what":"List all tbls provided by a source. — src_tbls","title":"List all tbls provided by a source. — src_tbls","text":"generic method individual src's provide methods . methods documented usually pretty obvious possible results .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/src_tbls.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"List all tbls provided by a source. — src_tbls","text":"","code":"src_tbls(x, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/src_tbls.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"List all tbls provided by a source. — src_tbls","text":"x data src. ... 
arguments passed individual methods.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/starwars.html","id":null,"dir":"Reference","previous_headings":"","what":"Starwars characters — starwars","title":"Starwars characters — starwars","text":"original data, SWAPI, Star Wars API, https://swapi.py4e.com/, revised reflect additional research gender sex determinations characters.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/starwars.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Starwars characters — starwars","text":"","code":"starwars"},{"path":"https://dplyr.tidyverse.org/dev/reference/starwars.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Starwars characters — starwars","text":"tibble 87 rows 14 variables: name Name character height Height (cm) mass Weight (kg) hair_color,skin_color,eye_color Hair, skin, eye colors birth_year Year born (BBY = Battle Yavin) sex biological sex character, namely male, female, hermaphroditic, none (case Droids). gender gender role gender identity character determined personality way programmed (case Droids). 
homeworld Name homeworld species Name species films List films character appeared vehicles List vehicles character piloted starships List starships character piloted","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/starwars.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Starwars characters — starwars","text":"","code":"starwars #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Sky… 172 77 blond fair blue 19 male #> 2 C-3PO 167 75 NA gold yellow 112 none #> 3 R2-D2 96 32 NA white, bl… red 33 none #> 4 Darth Va… 202 136 none white yellow 41.9 male #> 5 Leia Org… 150 49 brown light brown 19 fema… #> 6 Owen Lars 178 120 brown, gr… light blue 52 male #> 7 Beru Whi… 165 75 brown light blue 47 fema… #> 8 R5-D4 97 32 NA white, red red NA none #> 9 Biggs Da… 183 84 black light brown 24 male #> 10 Obi-Wan … 182 77 auburn, w… fair blue-gray 57 male #> # ℹ 77 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships "},{"path":"https://dplyr.tidyverse.org/dev/reference/storms.html","id":null,"dir":"Reference","previous_headings":"","what":"Storm tracks data — storms","title":"Storm tracks data — storms","text":"dataset NOAA Atlantic hurricane database best track data, https://www.nhc.noaa.gov/data/#hurdat. data includes positions attributes storms 1975-2022. Storms 1979 onward measured every six hours lifetime storm. 
Storms earlier years missing data.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/storms.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Storm tracks data — storms","text":"","code":"storms"},{"path":"https://dplyr.tidyverse.org/dev/reference/storms.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Storm tracks data — storms","text":"tibble 19,537 observations 13 variables: name Storm Name year,month,day Date report hour Hour report (UTC) lat,long Location storm center status Storm classification (Tropical Depression, Tropical Storm, Hurricane) category Saffir-Simpson hurricane category calculated wind speed. NA: hurricane 1: 64+ knots 2: 83+ knots 3: 96+ knots 4: 113+ knots 5: 137+ knots wind storm's maximum sustained wind speed (knots) pressure Air pressure storm's center (millibars) tropicalstorm_force_diameter Diameter (nautical miles) area experiencing tropical storm strength winds (34 knots ). available starting 2004. hurricane_force_diameter Diameter (nautical miles) area experiencing hurricane strength winds (64 knots ). 
available starting 2004.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/storms.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Storm tracks data — storms","text":"","code":"storms #> # A tibble: 19,537 × 13 #> name year month day hour lat long status category wind #> #> 1 Amy 1975 6 27 0 27.5 -79 tropical depr… NA 25 #> 2 Amy 1975 6 27 6 28.5 -79 tropical depr… NA 25 #> 3 Amy 1975 6 27 12 29.5 -79 tropical depr… NA 25 #> 4 Amy 1975 6 27 18 30.5 -79 tropical depr… NA 25 #> 5 Amy 1975 6 28 0 31.5 -78.8 tropical depr… NA 25 #> 6 Amy 1975 6 28 6 32.4 -78.7 tropical depr… NA 25 #> 7 Amy 1975 6 28 12 33.3 -78 tropical depr… NA 25 #> 8 Amy 1975 6 28 18 34 -77 tropical depr… NA 30 #> 9 Amy 1975 6 29 0 34.4 -75.8 tropical storm NA 35 #> 10 Amy 1975 6 29 6 34 -74.8 tropical storm NA 40 #> # ℹ 19,527 more rows #> # ℹ 3 more variables: pressure , tropicalstorm_force_diameter , #> # hurricane_force_diameter # Show a few recent storm paths if (requireNamespace(\"ggplot2\", quietly = TRUE)) { library(ggplot2) storms %>% filter(year >= 2000) %>% ggplot(aes(long, lat, color = paste(year, name))) + geom_path(show.legend = FALSE) + facet_wrap(~year) } storms #> # A tibble: 19,537 × 13 #> name year month day hour lat long status category wind #> #> 1 Amy 1975 6 27 0 27.5 -79 tropical depr… NA 25 #> 2 Amy 1975 6 27 6 28.5 -79 tropical depr… NA 25 #> 3 Amy 1975 6 27 12 29.5 -79 tropical depr… NA 25 #> 4 Amy 1975 6 27 18 30.5 -79 tropical depr… NA 25 #> 5 Amy 1975 6 28 0 31.5 -78.8 tropical depr… NA 25 #> 6 Amy 1975 6 28 6 32.4 -78.7 tropical depr… NA 25 #> 7 Amy 1975 6 28 12 33.3 -78 tropical depr… NA 25 #> 8 Amy 1975 6 28 18 34 -77 tropical depr… NA 30 #> 9 Amy 1975 6 29 0 34.4 -75.8 tropical storm NA 35 #> 10 Amy 1975 6 29 6 34 -74.8 tropical storm NA 40 #> # ℹ 19,527 more rows #> # ℹ 3 more variables: pressure , tropicalstorm_force_diameter , #> # hurricane_force_diameter 
"},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise.html","id":null,"dir":"Reference","previous_headings":"","what":"Summarise each group down to one row — summarise","title":"Summarise each group down to one row — summarise","text":"summarise() creates new data frame. returns one row combination grouping variables; grouping variables, output single row summarising observations input. contain one column grouping variable one column summary statistics specified. summarise() summarize() synonyms.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Summarise each group down to one row — summarise","text":"","code":"summarise(.data, ..., .by = NULL, .groups = NULL) summarize(.data, ..., .by = NULL, .groups = NULL)"},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Summarise each group down to one row — summarise","text":".data data frame, data frame extension (e.g. tibble), lazy data frame (e.g. dbplyr dtplyr). See Methods, , details. ... Name-value pairs summary functions. name name variable result. value can : vector length 1, e.g. min(x), n(), sum(.na(y)). data frame, add multiple columns single expression. Returning values size 0 >1 deprecated 1.1.0. Please use reframe() instead. . Optionally, selection columns group just operation, functioning alternative group_by(). details examples, see ?dplyr_by. .groups Grouping structure result. \"drop_last\": dropping last level grouping. supported option version 1.0.0. \"drop\": levels grouping dropped. \"keep\": grouping structure .data. \"rowwise\": row group. .groups specified, chosen based number rows results: results 1 row, get \"drop_last\". number rows varies, get \"keep\" (note returning variable number rows deprecated favor reframe(), also unconditionally drops levels grouping). 
addition, message informs choice, unless result ungrouped, option \"dplyr.summarise.inform\" set FALSE, summarise() called function package.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Summarise each group down to one row — summarise","text":"object usually type .data. rows come underlying group_keys(). columns combination grouping keys summary expressions provide. grouping structure controlled .groups= argument, output may another grouped_df, tibble rowwise data frame. Data frame attributes preserved, summarise() fundamentally creates new data frame.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise.html","id":"useful-functions","dir":"Reference","previous_headings":"","what":"Useful functions","title":"Summarise each group down to one row — summarise","text":"Center: mean(), median() Spread: sd(), IQR(), mad() Range: min(), max(), Position: first(), last(), nth(), Count: n(), n_distinct() Logical: (), ()","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise.html","id":"backend-variations","dir":"Reference","previous_headings":"","what":"Backend variations","title":"Summarise each group down to one row — summarise","text":"data frame backend supports creating variable using summary. means previously created summary variables can transformed combined within summary, mutate(). However, also means summary variables names previous variables overwrite , making variables unavailable later summary variables. behaviour may supported backends. 
avoid unexpected results, consider using new names summary variables, especially creating multiple summaries.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Summarise each group down to one row — summarise","text":"function generic, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. following methods currently available loaded packages: dbplyr (tbl_lazy), dplyr (data.frame, grouped_df, rowwise_df) .","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Summarise each group down to one row — summarise","text":"","code":"# A summary applied to ungrouped tbl returns a single row mtcars %>% summarise(mean = mean(disp), n = n()) #> mean n #> 1 230.7219 32 # Usually, you'll want to group first mtcars %>% group_by(cyl) %>% summarise(mean = mean(disp), n = n()) #> # A tibble: 3 × 3 #> cyl mean n #> #> 1 4 105. 11 #> 2 6 183. 7 #> 3 8 353. 14 # Each summary call removes one grouping level (since that group # is now just a single row) mtcars %>% group_by(cyl, vs) %>% summarise(cyl_n = n()) %>% group_vars() #> `summarise()` has grouped output by 'cyl'. You can override using the #> `.groups` argument. #> [1] \"cyl\" # BEWARE: reusing variables may lead to unexpected results mtcars %>% group_by(cyl) %>% summarise(disp = mean(disp), sd = sd(disp)) #> # A tibble: 3 × 3 #> cyl disp sd #> #> 1 4 105. NA #> 2 6 183. NA #> 3 8 353. 
NA # Refer to column names stored as strings with the `.data` pronoun: var <- \"mass\" summarise(starwars, avg = mean(.data[[var]], na.rm = TRUE)) #> # A tibble: 1 × 1 #> avg #> #> 1 97.3 # Learn more in ?rlang::args_data_masking # In dplyr 1.1.0, returning multiple rows per group was deprecated in favor # of `reframe()`, which never messages and always returns an ungrouped # result: mtcars %>% group_by(cyl) %>% summarise(qs = quantile(disp, c(0.25, 0.75)), prob = c(0.25, 0.75)) #> Warning: Returning more (or less) than 1 row per `summarise()` group was #> deprecated in dplyr 1.1.0. #> ℹ Please use `reframe()` instead. #> ℹ When switching from `summarise()` to `reframe()`, remember that #> `reframe()` always returns an ungrouped data frame and adjust #> accordingly. #> `summarise()` has grouped output by 'cyl'. You can override using the #> `.groups` argument. #> # A tibble: 6 × 3 #> # Groups: cyl [3] #> cyl qs prob #> #> 1 4 78.8 0.25 #> 2 4 121. 0.75 #> 3 6 160 0.25 #> 4 6 196. 0.75 #> 5 8 302. 0.25 #> 6 8 390 0.75 # -> mtcars %>% group_by(cyl) %>% reframe(qs = quantile(disp, c(0.25, 0.75)), prob = c(0.25, 0.75)) #> # A tibble: 6 × 3 #> cyl qs prob #> #> 1 4 78.8 0.25 #> 2 4 121. 0.75 #> 3 6 160 0.25 #> 4 6 196. 0.75 #> 5 8 302. 0.25 #> 6 8 390 0.75"},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise_all.html","id":null,"dir":"Reference","previous_headings":"","what":"Summarise multiple columns — summarise_all","title":"Summarise multiple columns — summarise_all","text":"Scoped verbs (_if, _at, _all) superseded use pick() across() existing verb. See vignette(\"colwise\") details. scoped variants summarise() make easy apply transformation multiple variables. three variants. 
summarise_all() affects every variable summarise_at() affects variables selected character vector vars() summarise_if() affects variables selected predicate function","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise_all.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Summarise multiple columns — summarise_all","text":"","code":"summarise_all(.tbl, .funs, ...) summarise_if(.tbl, .predicate, .funs, ...) summarise_at(.tbl, .vars, .funs, ..., .cols = NULL) summarize_all(.tbl, .funs, ...) summarize_if(.tbl, .predicate, .funs, ...) summarize_at(.tbl, .vars, .funs, ..., .cols = NULL)"},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise_all.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Summarise multiple columns — summarise_all","text":".tbl tbl object. .funs function fun, quosure style lambda ~ fun(.) list either form. ... Additional arguments function calls .funs. evaluated , tidy dots support. .predicate predicate function applied columns logical vector. variables .predicate returns TRUE selected. argument passed rlang::as_function() thus supports quosure-style lambda functions strings representing function names. .vars list columns generated vars(), character vector column names, numeric vector column positions, NULL. .cols argument renamed .vars fit dplyr's terminology deprecated.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise_all.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Summarise multiple columns — summarise_all","text":"data frame. default, newly created columns shortest names needed uniquely identify output. 
force inclusion name, even needed, name input (see examples details).","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise_all.html","id":"grouping-variables","dir":"Reference","previous_headings":"","what":"Grouping variables","title":"Summarise multiple columns — summarise_all","text":"applied grouped tibble, operations applied grouping variables. behaviour depends whether selection implicit (selections) explicit (selections). Grouping variables covered explicit selections summarise_at() always error. Add -group_cols() vars() selection avoid : remove group_vars() character vector column names: Grouping variables covered implicit selections silently ignored summarise_all() summarise_if().","code":"data %>% summarise_at(vars(-group_cols(), ...), myoperation) nms <- setdiff(nms, group_vars(data)) data %>% summarise_at(nms, myoperation)"},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise_all.html","id":"naming","dir":"Reference","previous_headings":"","what":"Naming","title":"Summarise multiple columns — summarise_all","text":"names new columns derived names input variables names functions. one unnamed function (.e. .funs unnamed list length one), names input variables used name new columns; _at functions, one unnamed variable (.e., .vars form vars(a_single_column)) .funs length greater one, names functions used name new columns; otherwise, new names created concatenating names input variables names functions, separated underscore \"_\". .funs argument can named unnamed list. function unnamed name derived automatically, name form \"fn#\" used. Similarly, vars() accepts named unnamed arguments. variable .vars named, new column name created. 
Name collisions new columns disambiguated using unique suffix.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise_all.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Summarise multiple columns — summarise_all","text":"","code":"# The _at() variants directly support strings: starwars %>% summarise_at(c(\"height\", \"mass\"), mean, na.rm = TRUE) #> # A tibble: 1 × 2 #> height mass #> #> 1 175. 97.3 # -> starwars %>% summarise(across(c(\"height\", \"mass\"), ~ mean(.x, na.rm = TRUE))) #> # A tibble: 1 × 2 #> height mass #> #> 1 175. 97.3 # You can also supply selection helpers to _at() functions but you have # to quote them with vars(): starwars %>% summarise_at(vars(height:mass), mean, na.rm = TRUE) #> # A tibble: 1 × 2 #> height mass #> #> 1 175. 97.3 # -> starwars %>% summarise(across(height:mass, ~ mean(.x, na.rm = TRUE))) #> # A tibble: 1 × 2 #> height mass #> #> 1 175. 97.3 # The _if() variants apply a predicate function (a function that # returns TRUE or FALSE) to determine the relevant subset of # columns. Here we apply mean() to the numeric columns: starwars %>% summarise_if(is.numeric, mean, na.rm = TRUE) #> # A tibble: 1 × 3 #> height mass birth_year #> #> 1 175. 97.3 87.6 starwars %>% summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE))) #> # A tibble: 1 × 3 #> height mass birth_year #> #> 1 175. 97.3 87.6 by_species <- iris %>% group_by(Species) # If you want to apply multiple transformations, pass a list of # functions. 
When there are multiple functions, they create new # variables instead of modifying the variables in place: by_species %>% summarise_all(list(min, max)) #> # A tibble: 3 × 9 #> Species Sepal.Length_fn1 Sepal.Width_fn1 Petal.Length_fn1 #> #> 1 setosa 4.3 2.3 1 #> 2 versicolor 4.9 2 3 #> 3 virginica 4.9 2.2 4.5 #> # ℹ 5 more variables: Petal.Width_fn1 , Sepal.Length_fn2 , #> # Sepal.Width_fn2 , Petal.Length_fn2 , Petal.Width_fn2 # -> by_species %>% summarise(across(everything(), list(min = min, max = max))) #> # A tibble: 3 × 9 #> Species Sepal.Length_min Sepal.Length_max Sepal.Width_min #> #> 1 setosa 4.3 5.8 2.3 #> 2 versicolor 4.9 7 2 #> 3 virginica 4.9 7.9 2.2 #> # ℹ 5 more variables: Sepal.Width_max , Petal.Length_min , #> # Petal.Length_max , Petal.Width_min , Petal.Width_max "},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise_each.html","id":null,"dir":"Reference","previous_headings":"","what":"Summarise and mutate multiple columns. — summarise_each","title":"Summarise and mutate multiple columns. — summarise_each","text":"mutate_each() summarise_each() deprecated favour new across() function works within summarise() mutate().","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise_each.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Summarise and mutate multiple columns. — summarise_each","text":"","code":"summarise_each(tbl, funs, ...) summarise_each_(tbl, funs, vars) mutate_each(tbl, funs, ...) mutate_each_(tbl, funs, vars) summarize_each(tbl, funs, ...) 
summarize_each_(tbl, funs, vars)"},{"path":"https://dplyr.tidyverse.org/dev/reference/tbl.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a table from a data source — tbl","title":"Create a table from a data source — tbl","text":"generic method dispatches based first argument.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/tbl.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a table from a data source — tbl","text":"","code":"tbl(src, ...) is.tbl(x)"},{"path":"https://dplyr.tidyverse.org/dev/reference/tbl.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a table from a data source — tbl","text":"src data source ... arguments passed individual methods x object","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/tbl_df.html","id":null,"dir":"Reference","previous_headings":"","what":"Coerce to a tibble — tbl_df","title":"Coerce to a tibble — tbl_df","text":"Please use tibble::as_tibble() instead.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/tbl_df.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Coerce to a tibble — tbl_df","text":"","code":"tbl_df(data) as.tbl(x, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/tbl_df.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Coerce to a tibble — tbl_df","text":"data, x Object coerce","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/tbl_ptype.html","id":null,"dir":"Reference","previous_headings":"","what":"Return a prototype of a tbl — tbl_ptype","title":"Return a prototype of a tbl — tbl_ptype","text":"Used _if functions enable type-based selection even data lazily generated. 
either return complete tibble, can computed quickly, 0-row tibble columns correct type.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/tbl_ptype.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Return a prototype of a tbl — tbl_ptype","text":"","code":"tbl_ptype(.data)"},{"path":"https://dplyr.tidyverse.org/dev/reference/tbl_vars.html","id":null,"dir":"Reference","previous_headings":"","what":"List variables provided by a tbl. — tbl_vars","title":"List variables provided by a tbl. — tbl_vars","text":"tbl_vars() returns variables tbl_nongroup_vars() returns non-grouping variables. groups attribute object returned tbl_vars() character vector grouping columns.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/tbl_vars.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"List variables provided by a tbl. — tbl_vars","text":"","code":"tbl_vars(x) tbl_nongroup_vars(x)"},{"path":"https://dplyr.tidyverse.org/dev/reference/tbl_vars.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"List variables provided by a tbl. — tbl_vars","text":"x tbl object","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/tidyeval-compat.html","id":null,"dir":"Reference","previous_headings":"","what":"Other tidy eval tools — tidyeval-compat","title":"Other tidy eval tools — tidyeval-compat","text":"tidy eval functions longer normal usage, still exported dplyr backward compatibility. See ?rlang::args_data_masking vignette(\"programming\") latest recommendations. 
expr() enquo() enquos() sym() syms() as_label() quo() quos() quo_name() ensym() ensyms() enexpr() enexprs()","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/top_n.html","id":null,"dir":"Reference","previous_headings":"","what":"Select top (or bottom) n rows (by value) — top_n","title":"Select top (or bottom) n rows (by value) — top_n","text":"top_n() superseded favour slice_min()/slice_max(). deprecated near future, retirement means perform critical bug fixes, recommend moving newer alternatives. top_n() superseded name fundamentally confusing returned might reasonably consider bottom rows. Additionally, wt variable confusing name, strange default (last column data frame). Unfortunately see easy way fix existing top_n() function without breaking existing code, created new alternative.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/top_n.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Select top (or bottom) n rows (by value) — top_n","text":"","code":"top_n(x, n, wt) top_frac(x, n, wt)"},{"path":"https://dplyr.tidyverse.org/dev/reference/top_n.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Select top (or bottom) n rows (by value) — top_n","text":"x data frame. n Number rows return top_n(), fraction rows return top_frac(). n positive, selects top rows. negative, selects bottom rows. x grouped, number (fraction) rows per group. include rows ties. wt (Optional). variable use ordering. 
specified, defaults last variable tbl.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/top_n.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Select top (or bottom) n rows (by value) — top_n","text":"","code":"df <- data.frame(x = c(6, 4, 1, 10, 3, 1, 1)) df %>% top_n(2) # highest values #> Selecting by x #> x #> 1 6 #> 2 10 df %>% top_n(-2) # lowest values #> Selecting by x #> x #> 1 1 #> 2 1 #> 3 1 # now use df %>% slice_max(x, n = 2) #> x #> 1 10 #> 2 6 df %>% slice_min(x, n = 2) #> x #> 1 1 #> 2 1 #> 3 1 # top_frac() -> prop argument of slice_min()/slice_max() df %>% top_frac(.5) #> Selecting by x #> x #> 1 6 #> 2 4 #> 3 10 # -> df %>% slice_max(x, prop = 0.5) #> x #> 1 10 #> 2 6 #> 3 4"},{"path":"https://dplyr.tidyverse.org/dev/reference/transmute.html","id":null,"dir":"Reference","previous_headings":"","what":"Create, modify, and delete columns — transmute","title":"Create, modify, and delete columns — transmute","text":"transmute() creates new data frame containing specified computations. superseded can perform job mutate(.keep = \"none\").","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/transmute.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create, modify, and delete columns — transmute","text":"","code":"transmute(.data, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/transmute.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create, modify, and delete columns — transmute","text":".data data frame, data frame extension (e.g. tibble), lazy data frame (e.g. dbplyr dtplyr). See Methods, , details. ... Name-value pairs. name gives name column output. value can : vector length 1, recycled correct length. vector length current group (whole data frame ungrouped). NULL, remove column. 
data frame tibble, create multiple columns output.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/transmute.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create, modify, and delete columns — transmute","text":"object type .data. output following properties: Columns created modified ... returned order specified .... Unmodified grouping columns placed front. number rows affected. Columns given value NULL removed. Groups recomputed grouping variable mutated. Data frame attributes preserved.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/transmute.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Create, modify, and delete columns — transmute","text":"function generic, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. Methods available currently loaded packages: dbplyr (tbl_lazy), dplyr (data.frame) .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/vars.html","id":null,"dir":"Reference","previous_headings":"","what":"Select variables — vars","title":"Select variables — vars","text":"vars() superseded needed scoped verbs (.e. mutate_at(), summarise_at(), friends), superseded favour across(). See vignette(\"colwise\") details. helper intended provide tidy-select semantics scoped verbs like mutate_at() summarise_at(). Note anywhere can supply vars() specification, can also supply numeric vector column positions character vector column names.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/vars.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Select variables — vars","text":"","code":"vars(...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/vars.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Select variables — vars","text":"... 
Variables operate .","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/with_groups.html","id":null,"dir":"Reference","previous_headings":"","what":"Perform an operation with temporary groups — with_groups","title":"Perform an operation with temporary groups — with_groups","text":"experimental function allows modify grouping variables single operation; superseded favour using .argument individual verbs.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/with_groups.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Perform an operation with temporary groups — with_groups","text":"","code":"with_groups(.data, .groups, .f, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/with_groups.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Perform an operation with temporary groups — with_groups","text":".data data frame .groups One variables group . Unlike group_by(), can group existing variables, can use tidy-select syntax like c(x, y, z) select multiple variables. Use NULL temporarily ungroup. .f Function apply regrouped data. Supports purrr-style ~ syntax ... 
Additional arguments passed ....","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/with_groups.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Perform an operation with temporary groups — with_groups","text":"","code":"df <- tibble(g = c(1, 1, 2, 2, 3), x = runif(5)) # Old df %>% with_groups(g, mutate, x_mean = mean(x)) #> # A tibble: 5 × 3 #> g x x_mean #> #> 1 1 0.764 0.791 #> 2 1 0.819 0.791 #> 3 2 0.761 0.795 #> 4 2 0.829 0.795 #> 5 3 0.00851 0.00851 # New df %>% mutate(x_mean = mean(x), .by = g) #> # A tibble: 5 × 3 #> g x x_mean #> #> 1 1 0.764 0.791 #> 2 1 0.819 0.791 #> 3 2 0.761 0.795 #> 4 2 0.829 0.795 #> 5 3 0.00851 0.00851"},{"path":"https://dplyr.tidyverse.org/dev/reference/with_order.html","id":null,"dir":"Reference","previous_headings":"","what":"Run a function with one order, translating result back to original order — with_order","title":"Run a function with one order, translating result back to original order — with_order","text":"used power ordering parameters dplyr's window functions","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/with_order.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Run a function with one order, translating result back to original order — with_order","text":"","code":"with_order(order_by, fun, x, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/with_order.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Run a function with one order, translating result back to original order — with_order","text":"order_by vector order fun window function x, ... 
arguments f","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-development-version","dir":"Changelog","previous_headings":"","what":"dplyr (development version)","title":"dplyr (development version)","text":"R >=3.6.0 now explicitly required (#7026).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-114","dir":"Changelog","previous_headings":"","what":"dplyr 1.1.4","title":"dplyr 1.1.4","text":"CRAN release: 2023-11-17 join_by() now allows helper functions namespaced dplyr::, like join_by(dplyr::(x, lower, upper)) (#6838). left_join() friends now return specialized error message detect join return rows dplyr can handle (#6912). slice_*() now throw correct error forget name n also prefixing call dplyr:: (#6946). dplyr_reconstruct()’s default method rewritten avoid materializing duckplyr queries early (#6947). Updated storms data include 2022 data (#6937, @steveharoz). Updated starwars data use new API, old one defunct. minor changes data (#6938, @steveharoz).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-113","dir":"Changelog","previous_headings":"","what":"dplyr 1.1.3","title":"dplyr 1.1.3","text":"CRAN release: 2023-09-03 mutate_each() summarise_each() now throw correct deprecation messages (#6869). setequal() now requires input data frames compatible, similar set methods like setdiff() intersect() (#6786).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-112","dir":"Changelog","previous_headings":"","what":"dplyr 1.1.2","title":"dplyr 1.1.2","text":"CRAN release: 2023-04-20 count() better documents .drop argument (#6820). Fixed tests maintain compatibility next version waldo (#6823). 
Joins better handle key columns NAs (#6804).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-111","dir":"Changelog","previous_headings":"","what":"dplyr 1.1.1","title":"dplyr 1.1.1","text":"CRAN release: 2023-03-22 Mutating joins now warn multiple matches much less often. high level, warning previously thrown one--many many--many relationship detected keys x y, now thrown many--many relationship, much rarer much dangerous one--many can result Cartesian explosion number rows returned join (#6731, #6717). ’ve accomplished two steps: multiple now defaults \"\", options \"error\" \"warning\" now deprecated favor using relationship (see ). using accelerated deprecation process two options ’ve available weeks, relationship clearly superior alternative. mutating joins gain new relationship argument, allowing optionally enforce one following relationship constraints keys x y: \"one--one\", \"one--many\", \"many--one\", \"many--many\". example, \"many--one\" enforces row x can match 1 row y. row x matches >1 rows y, error thrown. option serves replacement multiple = \"error\". default behavior relationship doesn’t assume relationship x y. However, equality joins check presence many--many relationship, warn detects one. change unfortunately mean set multiple = \"\" avoid warning happened many--many style join, need replace multiple = \"\" relationship = \"many--many\" silence new warning, believe rare since many--many relationships fairly uncommon. Fixed major performance regression case_when(). still little slower dplyr 1.0.10, plan improve future (#6674). Fixed performance regression related nth(), first(), last() (#6682). Fixed issue expressions involving infix operators abnormally large amount overhead (#6681). group_data() ungrouped data frames faster (#6736). n() little faster many groups (#6727). pick() now returns 1 row, 0 column tibble ... evaluates empty selection. makes compatible tidyverse recycling rules edge cases (#6685). 
if_else() case_when() accept logical conditions attributes (#6678). arrange() can sort numeric_version type base R (#6680). slice_sample() now works input column named replace. slice_min() slice_max() now work input columns named na_rm with_ties (#6725). nth() now errors informatively n NA (#6682). Joins now throw informative error y doesn’t source x (#6798). major dplyr verbs now throw informative error message input data frame contains column named NA \"\" (#6758). Deprecation warnings thrown filter() now mention correct package problem originated (#6679). Fixed issue using <- within grouped mutate() summarise() cross contaminate groups (#6666). compatibility vignette replaced general vignette using dplyr packages, vignette(\"-packages\") (#6702). developer documentation ?dplyr_extending refreshed brought date changes made 1.1.0 (#6695). rename_with() now includes example using paste0(recycle0 = TRUE) correctly handle empty selections (#6688). R >=3.5.0 now explicitly required. line tidyverse policy supporting 5 recent versions R.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-110","dir":"Changelog","previous_headings":"","what":"dplyr 1.1.0","title":"dplyr 1.1.0","text":"CRAN release: 2023-01-29","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"new-features-1-1-0","dir":"Changelog","previous_headings":"","what":"New features","title":"dplyr 1.1.0","text":"./experimental alternative group_by() supports per-operation grouping mutate(), summarise(), filter(), slice() family (#6528). Rather : can now write: useful reason .affects single operation. example , ungrouped data frame went summarise() call, ungrouped data frame come ; ., never need remember ungroup() afterwards never need use .groups argument. Additionally, using summarise() .never sort results group key, unlike group_by(). Instead, results returned using existing ordering groups original data. 
feel predictable, better maintains ordering might already applied previous call arrange(), provides way maintain current ordering without resort factors. feature inspired data.table, equivalent syntax looks like: with_groups() superseded favor .(#6582). reframe() new experimental verb creates new data frame applying functions columns existing data frame. similar summarise(), two big differences: reframe() can return arbitrary number rows per group, summarise() reduces group single row. reframe() always returns ungrouped data frame, summarise() might return grouped rowwise data frame, depending scenario. reframe() added response valid concern community allowing summarise() return number rows per group increases chance accidental bugs. still feel powerful technique, principled replacement (), moved features reframe() (#6382). group_by() now uses new algorithm computing groups. often faster previous approach (especially many groups), cases changes. one exception character vectors, see C locale news bullet details (#4406, #6297). arrange() now uses faster algorithm sorting character vectors, heavily inspired data.table’s forder(). See C locale news bullet details (#4962). Joins completely overhauled enable flexible join operations provide tools quality control. Many changes inspired data.table’s join syntax (#5914, #5661, #5413, #2240). join specification can now created join_by(). allows specify left right hand side join using unquoted column names, join_by(sale_date == commercial_date). Join specifications can supplied *_join() function argument. Join specifications allow new types joins: Equality joins: common join, specified ==. example, join_by(sale_date == commercial_date). Inequality joins: joining inequalities, .e.>=, >, <, <=. example, use join_by(sale_date >= commercial_date) find every commercial aired particular sale. Rolling joins: “rolling” closest match forward backwards isn’t exact match, specified using rolling helper, closest(). 
example, join_by(closest(sale_date >= commercial_date)) find recent commercial aired particular sale. Overlap joins: detecting overlaps sets columns, specified using one overlap helpers: (), within(), overlaps(). example, use join_by((commercial_date, sale_date_lower, sale_date)) find commercials aired particular sale, long occurred lower bound, 40 days sale made. Note use arbitrary expressions join conditions, like join_by(sale_date - 40 >= commercial_date). Instead, use mutate() create new column containing result sale_date - 40 refer name join_by(). multiple new argument controlling happens row x matches multiple rows y. equality joins rolling joins, usually surprising, defaults signalling \"warning\", still returns matches. inequality joins, multiple matches usually expected, defaults returning \"\" matches. can also return \"first\" \"last\" match, \"\" matches, can \"error\". keep now defaults NULL rather FALSE. NULL implies keep = FALSE equality conditions, keep = TRUE inequality conditions, since generally want preserve sides inequality join. unmatched new argument controlling happens row dropped doesn’t match. backwards compatibility, default \"drop\", can also choose \"error\" dropped rows surprising. across() gains experimental .unpack argument optionally unpack (, tidyr::unpack()) data frames returned functions .fns (#6360). consecutive_id() creating groups based contiguous runs values, like data.table::rleid() (#1534). case_match() “vectorised switch” variant case_when() matches values rather logical expressions. like SQL “simple” CASE statement, whereas case_when() like SQL “searched” CASE statement (#6328). cross_join() explicit slightly correct replacement using = character() join (#6604). pick() makes easy access subset columns current group. pick() intended replacement across(.fns = NULL), cur_data(), cur_data_all(). feel pick() much evocative name just trying select subset columns data (#6204). 
symdiff() computes symmetric difference (#4811).","code":"starwars %>% group_by(species, homeworld) %>% summarise(mean_height = mean(height)) starwars %>% summarise( mean_height = mean(height), .by = c(species, homeworld) ) starwars[, .(mean_height = mean(height)), by = .(species, homeworld)]"},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"breaking-changes-1-1-0","dir":"Changelog","previous_headings":"Lifecycle changes","what":"Breaking changes","title":"dplyr 1.1.0","text":"arrange() group_by() now use C locale, system locale, ordering grouping character vectors. brings substantial performance improvements, increases reproducibility across R sessions, makes dplyr consistent data.table, believe affect little existing code. affect code, can use options(dplyr.legacy_locale = TRUE) quickly revert previous behavior. However, general, instead recommend use new .locale argument precisely specify desired locale. full explanation please read associated grouping ordering tidyups. bench_tbls(), compare_tbls(), compare_tbls2(), eval_tbls(), eval_tbls2(), location() changes(), deprecated 1.0.0, now defunct (#6387). frame_data(), data_frame_(), lst_() tbl_sum() longer re-exported tibble (#6276, #6277, #6278, #6284). select_vars(), rename_vars(), select_var() current_vars(), deprecated 0.8.4, now defunct (#6387).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"newly-deprecated-1-1-0","dir":"Changelog","previous_headings":"Lifecycle changes","what":"Newly deprecated","title":"dplyr 1.1.0","text":"across(), c_across(), if_any(), if_all() now require .cols .fns arguments. general, now recommend use pick() instead empty across() call across() .fns (e.g. across(c(x, y)). (#6523). Relying previous default .cols = everything() deprecated. skipped soft-deprecation stage case, indirect usage across() friends way rare. 
Relying previous default .fns = NULL yet formally soft-deprecated, good alternative now, discouraged soft-deprecated next minor release. Passing ... across() soft-deprecated ’s ambiguous arguments evaluated. Now, instead (e.g.) across(:b, mean, na.rm = TRUE) write across(:b, ~ mean(.x, na.rm = TRUE)) (#6073). all_equal() deprecated. ’ve advised time, explicitly recommend use .equal(), manually reordering rows columns needed (#6324). cur_data() cur_data_all() soft-deprecated favour pick() (#6204). Using = character() perform cross join now soft-deprecated favor cross_join() (#6604). filter()ing 1-column matrix deprecated (#6091). progress_estimate() deprecated uses (#6387). Using summarise() produce 0 >1 row “summary” deprecated favor new reframe(). See NEWS bullet reframe() details (#6382). functions deprecated 1.0.0 (released April 2020) earlier now warn every time use (#6387). includes combine(), src_local(), src_mysql(), src_postgres(), src_sqlite(), rename_vars_(), select_vars_(), summarise_each_(), mutate_each_(), .tbl(), tbl_df(), handful older arguments. likely made defunct next major version (mid 2024). slice()ing 1-column matrix deprecated.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"newly-superseded-1-1-0","dir":"Changelog","previous_headings":"Lifecycle changes","what":"Newly superseded","title":"dplyr 1.1.0","text":"recode() superseded favour case_match() (#6433). recode_factor() superseded. don’t direct replacement yet, plan add one forcats. meantime can often use case_match(.ptype = factor(levels = )) instead (#6433). transmute() superseded favour mutate(.keep = \"none\") (#6414).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"newly-stable-1-1-0","dir":"Changelog","previous_headings":"Lifecycle changes","what":"Newly stable","title":"dplyr 1.1.0","text":".keep, ., .arguments mutate() moved experimental stable. 
rows_*() family functions moved experimental stable.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"vctrs-1-1-0","dir":"Changelog","previous_headings":"","what":"vctrs","title":"dplyr 1.1.0","text":"Many dplyr’s vector functions rewritten make use vctrs package, bringing greater consistency improved performance. () can now work vector types, just numeric date-time. Additionally, left right can now also vectors (length x), x, left, right cast common type comparison made (#6183, #6260, #6478). case_when() (#5106): new .default argument intended replace usage TRUE ~ default_value explicit readable way specify default value. future, deprecate unsafe recycling LHS inputs allows TRUE ~ work, encourage switch using .default. longer requires exact matching types RHS values. example, following longer requires use NA_character_. Supports larger variety RHS value types. example, can use data frame create multiple columns . new .ptype .size arguments allow enforce particular output type size. better error types lengths incompatible (#6261, #6206). coalesce() (#6265): Discards NULL inputs front. longer iterates columns data frame input. Instead, row now coalesced entirely missing, consistent vctrs::vec_detect_missing() greatly simplifies implementation. new .ptype .size arguments allow enforce particular output type size. first(), last(), nth() (#6331): used data frame, functions now return single row rather single column. consistent vctrs principle data frame generally treated vector rows. default longer “guessed”, always automatically set missing value appropriate type x. Error n integer. nth(x, n = 2) fine, nth(x, n = 2.5) now error. longer support indexing scalar objects, like scalar S4 objects (#6670). Additionally, gained na_rm argument since summary functions (#6242, contributions @tnederlof). if_else() gains benefits case_when(). 
particular, if_else() now takes common type true, false, missing determine output type, meaning can now reliably use NA, rather NA_character_ friends (#6243). if_else() also longer allows supply NULL either true false, undocumented usage consider -label, true false intended (documented ) vector inputs (#6730). na_if() (#6329) now casts y type x comparison, makes clearer function type size stable x. particular, means can longer na_if(, 0), previously accidentally allowed replace instance 0 across every column tibble NA. na_if() never intended work way, considered -label usage. can also now replace NaN values x na_if(x, NaN). lag() lead() now cast default type x, rather taking common type. ensures functions type stable x (#6330). row_number(), min_rank(), dense_rank(), ntile(), cume_dist(), percent_rank() faster work types. can now rank multiple columns supplying data frame (#6428). with_order() now checks size order_by size x, now works correctly order_by data frame (#6334).","code":"x <- c(\"little\", \"unknown\", \"small\", \"missing\", \"large\") case_when( x %in% c(\"little\", \"small\") ~ \"one\", x %in% c(\"big\", \"large\") ~ \"two\", x %in% c(\"missing\", \"unknown\") ~ NA )"},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"minor-improvements-and-bug-fixes-1-1-0","dir":"Changelog","previous_headings":"","what":"Minor improvements and bug fixes","title":"dplyr 1.1.0","text":"Fixed issue latest rlang caused internal tools (mask$eval_all_summarise()) mentioned error messages (#6308). Warnings enriched contextualised information summarise() filter() just like mutate() arrange(). Joins now reference correct column y type error thrown joining two columns different names (#6465). Joins wide tables longer bottlenecked application suffix (#6642). *_join() now error supply additional arguments aren’t used (#6228). across() used without functions inside rowwise-data frame longer generates invalid data frame (#6264). 
Anonymous functions supplied function() \\() now inlined across() possible, slightly improves performance makes possible optimisations future. Functions supplied across() longer masked columns (#6545). instance, across(1:2, mean) now work expected even column called mean. across() now error supplied ... without .fns argument (#6638). arrange() now correctly ignores NULL inputs (#6193). arrange() now works correctly across() calls used 2nd () ordering expression (#6495). arrange(df, mydesc::desc(x)) works correctly mydesc re-exports dplyr::desc() (#6231). c_across() now evaluates all_of() correctly longer allows accidentally select grouping variables (#6522). c_across() now throws informative error try rename column selection (#6522). dplyr longer provides count() tally() methods tbl_sql. methods accidentally overriding tbl_lazy methods dbplyr provides, resulted issues grouping structure output (#6338, tidyverse/dbplyr#940). cur_group() now works correctly zero row grouped data frames (#6304). desc() gives useful error message give non-vector (#6028). distinct() now retains attributes bare data frames (#6318). distinct() returns columns ordered way request, input data (#6156). Error messages group_by(), distinct(), tally(), count() now relevant (#6139). group_by_prepare() loses caller_env argument. rarely used longer needed (#6444). group_walk() gains explicit .keep argument (#6530). Warnings emitted inside mutate() variants now collected stashed away. Run new last_dplyr_warnings() function see warnings emitted within dplyr verbs last top-level command. fixes performance issues thousands warnings emitted rowwise grouped data frames (#6005, #6236). mutate() behaves little better 0-row rowwise inputs (#6303). rowwise mutate() now automatically unlists list-columns containing length 1 vectors (#6302). nest_join() gained na_matches argument joins . nest_join() now preserves type y (#6295). n_distinct() now errors don’t give input (#6535). 
nth(), first(), last(), with_order() now sort character order_by vectors C locale. Using character vectors order_by rare, expect little practical impact (#6451). ntile() now requires n single positive integer. relocate() now works correctly empty data frames ..result empty selections (#6167). relocate() longer drops attributes bare data frames (#6341). relocate() now retains last name change single column renamed multiple times moved. better matches behavior rename() (#6209, help @eutwt). rename() now contains examples using all_of() any_of() rename using named character vector (#6644). rename_with() now disallows renaming .cols tidy-selection (#6561). rename_with() now checks result .fn right type size (#6561). rows_insert() now checks y contains columns (#6652). setequal() ignores differences freely coercible types (e.g. integer double) (#6114) ignores duplicated rows (#6057). slice() helpers produce output equivalent slice(.data, 0) n prop argument 0, fixing bug introduced previous version (@eutwt, #6184). slice() inputs now returns 0 rows. mostly theoretical consistency (#6573). slice() now errors expressions ... named. helps avoid accidentally misspelling optional argument, .(#6554). slice_*() now requires n integer. slice_*() generics now perform argument validation. make methods consistent simpler implement (#6361). slice_min() slice_max() can order_by multiple variables supply data.frame tibble (#6176). slice_min() slice_max() now consistently include missing values result necessary (.e. aren’t enough non-missing values reach n prop selected). don’t want missing values included , set na_rm = TRUE (#6177). slice_sample() now accepts negative n prop values (#6402). slice_sample() returns data frame group number rows input replace = FALSE n larger number rows prop larger 1. reverts change made 1.0.8, returning behavior 1.0.7 (#6185) slice_sample() now gives informative error replace = FALSE number rows requested sample exceeds number rows data (#6271). 
storms updated include 2021 data missing storms omitted due error (@steveharoz, #6320). summarise() now correctly recycles named 0-column data frames (#6509). union_all(), like union(), now requires data frames compatible: .e. columns, columns compatible types. () re-exported tidyselect (#6597).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-1010","dir":"Changelog","previous_headings":"","what":"dplyr 1.0.10","title":"dplyr 1.0.10","text":"CRAN release: 2022-09-01 Hot patch release resolve R CMD check failures.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-109","dir":"Changelog","previous_headings":"","what":"dplyr 1.0.9","title":"dplyr 1.0.9","text":"CRAN release: 2022-04-28 New rows_append() works like rows_insert() ignores keys allows insert arbitrary rows guarantee type x won’t change (#6249, thanks @krlmlr implementation @mgirlich idea). rows_*() functions longer require key values x uniquely identify row. Additionally, rows_insert() rows_delete() longer require key values y uniquely identify row. Relaxing restriction make functions practically useful data frames, alternative backends can enforce ways needed (.e. primary keys) (#5553). rows_insert() gained new conflict argument allowing greater control rows y keys conflict keys x. conflict arises key y already exists x. default, conflict results error, can now also \"ignore\" y rows. similar CONFLICT NOTHING command SQL (#5588, helpful additions @mgirlich @krlmlr). rows_update(), rows_patch(), rows_delete() gained new unmatched argument allowing greater control rows y keys unmatched keys x. default, unmatched key results error, can now also \"ignore\" y rows (#5984, #5699). rows_delete() longer requires columns y strict subset x. columns specified utilized y, others dropped message. rows_*() functions now always retain column types x. behavior documented, previously wasn’t applied correctly (#6240). 
rows_*() functions now fail elegantly y zero column data frame isn’t specified (#6179).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-108","dir":"Changelog","previous_headings":"","what":"dplyr 1.0.8","title":"dplyr 1.0.8","text":"CRAN release: 2022-02-08 Better display error messages thanks rlang 1.0.0. mutate(.keep = \"none\") longer identical transmute(). transmute() changed, completely ignores column ordering existing data, instead relying ordering expressions supplied .... mutate(.keep = \"none\") changed ensure pre-existing columns never moved, aligns closely .keep options (#6086). filter() forbids matrix results (#5973) warns data frame results, especially data frames created across() hint use if_any() if_all(). slice() helpers (slice_head(), slice_tail(), slice_min(), slice_max()) now accept negative values n prop (#5961). slice() now indicates group produces error (#5931). cur_data() cur_data_all() don’t simplify list columns rowwise data frames (#5901). dplyr now uses rlang::check_installed() prompt whether install required packages missing. storms data updated 2020 (@steveharoz, #5899). coalesce() accepts 1-D arrays (#5557). deprecated trunc_mat() longer reexported dplyr (#6141).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-107","dir":"Changelog","previous_headings":"","what":"dplyr 1.0.7","title":"dplyr 1.0.7","text":"CRAN release: 2021-06-18 across() uses formula environment inlining (#5886). summarise.rowwise_df() quiet result ungrouped (#5875). c_across() across() key deparsing confused long calls (#5883). across() handles named selections (#5207).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-106","dir":"Changelog","previous_headings":"","what":"dplyr 1.0.6","title":"dplyr 1.0.6","text":"CRAN release: 2021-05-05 add_count() now generic (#5837). if_any() if_all() abort predicate mistakingly used .cols= (#5732). 
Multiple calls if_any() /if_all() expression now properly disambiguated (#5782). filter() now inlines if_any() if_all() expressions. greatly improves performance grouped data frames. Fixed behaviour ... top-level across() calls (#5813, #5832). across() now inlines lambda-formulas. slightly performant allow optimisations future. Fixed issue bind_rows() causing lists incorrectly transformed data frames (#5417, #5749). select() longer creates duplicate variables renaming variable name grouping variable (#5841). dplyr_col_select() keeps attributes bare data frames (#5294, #5831). Fixed quosure handling dplyr::group_by() caused issues extra arguments (tidyverse/lubridate#959). Removed name argument compute() generic (@ianmcook, #5783). row-wise data frames 0 rows list columns supported (#5804).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-105","dir":"Changelog","previous_headings":"","what":"dplyr 1.0.5","title":"dplyr 1.0.5","text":"CRAN release: 2021-03-05 Fixed edge case slice_sample() weight_by= used 0 rows (#5729). across() can use columns functions defined inline (#5734). Using testthat 3rd edition. Fixed bugs introduced across() previous version (#5765). group_by() keeps attributes unrelated grouping (#5760). .cols= argument if_any() if_all() defaults everything().","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-104","dir":"Changelog","previous_headings":"","what":"dplyr 1.0.4","title":"dplyr 1.0.4","text":"CRAN release: 2021-02-02 Improved performance across(). makes summarise(across()) mutate(across()) perform well superseded colwise equivalents (#5697). New functions if_any() if_all() (#4770, #5713). summarise() silently ignores NULL results (#5708). Fixed performance regression mutate() warnings occur per group (#5675). 
longer instrument warnings debugging information mutate() called within suppressWarnings().","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-103","dir":"Changelog","previous_headings":"","what":"dplyr 1.0.3","title":"dplyr 1.0.3","text":"CRAN release: 2021-01-15 summarise() longer informs result ungrouped (#5633). group_by(.drop = FALSE) preserves ordered factors (@brianrice2, #5545). count() tally() now generic. Removed default fallbacks lazyeval methods; yield better error messages call dplyr function wrong input, part long term plan remove deprecated lazyeval interface. inner_join() gains keep parameter consistency mutating joins (@patrickbarks, #5581). Improved performance many columns, dynamic data mask using active bindings lazy chops (#5017). mutate() friends preserves row names data frames (#5418). group_by() uses ungrouped data implicit mutate step (#5598). might define ungroup() method custom classes. example, see https://github.com/hadley/cubelyr/pull/3. relocate() can rename columns relocates (#5569). distinct() group_by() better error messages mutate step fails (#5060). Clarify () vectorised (#5493). Fixed across() issue data frame columns referred all_of() nested case (mutate() within mutate()) (#5498). across() handles data frames 0 columns (#5523). mutate() always keeps grouping variables, unconditional .keep= (#5582). dplyr now depends R 3.3.0","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-102","dir":"Changelog","previous_headings":"","what":"dplyr 1.0.2","title":"dplyr 1.0.2","text":"CRAN release: 2020-08-18 Fixed across() issue data frame columns mask objects referred all_of() (#5460). 
bind_cols() gains .name_repair argument, passed vctrs::vec_cbind() (#5451) summarise(.groups = \"rowwise\") makes rowwise data frame even input data grouped (#5422).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-101","dir":"Changelog","previous_headings":"","what":"dplyr 1.0.1","title":"dplyr 1.0.1","text":"CRAN release: 2020-07-31 New function cur_data_all() similar cur_data() includes grouping variables (#5342). count() tally() longer automatically weights column n present (#5298). dplyr 1.0.0 introduced behaviour Hadley’s faulty memory. Historically tally() automatically weighted count() , behaviour accidentally changed 0.8.2 (#4408) neither automatically weighted n. Since 0.8.2 almost year old, automatically weighting behaviour little confusing anyway, ’ve removed count() tally(). Use wt = n() now deprecated; now just omit wt argument. coalesce() now supports data frames correctly (#5326). cummean() longer --one indexing problem (@cropgen, #5287). call stack preserved error. makes possible recover() problematic code called dplyr verbs (#5308).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-100","dir":"Changelog","previous_headings":"","what":"dplyr 1.0.0","title":"dplyr 1.0.0","text":"CRAN release: 2020-05-29","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"breaking-changes-1-0-0","dir":"Changelog","previous_headings":"","what":"Breaking changes","title":"dplyr 1.0.0","text":"bind_cols() longer converts tibble, returns data frame input data frame. bind_rows(), *_join(), summarise() mutate() use vctrs coercion rules. two main user facing changes: Combining factor character vectors silently creates character vector; previously created character vector warning. Combining multiple factors creates factor combined levels; previously created character vector warning. bind_rows() functions use vctrs name repair, see ?vctrs::vec_as_names. .equal.tbl_df() removed. 
Data frames, tibbles grouped data frames longer considered equal, even data . Equality checks data frames longer ignore row order groupings. expect_equal() uses .equal() internally. comparing data frames, tests used pass may now fail. distinct() keeps original column order. distinct() missing columns now raises error, compatibility warning long time. group_modify() puts grouping variable front. n() row_number() can longer called directly dplyr loaded, now generates error: dplyr::mutate(mtcars, x = n()). Fix prefixing dplyr:: dplyr::mutate(mtcars, x = dplyr::n()) old data format grouped_df longer supported. may affect serialized grouped data frames disk, e.g. saveRDS() using knitr caching. lead() lag() stricter inputs. Extending data frames requires extra class classes added first, last. extra class end causes vctrs operations fail message like: right_join() longer sorts rows resulting tibble according order RHS argument tibble y.","code":"Input must be a vector, not a `` object"},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"new-features-1-0-0","dir":"Changelog","previous_headings":"","what":"New features","title":"dplyr 1.0.0","text":"cur_ functions (cur_data(), cur_group(), cur_group_id(), cur_group_rows()) provide full set options access information “current” group dplyr verbs. inspired data.table’s .SD, .GRP, ., .. rows_ functions (rows_insert(), rows_update(), rows_upsert(), rows_patch(), rows_delete()) provide new API insert delete rows second data frame table. Support updating mutable backends planned (#4654). mutate() summarise() create multiple columns single expression return data frame (#2326). select() rename() use latest version tidyselect interface. Practically, means can now combine selections using Boolean logic (.e. !, & |), use predicate functions () (e.g. (.character)) select variables type (#4680). 
also makes possible use select() rename() repair data frames duplicated names (#4615) prevents accidentally introducing duplicate names (#4643). also means dplyr now re-exports any_of() all_of() (#5036). slice() gains new set helpers: slice_head() slice_tail() select first last rows, like head() tail(), return n rows per group. slice_sample() randomly selects rows, taking sample_frac() sample_n(). slice_min() slice_max() select rows minimum maximum values variable, taking confusing top_n(). summarise() can create summaries greater length 1 use summary function returns multiple values. summarise() gains .groups= argument control grouping structure. New relocate() verb makes easy move columns around within data frame (#4598). New rename_with() designed specifically purpose renaming selected columns function (#4771). ungroup() can now selectively remove grouping variables (#3760). pull() can now return named vectors specifying additional column name (@ilarischeinin, #4102).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"experimental-features-1-0-0","dir":"Changelog","previous_headings":"","what":"Experimental features","title":"dplyr 1.0.0","text":"mutate() (data frames ), gains experimental new arguments ..allow control new columns placed (#2047). mutate() (data frames ), gains experimental new argument called .keep allows control variables kept input .data. .keep = \"\" default; keeps variables. .keep = \"none\" retains input variables (except grouping keys), behaves like transmute(). .keep = \"unused\" keeps variables used make new columns. .keep = \"used\" keeps input variables used create new columns; ’s useful double checking work (#3721). 
New, experimental, with_groups() makes easy temporarily group ungroup (#4711).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"across-1-0-0","dir":"Changelog","previous_headings":"","what":"across()","title":"dplyr 1.0.0","text":"New function across() can used inside summarise(), mutate(), verbs apply function (set functions) selection columns. See vignette(\"colwise\") details. New function c_across() can used inside summarise() mutate() row-wise data frames easily (e.g.) compute row-wise mean numeric variables. See vignette(\"rowwise\") details.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"rowwise-1-0-0","dir":"Changelog","previous_headings":"","what":"rowwise()","title":"dplyr 1.0.0","text":"rowwise() longer questioning; now understand ’s important tool don’t vectorised code. now also allows specify additional variables preserved output summarising (#4723). rowwise-ness preserved operations; need explicit drop as_tibble() group_by(). New, experimental, nest_by(). interface group_by(), returns rowwise data frame grouping keys, supplemental list-column data frames containing rest data.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"vctrs-1-0-0","dir":"Changelog","previous_headings":"","what":"vctrs","title":"dplyr 1.0.0","text":"implementation dplyr verbs changed use primitives provided vctrs package. makes easier add support new types vector, radically simplifies implementation, makes dplyr verbs consistent. place mostly likely impacted coercion changes working factors joins grouped mutates: now combining factors different levels, dplyr creates new factor union levels. matches base R closely, perhaps strictly less correct, much convenient. dplyr dropped two heaviest dependencies: Rcpp BH. make considerably easier faster build source. implementation verbs carefully thought . 
mostly makes implementation simpler hopefully increase consistency, also makes easier adapt dplyr new data structures new future. Pragmatically, biggest difference people verb documents return value terms rows, columns, groups, data frame attributes. Row names now preserved working data frames.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"grouping-1-0-0","dir":"Changelog","previous_headings":"","what":"Grouping","title":"dplyr 1.0.0","text":"group_by() uses hashing vctrs package. Grouped data frames now names<-, [[<-, [<- $<- methods re-generate underlying grouping. Note modifying grouping variables multiple steps (.e. df$grp1 <- 1; df$grp2 <- 1) inefficient since data frame regrouped modification. [.grouped_df now regroups respect grouping columns removed (#4708). mutate() summarise() can now modify grouping variables (#4709). group_modify() works additional arguments (@billdenney @cderv, #4509) group_by() create arbitrary NA group grouping factors drop = TRUE (#4460).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"lifecycle-changes-1-0-0","dir":"Changelog","previous_headings":"","what":"Lifecycle changes","title":"dplyr 1.0.0","text":"deprecations now use lifecycle, means default ’ll see deprecation warning per session, can control options(lifecycle_verbosity = x) x one NULL, “quiet”, “warning”, “error”.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"removed-1-0-0","dir":"Changelog","previous_headings":"Lifecycle changes","what":"Removed","title":"dplyr 1.0.0","text":"id(), deprecated dplyr 0.5.0, now defunct. failwith(), deprecated dplyr 0.7.0, now defunct. tbl_cube() nasa pulled separate cubelyr package (#4429). rbind_all() rbind_list() removed (@bjungbogati, #4430). 
dr_dplyr() removed longer needed (#4433, @smwindecker).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"deprecated-1-0-0","dir":"Changelog","previous_headings":"Lifecycle changes","what":"Deprecated","title":"dplyr 1.0.0","text":"Use pkgconfig setting na_matches argument join functions now deprecated (#4914). rarely used, ’m now confident default correct R. add_count(), drop argument deprecated didn’t actually affect output. add_rownames(): please use tibble::rownames_to_column() instead. .tbl() tbl_df(): please use as_tibble() instead. bench_tbls(), compare_tbls(), compare_tbls2(), eval_tbls() eval_tbls2() now deprecated. used handful packages, now believe ’re better performing comparisons directly (#4675). combine(): please use vctrs::vec_c() instead. funs(): please use list() instead. group_by(add = ): please use .add instead. group_by(.dots = )/group_by_prepare(.dots = ): please use !!! instead (#4734). use zero-arg group_indices() retrieve group id “current” group deprecated; instead use cur_group_id(). Passing arguments group_keys() group_indices() change grouping deprecated, instead grouping first . location() changes(): please use lobstr::ref() instead. progress_estimated() soft deprecated; ’s responsibility dplyr provide progress bars (#4935). src_local() deprecated; part approach testing dplyr backends didn’t pan . src_mysql(), src_postgres(), src_sqlite() deprecated. ’ve recommended time. Instead please use approach described https://dbplyr.tidyverse.org/. select_vars(), rename_vars(), select_var(), current_vars() now deprecated (@perezp44, #4432)","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"superseded-1-0-0","dir":"Changelog","previous_headings":"Lifecycle changes","what":"Superseded","title":"dplyr 1.0.0","text":"scoped helpers (functions ending _if, _at, _all) superseded across(). dramatically reduces API surface dplyr, providing providing flexible less error-prone interface (#4769). 
rename_*() select_*() superseded rename_with(). () superseded favour summarise(). sample_n() sample_frac() superseded slice_sample(). See ?sample_n details , examples converting old new usage. top_n() superseded byslice_min()/slice_max(). See ?top_n details , convert old new usage (#4494).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"questioning-1-0-0","dir":"Changelog","previous_headings":"Lifecycle changes","what":"Questioning","title":"dplyr 1.0.0","text":"all_equal() questioning; solves problem longer seems important.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"stable-1-0-0","dir":"Changelog","previous_headings":"Lifecycle changes","what":"Stable","title":"dplyr 1.0.0","text":"rowwise() longer questioning.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"documentation-improvements-1-0-0","dir":"Changelog","previous_headings":"","what":"Documentation improvements","title":"dplyr 1.0.0","text":"New vignette(\"base\") describes dplyr verbs relate base R equivalents (@sastoudt, #4755) New vignette(\"grouping\") gives details dplyr verbs change applied grouped data frames (#4779, @MikeKSmith). vignette(\"programming\") completely rewritten reflect latest vocabulary, recent rlang features, current recommendations. now substantially easier program dplyr.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"minor-improvements-and-bug-fixes-1-0-0","dir":"Changelog","previous_headings":"","what":"Minor improvements and bug fixes","title":"dplyr 1.0.0","text":"dplyr now rudimentary, experimental, stop-gap, extension mechanism documented ?dplyr_extending dplyr longer provides .equal.tbl_df() method. never done first place owns neither generic class. also provided problematic implementation , default, ignored order rows columns usually important. 
likely cause new test failures downstream packages; whole believe failures either reflect unexpected behaviour tests need strengthened (#2751). coalesce() now uses vctrs recycling common type coercion rules (#5186). count() add_count() better job preserving input class attributes (#4086). distinct() errors request use variables don’t exist (previously warning) (#4656). filter(), mutate() summarise() get better error messages. filter() handles data frame results columns logical vectors reducing & (#4678). particular means across() can used filter(). left_join(), right_join(), full_join() gain keep argument can optionally choose keep sets join keys (#4589). useful want figure rows missing either side. Join functions can now perform cross-join specifying = character() (#4206.) groups() now returns list() ungrouped data; previously returned NULL type-unstable (groups returns list symbols). first argument group_map(), group_modify() group_walk() changed .data consistency generics. group_keys.rowwise_df() gives 0 column data frame n() rows. group_map() now generic (#4576). group_by(..., .add = TRUE) replaces group_by(..., add = TRUE), deprecation message. old argument name mistake prevents creating new grouping var called add violates naming conventions (#4137). intersect(), union(), setdiff() setequal() generics now imported generics package. reduces conflict lubridate. order_by() gives informative hint accidentally call instead arrange() #3357. tally() count() now message default output name (n), already exists data frame. quiet message, ’ll need supply explicit name (#4284). can override default weighting using constant setting wt = 1. starwars dataset now better job separating biological sex gender identity. previous gender column renamed sex, since actually describes individual’s biological sex. new gender column encodes actual gender identity using information Star Wars universe (@MeganBeckett, #4456). src_tbls() accepts ... arguments (#4485, @ianmcook). 
breaking change dplyr backend packages implement src_tbls(). Better performance extracting slices factors ordered factors (#4501). rename_at() rename_all() call function simple character vector, dplyr_sel_vars (#4459). ntile() now consistent database implementations buckets irregular size (#4495).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-085-2020-03-07","dir":"Changelog","previous_headings":"","what":"dplyr 0.8.5 (2020-03-07)","title":"dplyr 0.8.5 (2020-03-07)","text":"CRAN release: 2020-03-07 Maintenance release compatibility R-devel.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-084-2020-01-30","dir":"Changelog","previous_headings":"","what":"dplyr 0.8.4 (2020-01-30)","title":"dplyr 0.8.4 (2020-01-30)","text":"CRAN release: 2020-01-31 Adapt tests changes dependent packages.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-083-2019-07-04","dir":"Changelog","previous_headings":"","what":"dplyr 0.8.3 (2019-07-04)","title":"dplyr 0.8.3 (2019-07-04)","text":"CRAN release: 2019-07-04 Fixed performance regression introduced version 0.8.2 (#4458).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-082-2019-06-28","dir":"Changelog","previous_headings":"","what":"dplyr 0.8.2 (2019-06-28)","title":"dplyr 0.8.2 (2019-06-28)","text":"CRAN release: 2019-06-29","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"new-functions-0-8-2","dir":"Changelog","previous_headings":"","what":"New functions","title":"dplyr 0.8.2 (2019-06-28)","text":"top_frac(data, proportion) shorthand top_n(data, proportion * n()) (#4017).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"colwise-changes-0-8-2","dir":"Changelog","previous_headings":"","what":"colwise changes","title":"dplyr 0.8.2 (2019-06-28)","text":"Using quosures colwise verbs deprecated (#4330). 
Updated distinct_if(), distinct_at() distinct_all() include .keep_all argument (@beansrowning, #4343). rename_at() handles empty selection (#4324). *_if() functions correctly handle columns special names (#4380). colwise functions support constants formulas (#4374).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"hybrid-evaluation-changes-0-8-2","dir":"Changelog","previous_headings":"","what":"Hybrid evaluation changes","title":"dplyr 0.8.2 (2019-06-28)","text":"hybrid rank functions correctly handle NA (#4427). first(), last() nth() hybrid version handles factors (#4295).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"minor-changes-0-8-2","dir":"Changelog","previous_headings":"","what":"Minor changes","title":"dplyr 0.8.2 (2019-06-28)","text":"top_n() quotes n argument, n longer needs constant groups (#4017). tbl_vars() keeps information grouping columns returning dplyr_sel_vars object (#4106). group_split() always sets ptype attribute, make robust case 0 groups. group_map() group_modify() work 0 group edge case (#4421) select.list() method added select() dispatch lists (#4279). view() reexported tibble (#4423). group_by() puts NA groups last character vectors (#4227). arrange() handles integer64 objects (#4366). 
summarise() correctly resolves summarised list columns (#4349).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-081-2019-05-14","dir":"Changelog","previous_headings":"","what":"dplyr 0.8.1 (2019-05-14)","title":"dplyr 0.8.1 (2019-05-14)","text":"CRAN release: 2019-05-14","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"breaking-changes-0-8-1","dir":"Changelog","previous_headings":"","what":"Breaking changes","title":"dplyr 0.8.1 (2019-05-14)","text":"group_modify() new name function previously known group_map()","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"new-functions-0-8-1","dir":"Changelog","previous_headings":"","what":"New functions","title":"dplyr 0.8.1 (2019-05-14)","text":"group_map() now calls function group return list. group_by_drop_default(), previously known dplyr:::group_drops() exported (#4245).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"minor-changes-0-8-1","dir":"Changelog","previous_headings":"","what":"Minor changes","title":"dplyr 0.8.1 (2019-05-14)","text":"Lists formulas passed colwise verbs now automatically named. group_by() shallow copy even groups case (#4221). Fixed mutate() rowwise data frames 0 rows (#4224). Fixed handling bare formulas colwise verbs (#4183). Fixed performance n_distinct() (#4202). group_indices() now ignores empty groups default data.frame, consistent default group_by() (@yutannihilation, #4208). Fixed integer overflow hybrid ntile() (#4186). colwise functions summarise_at() … can rename vars case multiple functions (#4180). select_if() rename_if() handle logical vector predicate (#4213). hybrid min() max() cast integer possible (#4258). bind_rows() correctly handles cases multiple consecutive NULL (#4296). Support R 3.1.* dropped. minimal R version supported now 3.2.0. 
https://www.tidyverse.org/articles/2019/04/r-version-support/ rename_at() handles empty selection (#4324).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-0801-2019-02-15","dir":"Changelog","previous_headings":"","what":"dplyr 0.8.0.1 (2019-02-15)","title":"dplyr 0.8.0.1 (2019-02-15)","text":"CRAN release: 2019-02-15 Fixed integer C/C++ division, forced released CRAN (#4185).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-080-2019-02-14","dir":"Changelog","previous_headings":"","what":"dplyr 0.8.0 (2019-02-14)","title":"dplyr 0.8.0 (2019-02-14)","text":"CRAN release: 2019-02-14","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"breaking-changes-0-8-0","dir":"Changelog","previous_headings":"","what":"Breaking changes","title":"dplyr 0.8.0 (2019-02-14)","text":"error find function \"n\" warning Calling `n()` without importing prefixing deprecated, use `dplyr::n()` indicates functions like n(), row_number(), … imported prefixed. easiest fix import dplyr import(dplyr) NAMESPACE #' @import dplyr roxygen comment, alternatively functions can imported selectively function importFrom(dplyr, n) NAMESPACE #' @importFrom dplyr n roxygen comment. third option prefix , .e. use dplyr::n() see checking S3 generic/method consistency R CMD check package, note : sample_n() sample_frac() gained ... filter() slice() gained .preserve group_by() gained .drop Error: `.data` corrupt grouped_df, ... signals code makes wrong assumptions internals grouped data frame.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"new-functions-0-8-0","dir":"Changelog","previous_headings":"","what":"New functions","title":"dplyr 0.8.0 (2019-02-14)","text":"New selection helpers group_cols(). can called selection contexts select() matches grouping variables grouped tibbles. last_col() re-exported tidyselect (#3584). group_trim() drops unused levels factors used grouping variables. 
nest_join() creates list column matching rows. nest_join() + tidyr::unnest() equivalent inner_join (#3570). group_nest() similar tidyr::nest() focusing variables nest instead nested columns. group_split() similar base::split() operating existing groups applied grouped data frame, subject data mask ungrouped data frames group_map() group_walk() purrr-like functions iterate groups grouped data frame, jointly identified data subset (exposed .x) data key (one row tibble, exposed .y). group_map() returns grouped data frame combines results function, group_walk() used side effects returns input invisibly. distinct_prepare(), previously known distinct_vars() exported. mostly useful alternative backends (e.g. dbplyr).","code":"band_members %>% nest_join(band_instruments) starwars %>% group_by(species, homeworld) %>% group_nest() starwars %>% group_nest(species, homeworld) starwars %>% group_by(species, homeworld) %>% group_split() starwars %>% group_split(species, homeworld) mtcars %>% group_by(cyl) %>% group_map(~ head(.x, 2L))"},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"major-changes-0-8-0","dir":"Changelog","previous_headings":"","what":"Major changes","title":"dplyr 0.8.0 (2019-02-14)","text":"group_by() gains .drop argument. set FALSE groups generated based factor levels, hence groups may empty (#341). default behaviour drops empty groups previous versions. filter() slice() gain .preserve argument control groups keep. default filter(.preserve = FALSE) recalculates grouping structure based resulting data, otherwise kept . notion lazily grouped data frames disappeared. dplyr verbs now recalculate immediately grouping structure, respect levels factors. Subsets columns now properly dispatch [ [[ method column object (vector class) instead making assumptions column handled. [ method must handle integer indices, including NA_integer_, .e. 
x[NA_integer_] produce vector class x whatever represents missing value.","code":"# 3 groups tibble( x = 1:2, f = factor(c(\"a\", \"b\"), levels = c(\"a\", \"b\", \"c\")) ) %>% group_by(f, .drop = FALSE) # the order of the grouping variables matter df <- tibble( x = c(1,2,1,2), f = factor(c(\"a\", \"b\", \"a\", \"b\"), levels = c(\"a\", \"b\", \"c\")) ) df %>% group_by(f, x, .drop = FALSE) df %>% group_by(x, f, .drop = FALSE) tibble( x = 1:2, f = factor(c(\"a\", \"b\"), levels = c(\"a\", \"b\", \"c\")) ) %>% group_by(f) df <- tibble( x = c(1,2,1,2), f = factor(c(\"a\", \"b\", \"a\", \"b\"), levels = c(\"a\", \"b\", \"c\")) ) %>% group_by(x, f, .drop = FALSE) df %>% filter(x == 1) df %>% filter(x == 1, .preserve = TRUE)"},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"minor-changes-0-8-0","dir":"Changelog","previous_headings":"","what":"Minor changes","title":"dplyr 0.8.0 (2019-02-14)","text":"tally() works correctly non-data frame table sources tbl_sql (#3075). sample_n() sample_frac() can use n() (#3527) distinct() respects order variables provided (#3195, @foo-bar-baz-qux) handles 0 rows 0 columns special case (#2954). combine() uses tidy dots (#3407). group_indices() can used without argument expressions verbs (#1185). Using mutate_all(), transmute_all(), mutate_if() transmute_if() grouped tibbles now informs grouping variables ignored. case _all() verbs, message invites use mutate_at(df, vars(-group_cols())) (equivalent transmute_at() call) instead ’d like make explicit code operation applied grouping variables. Scoped variants arrange() respect .by_group argument (#3504). first() last() hybrid functions fall back R evaluation given arguments (#3589). mutate() removes column expression evaluates NULL groups (#2945). grouped data frames support [, drop = TRUE] (#3714). New low-level constructor new_grouped_df() validator validate_grouped_df (#3837). glimpse() prints group information grouped tibbles (#3384). sample_n() sample_frac() gain ... 
(#2888). Scoped filter variants now support functions purrr-like lambdas:","code":"mtcars %>% filter_at(vars(hp, vs), ~ . %% 2 == 0)"},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"lifecycle-0-8-0","dir":"Changelog","previous_headings":"","what":"Lifecycle","title":"dplyr 0.8.0 (2019-02-14)","text":"(), rowwise() combine() questioning (#3494). funs() soft-deprecated start issuing warnings future version.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"changes-to-column-wise-functions-0-8-0","dir":"Changelog","previous_headings":"","what":"Changes to column wise functions","title":"dplyr 0.8.0 (2019-02-14)","text":"Scoped variants distinct(): distinct_at(), distinct_if(), distinct_all() (#2948). summarise_at() excludes grouping variables (#3613). mutate_all(), mutate_at(), summarise_all() summarise_at() handle utf-8 names (#2967).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"performance-0-8-0","dir":"Changelog","previous_headings":"","what":"Performance","title":"dplyr 0.8.0 (2019-02-14)","text":"R expressions handled native code now evaluated unwind-protection available (R 3.5 later). improves performance dplyr data frames many groups (hence many expressions evaluate). benchmarked computing grouped average consistently twice fast unwind-protection enabled. Unwind-protection also makes dplyr robust corner cases ensures C++ destructors correctly called circumstances (debugger exit, captured condition, restart invocation). sample_n() sample_frac() gain ... (#2888). Improved performance wide tibbles (#3335). Faster hybrid sum(), mean(), var() sd() logical vectors (#3189). Hybrid version sum(na.rm = FALSE) exits early missing values. considerably improves performance missing values early vector (#3288). 
group_by() trigger additional mutate() simple uses .data pronoun (#3533).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"internal-0-8-0","dir":"Changelog","previous_headings":"","what":"Internal","title":"dplyr 0.8.0 (2019-02-14)","text":"grouping metadata grouped data frame reorganized single tidy tibble, can accessed new group_data() function. grouping tibble consists one column per grouping variable, followed list column (1-based) indices groups. new group_rows() function retrieves list indices (#3489). Hybrid evaluation completely redesigned better performance stability.","code":"# the grouping metadata, as a tibble group_by(starwars, homeworld) %>% group_data() # the indices group_by(starwars, homeworld) %>% group_data() %>% pull(.rows) group_by(starwars, homeworld) %>% group_rows()"},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"documentation-0-8-0","dir":"Changelog","previous_headings":"","what":"Documentation","title":"dplyr 0.8.0 (2019-02-14)","text":"Add documentation example moving variable back ?select (#3051). column wise functions better documented, particular explaining grouping variables included part selection.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"deprecated-and-defunct-functions-0-8-0","dir":"Changelog","previous_headings":"Documentation","what":"Deprecated and defunct functions","title":"dplyr 0.8.0 (2019-02-14)","text":"mutate_each() summarise_each() deprecated.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-076","dir":"Changelog","previous_headings":"","what":"dplyr 0.7.6","title":"dplyr 0.7.6","text":"CRAN release: 2018-06-29 exprs() longer exported avoid conflicts Biobase::exprs() (#3638). MASS package explicitly suggested fix CRAN warnings R-devel (#3657). Set operations like intersect() setdiff() reconstruct groups metadata (#3587) keep order rows (#3839). 
Using namespaced calls base::sort() base::unique() C++ code avoid ambiguities functions overridden (#3644). Fix rchk errors (#3693).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-075-2018-04-14","dir":"Changelog","previous_headings":"","what":"dplyr 0.7.5 (2018-04-14)","title":"dplyr 0.7.5 (2018-04-14)","text":"CRAN release: 2018-05-19","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"breaking-changes-for-package-developers-0-7-5","dir":"Changelog","previous_headings":"","what":"Breaking changes for package developers","title":"dplyr 0.7.5 (2018-04-14)","text":"major change version dplyr now depends selecting backend tidyselect package. linking dplyr::select_helpers documentation topic, update link point tidyselect::select_helpers. Another change causes warnings packages dplyr now exports exprs() function. causes collision Biobase::exprs(). Either import functions dplyr selectively rather bulk, import Biobase::exprs() refer namespace qualifier.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"bug-fixes-0-7-5","dir":"Changelog","previous_headings":"","what":"Bug fixes","title":"dplyr 0.7.5 (2018-04-14)","text":"distinct(data, \"string\") now returns one-row data frame . (previous behavior return data unchanged.) () operations one named argument can access . (#2998). Reindexing grouped data frames (e.g. filter() ..._join()) never updates \"class\" attribute. also avoids unintended updates original object (#3438). Fixed rare column name clash ..._join() non-join columns name tables (#3266). Fix ntile() row_number() ordering use locale-dependent ordering functions R dealing character vectors, rather always using C-locale ordering function C (#2792, @foo-bar-baz-qux). Summaries summaries (summarise(b = sum(), c = sum(b))) now computed using standard evaluation simplicity correctness, slightly slower (#3233). 
Fixed summarise() empty data frames zero columns (#3071).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"major-changes-0-7-5","dir":"Changelog","previous_headings":"","what":"Major changes","title":"dplyr 0.7.5 (2018-04-14)","text":"enexpr(), expr(), exprs(), sym() syms() now exported. sym() syms() construct symbols strings character vectors. expr() variants equivalent quo(), quos() enquo() return simple expressions rather quosures. support quasiquotation. dplyr now depends new tidyselect package power select(), rename(), pull() variants (#2896). Consequently select_vars(), select_var() rename_vars() soft-deprecated start issuing warnings future version. Following switch tidyselect, select() rename() fully support character vectors. can now unquote variables like : Note works selecting functions contexts strings character vectors ambiguous. instance strings valid input mutating operations mutate(df, \"foo\") creates new column recycling “foo” number rows.","code":"vars <- c(\"disp\", \"cyl\") select(mtcars, !! vars) select(mtcars, -(!! vars))"},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"minor-changes-0-7-5","dir":"Changelog","previous_headings":"","what":"Minor changes","title":"dplyr 0.7.5 (2018-04-14)","text":"Support raw vector columns arrange(), group_by(), mutate(), summarise() ..._join() (minimal raw x raw support initially) (#1803). bind_cols() handles unnamed list (#3402). bind_rows() works around corrupt columns object bit set class attribute (#3349). combine() returns logical() inputs NULL (inputs) (#3365, @zeehio). distinct() now supports renaming columns (#3234). Hybrid evaluation simplifies dplyr::foo() foo() (#3309). Hybrid functions can now masked regular R functions turn hybrid evaluation (#3255). hybrid evaluator finds functions dplyr even dplyr attached (#3456). mutate() now illegal use data.frame rhs (#3298). Support !!! recode_factor() (#3390). row_number() works empty subsets (#3454). 
select() vars() now treat NULL empty inputs (#3023). Scoped select rename functions (select_all(), rename_if() etc.) now work grouped data frames, adapting grouping necessary (#2947, #3410). group_by_at() can group existing grouping variable (#3351). arrange_at() can use grouping variables (#3332). slice() longer enforce tibble classes input simple data.frame, ignores 0 (#3297, #3313). transmute() longer prints message including group variable.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"documentation-0-7-5","dir":"Changelog","previous_headings":"","what":"Documentation","title":"dplyr 0.7.5 (2018-04-14)","text":"Improved documentation funs() (#3094) set operations (e.g. union()) (#3238, @edublancas).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"error-messages-0-7-5","dir":"Changelog","previous_headings":"","what":"Error messages","title":"dplyr 0.7.5 (2018-04-14)","text":"Better error message dbplyr installed accessing database backends (#3225). arrange() fails gracefully data.frame columns (#3153). Corrected error message calling cbind() object wrong length (#3085). Add warning explanation distinct() selected columns type list (#3088, @foo-bar-baz-qux), used unknown columns (#2867, @foo-bar-baz-qux). Show clear error message bad arguments funs() (#3368). Better error message ..._join() joining data frames duplicate NA column names. Joining data frames semi- anti-join now gives warning, may converted error future versions (#3243, #3417). Dedicated error message trying use columns Interval Period classes (#2568). 
Added .onDetach() hook allows plyr loaded attached without warning message says functions dplyr masked, since dplyr longer attached (#3359, @jwnorman).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"performance-0-7-5","dir":"Changelog","previous_headings":"","what":"Performance","title":"dplyr 0.7.5 (2018-04-14)","text":"sample_n() sample_frac() grouped data frame now faster especially large number groups (#3193, @saurfang).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"internal-0-7-5","dir":"Changelog","previous_headings":"","what":"Internal","title":"dplyr 0.7.5 (2018-04-14)","text":"Compute variable names joins R (#3430). Bumped Rcpp dependency 0.12.15 avoid imperfect detection NA values hybrid evaluation fixed RcppCore/Rcpp#790 (#2919). Avoid cleaning data mask, temporary environment used evaluate expressions. environment, e.g. mutate() expression evaluated, preserved operation, accessing variables environment now gives warning still returns NULL (#3318).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-074","dir":"Changelog","previous_headings":"","what":"dplyr 0.7.4","title":"dplyr 0.7.4","text":"CRAN release: 2017-09-28 Fix recent Fedora ASAN check errors (#3098). Avoid dependency Rcpp 0.12.10 (#3106).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-073","dir":"Changelog","previous_headings":"","what":"dplyr 0.7.3","title":"dplyr 0.7.3","text":"CRAN release: 2017-09-09 Fixed protection error occurred creating character column using grouped mutate() (#2971). Fixed rare problem accessing variable values summarise() groups size one (#3050). distinct() now throws error used unknown columns (#2867, @foo-bar-baz-qux). Fixed rare --bounds memory write slice() negative indices beyond number rows involved (#3073). select(), rename() summarise() longer change grouped vars original data (#3038). 
nth(default = var), first(default = var) last(default = var) fall back standard evaluation grouped operation instead triggering error (#3045). case_when() now works LHS atomic (#2909), LHS RHS values zero-length vectors (#3048). case_when() accepts NA LHS (#2927). Semi- anti-joins now preserve order left-hand-side data frame (#3089). Improved error message invalid list arguments bind_rows() (#3068). Grouping character vectors now faster (#2204). Fixed crash occurred unexpected input supplied call argument order_by() (#3065).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-072","dir":"Changelog","previous_headings":"","what":"dplyr 0.7.2","title":"dplyr 0.7.2","text":"CRAN release: 2017-07-20 Move build-time vs. run-time checks .onLoad() dr_dplyr().","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-071","dir":"Changelog","previous_headings":"","what":"dplyr 0.7.1","title":"dplyr 0.7.1","text":"CRAN release: 2017-06-22 Use new versions bindrcpp glue avoid protection problems. Avoid wrapping arguments internal error functions (#2877). Fix two protection mistakes found rchk (#2868). Fix C++ error caused compilation fail mac cran (#2862) Fix undefined behaviour (), NA_REAL assigned instead NA_LOGICAL. (#2855, @zeehio) top_n() now executes operations lazily compatibility database backends (#2848). Reuse new variables created ungrouped mutate() possible , regression introduced dplyr 0.7.0 (#2869). Quosured symbols prevent hybrid handling anymore. 
fix many performance issues introduced tidyeval (#2822).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-070","dir":"Changelog","previous_headings":"","what":"dplyr 0.7.0","title":"dplyr 0.7.0","text":"CRAN release: 2017-06-09","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"new-data-functions-and-features-0-7-0","dir":"Changelog","previous_headings":"","what":"New data, functions, and features","title":"dplyr 0.7.0","text":"Five new datasets provide interesting built-datasets demonstrate dplyr verbs (#2094): starwars dataset starwars characters; list columns storms trajectories ~200 tropical storms band_members, band_instruments band_instruments2 simple data demonstrate joins. New add_count() add_tally() adding n column within groups (#2078, @dgrtwo). arrange() grouped data frames gains .by_group argument can choose sort groups want (defaults FALSE) (#2318) New pull() generic extracting single column either name position (either left right). Thanks @paulponcet idea (#2054). verb powered new select_var() internal helper, exported well. like select_vars() returns single variable. as_tibble() re-exported tibble. recommend way create tibbles existing data frames. tbl_df() softly deprecated. tribble() now imported tibble (#2336, @chrMongeau); now preferred frame_data().","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"deprecated-and-defunct-0-7-0","dir":"Changelog","previous_headings":"","what":"Deprecated and defunct","title":"dplyr 0.7.0","text":"dplyr longer messages need dtplyr work data.table (#2489). Long deprecated regroup(), mutate_each_q() summarise_each_q() functions removed. Deprecated failwith(). ’m even sure . Soft-deprecated mutate_each() summarise_each(), functions print message changed warning next release. 
.env argument sample_n() sample_frac() defunct, passing value argument print message changed warning next release.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"databases-0-7-0","dir":"Changelog","previous_headings":"","what":"Databases","title":"dplyr 0.7.0","text":"version dplyr includes major changes database connections work. large, able continue using existing dplyr database code without modification, two big changes aware : Almost database related code moved dplyr new package, dbplyr. makes dplyr simpler, make easier release fixes bugs affect databases. src_mysql(), src_postgres(), src_sqlite() still live dplyr existing code continues work. longer necessary create remote “src”. Instead can work directly database connection returned DBI. reflects maturity DBI ecosystem. Thanks largely work Kirill Muller (funded R Consortium) DBI backends now much consistent, comprehensive, easier use. means ’s longer need layer DBI. can continue use src_mysql(), src_postgres(), src_sqlite(), recommend new style makes connection DBI clear: particularly useful want perform non-SELECT queries can whatever want DBI::dbGetQuery() DBI::dbExecute(). ’ve implemented database backend dplyr, please read backend news see ’s changed perspective (much). want ensure package works current previous version dplyr, see wrap_dbplyr_obj() helpers.","code":"library(dplyr) con <- DBI::dbConnect(RSQLite::SQLite(), \":memory:\") DBI::dbWriteTable(con, \"mtcars\", mtcars) mtcars2 <- tbl(con, \"mtcars\") mtcars2"},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"utf-0-7-0","dir":"Changelog","previous_headings":"","what":"UTF-8","title":"dplyr 0.7.0","text":"Internally, column names always represented character vectors, language symbols, avoid encoding problems Windows (#1950, #2387, #2388). Error messages explanations data frame inequality now encoded UTF-8, also Windows (#2441). Joins now always reencode character columns UTF-8 necessary. 
gives nice speedup, now pointer comparison can used instead string comparison, relies proper encoding tag strings (#2514). Fixed problems joining factor character encodings mix native UTF-8 encoded values (#1885, #2118, #2271, #2451). Fix group_by() data frames UTF-8 encoded names (#2284, #2382). New group_vars() generic returns grouping character vector, avoid potentially lossy conversion language symbols. list returned group_by_prepare() now new group_names component (#1950, #2384).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"colwise-functions-0-7-0","dir":"Changelog","previous_headings":"","what":"Colwise functions","title":"dplyr 0.7.0","text":"rename(), select(), group_by(), filter(), arrange() transmute() now scoped variants (verbs suffixed _if(), _at() _all()). Like mutate_all(), summarise_if(), etc, variants apply operation selection variables. scoped verbs taking predicates (mutate_if(), summarise_if(), etc) now support S3 objects lazy tables. S3 objects implement methods length(), [[ tbl_vars(). lazy tables, first 100 rows collected predicate applied subset data. robust common case checking type column (#2129). Summarise mutate colwise functions pass ... manipulation functions. performance colwise verbs like mutate_all() now back mutate_each(). funs() better handling namespaced functions (#2089). Fix issue mutate_if() summarise_if() predicate function returns vector FALSE (#1989, #2009, #2011).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"tidyeval-0-7-0","dir":"Changelog","previous_headings":"","what":"Tidyeval","title":"dplyr 0.7.0","text":"dplyr new approach non-standard evaluation (NSE) called tidyeval. 
described detail vignette(\"programming\") , brief, gives ability interpolate values contexts dplyr usually works expressions: ```{r} my_var <- quo(homeworld) starwars %>% group_by(!!my_var) %>% summarise_at(vars(height:mass), mean, na.rm = TRUE) ``` means underscored version main verb longer needed, functions deprecated (remain around backward compatibility). order_by(), top_n(), sample_n() sample_frac() now use tidyeval capture arguments expression. makes possible use unquoting idioms (see vignette(\"programming\")) fixes scoping issues (#2297). verbs taking dots now ignore last argument empty. makes easier copy lines code without worry deleting trailing commas (#1039). [API] new .data .env environments can used inside verbs operate data: .data$column_name accesses column column_name, whereas .env$var accesses external variable var. Columns external variables named .data .env shadowed, use .data$... /.env$... access . (.data implements strict matching also $ operator (#2591).) column() global() functions removed. never documented officially. Use new .data .env environments instead. Expressions verbs now interpreted correctly many cases failed (e.g., use $, case_when(), nonstandard evaluation, …). expressions now evaluated specially constructed temporary environment retrieves column data demand help bindrcpp package (#2190). temporary environment poses restrictions assignments using <- inside verbs. prevent leaking broken bindings, temporary environment cleared evaluation (#2435).","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"joins-0-7-0","dir":"Changelog","previous_headings":"Verbs","what":"Joins","title":"dplyr 0.7.0","text":"[API] xxx_join.tbl_df(na_matches = \"never\") treats NA values different (value), never match. corresponds behavior joins database sources, database joins general. match NA values, pass na_matches = \"na\" join verbs; supported data frames. 
default na_matches = \"na\", kept sake compatibility v0.5.0. can tweaked calling pkgconfig::set_config(\"dplyr::na_matches\", \"na\") (#2033). common_by() gets better error message unexpected inputs (#2091) Fix groups joining grouped data frames duplicate columns (#2330, #2334, @davidkretch). One two join suffixes can now empty string, dplyr longer hangs (#2228, #2445). Anti- semi-joins warn factor levels inconsistent (#2741). Warnings join column inconsistencies now contain column names (#2728).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"select-0-7-0","dir":"Changelog","previous_headings":"Verbs","what":"Select","title":"dplyr 0.7.0","text":"selecting variables, first selector decides ’s inclusive selection (.e., initial column list empty), exclusive selection (.e., initial column list contains columns). means select(mtcars, contains(\"\"), contains(\"FOO\"), contains(\"vs\")) now returns vs columns like dplyr 0.4.3 (#2275, #2289, @r2evans). Select helpers now throw error called variables set (#2452) Helper functions select() (related verbs) now evaluated context column names exist (#2184). select() (internal function select_vars()) now support column names addition column positions. result, expressions like select(mtcars, \"cyl\") now allowed.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"other-0-7-0","dir":"Changelog","previous_headings":"Verbs","what":"Other","title":"dplyr 0.7.0","text":"recode(), case_when() coalesce() now support splicing arguments rlang’s !!! operator. count() now preserves grouping input (#2021). distinct() longer duplicates variables (#2001). Empty distinct() grouped data frame works way empty distinct() ungrouped data frame, namely uses variables (#2476). copy_to() now returns output invisibly (since ’re often just calling side-effect). filter() lag() throw informative error used ts objects (#2219) mutate() recycles list columns length 1 (#2171). 
mutate() gives better error message attempting add non-vector column (#2319), attempting remove column NULL (#2187, #2439). summarise() now correctly evaluates newly created factors (#2217), can create ordered factors (#2200). Ungrouped summarise() uses summary variables correctly (#2404, #2453). Grouped summarise() longer converts character NA empty strings (#1839).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"combining-and-comparing-0-7-0","dir":"Changelog","previous_headings":"","what":"Combining and comparing","title":"dplyr 0.7.0","text":"all_equal() now reports multiple problems character vector (#1819, #2442). all_equal() checks factor levels equal (#2440, #2442). bind_rows() bind_cols() give error database tables (#2373). bind_rows() works correctly NULL arguments .id argument (#2056), also zero-column data frames (#2175). Breaking change: bind_rows() combine() strict coercing. Logical values longer coerced integer numeric. Date, POSIXct integer double-based classes longer coerced integer double chance attributes information lost (#2209, @zeehio). bind_cols() now calls tibble::repair_names() ensure names unique (#2248). bind_cols() handles empty argument list (#2048). bind_cols() better handles NULL inputs (#2303, #2443). bind_rows() explicitly rejects columns containing data frames (#2015, #2446). bind_rows() bind_cols() now accept vectors. treated rows former columns latter. Rows require inner names like c(col1 = 1, col2 = 2), columns require outer names: col1 = c(1, 2). Lists still treated data frames can spliced explicitly !!!, e.g. bind_rows(!!! x) (#1676). rbind_list() rbind_all() now call .Deprecated(), removed next CRAN release. Please use bind_rows() instead. combine() accepts NA values (#2203, @zeehio) combine() bind_rows() character factor types now always warn coercion character (#2317, @zeehio) combine() bind_rows() accept difftime objects. 
mutate coerces results grouped dataframes accepting combinable data types (integer numeric). (#1892, @zeehio)","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"vector-functions-0-7-0","dir":"Changelog","previous_headings":"","what":"Vector functions","title":"dplyr 0.7.0","text":"%% gets new hybrid handler (#126). () returns NA left right NA (fixes #2562). case_when() supports NA values (#2000, @tjmahr). first(), last(), nth() better default values factor, Dates, POSIXct, data frame inputs (#2029). Fixed segmentation faults hybrid evaluation first(), last(), nth(), lead(), lag(). functions now always fall back R implementation called arguments hybrid evaluator handle (#948, #1980). n_distinct() gets larger hash tables given slightly better performance (#977). nth() ntile() careful proper data types return values (#2306). ntile() ignores NA computing group membership (#2564). lag() enforces integer n (#2162, @kevinushey). hybrid min() max() now always return numeric work correctly edge cases (empty input, NA, …) (#2305, #2436). min_rank(\"string\") longer segfaults hybrid evaluation (#2279, #2444). recode() can now recode factor types (#2268) recode() gains .dots argument support passing replacements list (#2110, @jlegewie).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"other-minor-changes-and-bug-fixes-0-7-0","dir":"Changelog","previous_headings":"","what":"Other minor changes and bug fixes","title":"dplyr 0.7.0","text":"Many error messages helpful referring column name position argument list (#2448). New is_grouped_df() alias .grouped_df(). tbl_vars() now group_vars argument set TRUE default. FALSE, group variables returned. Fixed segmentation fault calling rename() invalid grouped data frame (#2031). rename_vars() gains strict argument control error thrown try rename variable doesn’t exist. Fixed undefined behavior slice() zero-column data frame (#2490). Fixed rare case false match join (#2515). 
Restricted workaround match() R 3.3.0. (#1858). dplyr now warns load version R Rcpp installation different currently installed version (#2514). Fixed improper reuse attributes creating list column summarise() perhaps mutate() (#2231). mutate() summarise() always strip names attribute new updated columns, even ungrouped operations (#1689). Fixed rare error lead segmentation fault all_equal(ignore_col_order = FALSE) (#2502). “dim” “dimnames” attributes always stripped copying vector (#1918, #2049). grouped_df rowwise registered officially S3 classes. makes easier use S4 (#2276, @joranE, #2789). operations return tibbles now include \"tbl\" class. important correct printing tibble 1.3.1 (#2789). Makeflags uses PKG_CPPFLAGS defining preprocessor macros. astyle formatting C++ code, tested changed part tests (#2086, #2103). Update RStudio project settings install tests (#1952). Using Rcpp::interfaces() register C callable interfaces, registering native exported functions via R_registerRoutines() useDynLib(.registration = TRUE) (#2146). Formatting grouped data frames now works overriding tbl_sum() generic instead print(). means output consistent tibble, format() now supported also SQL sources (#2781).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-050","dir":"Changelog","previous_headings":"","what":"dplyr 0.5.0","title":"dplyr 0.5.0","text":"CRAN release: 2016-06-24","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"existing-functions-0-5-0","dir":"Changelog","previous_headings":"Breaking changes","what":"Existing functions","title":"dplyr 0.5.0","text":"arrange() ignores grouping (#1206). distinct() now keeps distinct variables. want return variables (using first row non-distinct values) use .keep_all = TRUE (#1110). SQL sources, .keep_all = FALSE implemented using GROUP , .keep_all = TRUE raises error (#1937, #1942, @krlmlr). 
(default behaviour using variables none specified remains - note applies select variables). select helper functions starts_with(), ends_with() etc now real exported functions. means ’ll need import functions ’re using package dplyr attached. .e. dplyr::select(mtcars, starts_with(\"m\")) used work, now ’ll need dplyr::select(mtcars, dplyr::starts_with(\"m\")).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"deprecated-and-defunct-functions-0-5-0","dir":"Changelog","previous_headings":"Breaking changes","what":"Deprecated and defunct functions","title":"dplyr 0.5.0","text":"long deprecated chain(), chain_q() %.% removed. Please use %>% instead. id() deprecated. Please use group_indices() instead (#808). rbind_all() rbind_list() formally deprecated. Please use bind_rows() instead (#803). Outdated benchmarking demos removed (#1487). Code related starting signalling clusters moved multidplyr.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"new-functions-0-5-0","dir":"Changelog","previous_headings":"","what":"New functions","title":"dplyr 0.5.0","text":"coalesce() finds first non-missing value set vectors. (#1666, thanks @krlmlr initial implementation). case_when() general vectorised + else (#631). if_else() vectorised statement: ’s stricter (type-safe), faster, predictable version ifelse(). SQL translated CASE statement. na_if() makes easy replace certain value NA (#1707). SQL translated NULL_IF. near(x, y) helper abs(x - y) < tol (#1607). recode() vectorised equivalent switch() (#1710). union_all() method. Maps UNION SQL sources, bind_rows() data frames/tbl_dfs, combine() vectors (#1045). new family functions replace summarise_each() mutate_each() (thus deprecated future release). summarise_all() mutate_all() apply function columns summarise_at() mutate_at() operate subset columns. 
columns selected either character vector columns names, numeric vector column positions, column specification select() semantics generated new columns() helper. addition, summarise_if() mutate_if() take predicate function logical vector (verbs currently require local sources). functions can now take ordinary functions instead list functions generated funs() (though useful local sources). (#1845, @lionel-) select_if() lets select columns predicate function. compatible local sources. (#497, #1569, @lionel-)","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dtplyr-0-5-0","dir":"Changelog","previous_headings":"Local backends","what":"dtplyr","title":"dplyr 0.5.0","text":"data table related code separated new dtplyr package. decouples development data.table interface development dplyr package. data.table dplyr loaded, ’ll get message reminding load dtplyr.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"tibble-0-5-0","dir":"Changelog","previous_headings":"Local backends","what":"Tibble","title":"dplyr 0.5.0","text":"Functions related creation coercion tbl_dfs, now live package: tibble. See vignette(\"tibble\") details. $ [[ methods never partial matching (#1504), throw error variable exist. all_equal() allows compare data frames ignoring row column order, optionally ignoring minor differences type (e.g. int vs. double) (#821). test handles case df 0 columns (#1506). test fails fails convert FALSE types don’t match (#1484). all_equal() shows better error message comparing raw values types incompatible convert = TRUE (#1820, @krlmlr). add_row() makes easy add new row data frame (#1021) as_data_frame() now S3 generic methods lists (old as_data_frame()), data frames (trivial), matrices (efficient C++ implementation) (#876). longer strips subclasses. internals data_frame() as_data_frame() aligned, as_data_frame() now automatically recycle length-1 vectors. 
functions give informative error messages attempting create invalid data frame. can longer create data frame duplicated names (#820). check POSIXlt columns, tell use POSIXct instead (#813). frame_data() properly constructs rectangular tables (#1377, @kevinushey), supports list-cols. glimpse() now generic. default method dispatches str() (#1325). now (invisibly) returns first argument (#1570). lst() lst_() create lists way data_frame() data_frame_() create data frames (#1290). print.tbl_df() considerably faster wide data frames. now also list first 100 additional variables already screen - control new n_extra parameter print() (#1161). printing grouped data frame number groups now printed thousands separators (#1398). type list columns correctly printed (#1379) Package includes setOldClass(c(\"tbl_df\", \"tbl\", \"data.frame\")) help S4 dispatch (#969). tbl_df automatically generates column names (#1606).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"tbl_cube-0-5-0","dir":"Changelog","previous_headings":"Local backends","what":"tbl_cube","title":"dplyr 0.5.0","text":"new as_data_frame.tbl_cube() (#1563, @krlmlr). tbl_cubes now constructed correctly data frames, duplicate dimension values detected, missing dimension values filled NA. construction data frames now guesses measure variables default, allows specification dimension /measure variables (#1568, @krlmlr). Swap order dim_names met_name arguments .tbl_cube (array, table matrix) consistency tbl_cube .tbl_cube.data.frame. Also, met_name argument .tbl_cube.table now defaults \"Freq\" consistency .data.frame.table (@krlmlr, #1374).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"remote-backends-0-5-0","dir":"Changelog","previous_headings":"","what":"Remote backends","title":"dplyr 0.5.0","text":"as_data_frame() SQL sources now returns rows (#1752, #1821, @krlmlr). compute() gets new parameters indexes unique_indexes make easier add indexes (#1499, @krlmlr). 
db_explain() gains default method DBIConnections (#1177). backend testing system improved. lead removal temp_srcs(). unlikely event using function, can instead use test_register_src(), test_load(), test_frame(). can now use right_join() full_join() remote tables (#1172).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"sqlite-0-5-0","dir":"Changelog","previous_headings":"Remote backends","what":"SQLite","title":"dplyr 0.5.0","text":"src_memdb() session-local -memory SQLite database. memdb_frame() works like data_frame(), creates new table database. src_sqlite() now uses stricter quoting character, `, instead \". SQLite “helpfully” convert \"x\" string identifier called x current scope (#1426). src_sqlite() throws errors try use window functions (#907).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"sql-translation-0-5-0","dir":"Changelog","previous_headings":"Remote backends","what":"SQL translation","title":"dplyr 0.5.0","text":"filter.tbl_sql() now puts parens around argument (#934). Unary - better translated (#1002). escape.POSIXt() method makes easier use date times. date rendered ISO 8601 format UTC, work databases (#857). .na() gets missing space (#1695). , .na(), .null() get extra parens make precedence clear (#1695). pmin() pmax() translated MIN() MAX() (#1711). Window functions: Work ungrouped data (#1061). Warning order set cumulative window functions. Multiple partitions ordering variables windowed functions longer generate extra parentheses, work databases (#1060)","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"internals-0-5-0","dir":"Changelog","previous_headings":"Remote backends","what":"Internals","title":"dplyr 0.5.0","text":"version includes almost total rewrite dplyr verbs translated SQL. Previously, used rather ad-hoc approach, tried guess new subquery needed. Unfortunately approach fraught bugs, version ’ve implemented much richer internal data model. 
Now three step process: applied tbl_lazy, dplyr verb captures inputs stores op (short operation) object. sql_build() iterates operations building build object represents SQL query. objects convenient testing lists, backend agnostics. sql_render() iterates queries generates SQL, using generics (like sql_select()) can vary based backend. short-term, increased abstraction likely lead minor performance decreases, chance dplyr generating correct SQL much much higher. long-term, abstractions make possible write query optimiser/compiler dplyr, make possible generate much succinct queries. written dplyr backend, ’ll need make minor changes package: sql_join() considerably simplified - now responsible generating join query, generating intermediate selects rename variable. Similarly sql_semi_join(). ’ve provided new methods backend, ’ll need rewrite. select_query() gains distinct argument used generating queries distinct(). loses offset argument never used (hence never tested). src_translate_env() replaced sql_translate_env() methods connection object. two tweaks exported API, less likely affect anyone. translate_sql() partial_eval() got new API: now use connection + variable names, rather tbl. makes testing considerably easier. translate_sql_q() renamed translate_sql_(). Also note sql generation generics now default method, instead methods DBIConnection NULL.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"single-table-verbs-0-5-0","dir":"Changelog","previous_headings":"Minor improvements and bug fixes","what":"Single table verbs","title":"dplyr 0.5.0","text":"Avoiding segfaults presence raw columns (#1803, #1817, @krlmlr). arrange() fails gracefully list columns (#1489) matrices (#1870, #1945, @krlmlr). count() now adds additional grouping variables, rather overriding existing (#1703). tally() count() can now count variable called n (#1633). Weighted count()/tally() ignore NAs (#1145). 
progress bar () now updated 20 times per second, avoiding unnecessary redraws (#1734, @mkuhn) distinct() doesn’t crash given 0-column data frame (#1437). filter() throws error supply named arguments. usually type: filter(df, x = 1) instead filter(df, x == 1) (#1529). summarise() correctly coerces factors different levels (#1678), handles min/max already summarised variable (#1622), supports data frames columns (#1425). select() now informs adds missing grouping variables (#1511). works even grouping variable non-syntactic name (#1138). Negating failed match (e.g. select(mtcars, -contains(\"x\"))) returns columns, instead columns (#1176) select() helpers now exported documentation (#1410). one_of() gives useful error message variables names found data frame (#1407). naming behaviour summarise_each() mutate_each() tweaked can force inclusion function variable name: summarise_each(mtcars, funs(mean = mean), everything()) (#442). mutate() handles factors NA (#1645), different levels different groups (#1414). disambiguates NA NaN (#1448), silently promotes groups contain NA (#1463). deep copies data list columns (#1643), correctly fails incompatible columns (#1641). mutate() grouped data longer groups grouping attributes (#1120). rowwise() mutate gives expected results (#1381). one_of() tolerates unknown variables vars, warns (#1848, @jennybc). print.grouped_df() passes ... print() (#1893). slice() correctly handles grouped attributes (#1405). ungroup() generic gains ... (#922).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dual-table-verbs-0-5-0","dir":"Changelog","previous_headings":"Minor improvements and bug fixes","what":"Dual table verbs","title":"dplyr 0.5.0","text":"bind_cols() matches behaviour bind_rows() ignores NULL inputs (#1148). also handles POSIXcts integer base type (#1402). bind_rows() handles 0-length named lists (#1515), promotes factors characters (#1538), warns binding factor character (#1485). 
bind_rows()` flexible way can accept data frames, lists, list data frames, list lists (#1389). bind_rows() rejects POSIXlt columns (#1875, @krlmlr). bind_cols() bind_rows() infer classes grouping information first data frame (#1692). rbind() cbind() get grouped_df() methods make harder create corrupt data frames (#1385). still prefer bind_rows() bind_cols(). Joins now use correct class joining POSIXct columns (#1582, @joel23888), consider time zones (#819). Joins handle empty (#1496), duplicates (#1192). Suffixes grow progressively avoid creating repeated column names (#1460). Joins string columns substantially faster (#1386). Extra attributes ok identical (#1636). Joins work correct factor levels equal (#1712, #1559). Anti- semi-joins give correct result variable factor (#1571), warn factor levels inconsistent (#2741). clear error message given joins explicit contains unavailable columns (#1928, #1932). Warnings join column inconsistencies now contain column names (#2728). inner_join(), left_join(), right_join(), full_join() gain suffix argument allows control suffix duplicated variable names receive (#1296). Set operations (intersect(), union() etc) respect coercion rules (#799). setdiff() handles factors NA levels (#1526). number fixes enable joining data frames don’t encoding column names (#1513), including working around bug 16885 regarding match() R 3.3.0 (#1806, #1810, @krlmlr).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"vector-functions-0-5-0","dir":"Changelog","previous_headings":"Minor improvements and bug fixes","what":"Vector functions","title":"dplyr 0.5.0","text":"combine() silently drops NULL inputs (#1596). Hybrid cummean() stable floating point errors (#1387). Hybrid lead() lag() received considerable overhaul. careful complicated expressions (#1588), falls back readily pure R evaluation (#1411). behave correctly summarise() (#1434). handle default values string columns. Hybrid min() max() handle empty sets (#1481). 
n_distinct() uses multiple arguments data frames (#1084), falls back R evaluation needed (#1657), reverting decision made (#567). Passing arguments gives error (#1957, #1959, @krlmlr). nth() now supports negative indices select end, e.g. nth(x, -2) selects 2nd value end x (#1584). top_n() can now also select bottom n values passing negative value n (#1008, #1352). Hybrid evaluation leaves formulas untouched (#1447).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-043","dir":"Changelog","previous_headings":"","what":"dplyr 0.4.3","title":"dplyr 0.4.3","text":"CRAN release: 2015-09-01","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"improved-encoding-support-0-4-3","dir":"Changelog","previous_headings":"","what":"Improved encoding support","title":"dplyr 0.4.3","text":"now, dplyr’s support non-UTF8 encodings rather shaky. release brings number improvement fix problems: ’s probably perfect, lot better previously version. includes fixes arrange() (#1280), bind_rows() (#1265), distinct() (#1179), joins (#1315). print.tbl_df() also received fix strings invalid encodings (#851).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"other-minor-improvements-and-bug-fixes-0-4-3","dir":"Changelog","previous_headings":"","what":"Other minor improvements and bug fixes","title":"dplyr 0.4.3","text":"frame_data() provides means constructing data_frames using simple row-wise language. (#1358, @kevinushey) .equal() longer runs outputs together (#1130). as_data_frame() gives better error message NA column names (#1101). [.tbl_df careful subsetting column names (#1245). arrange() mutate() work empty data frames (#1142). arrange(), filter(), slice(), summarise() preserve data frame meta attributes (#1064). bind_rows() bind_cols() accept lists (#1104): initial data cleaning longer need convert lists data frames, can instead feed bind_rows() directly. bind_rows() gains .id argument. 
supplied, creates new column gives name data frame (#1337, @lionel-). bind_rows() respects ordered attribute factors (#1112), better comparing POSIXcts (#1125). tz attribute ignored determining two POSIXct vectors comparable. tz inputs , ’s used, otherwise set UTC. data_frame() always produces tbl_df (#1151, @kevinushey) filter(x, TRUE, TRUE) now just returns x (#1210), doesn’t internally modify first argument (#971), now works rowwise data (#1099). works data tables (#906). glimpse() also prints number variables addition number observations (@ilarischeinin, #988). Joins handles matrix columns better (#1230), can join Date objects heterogeneous representations (Dates integers, numeric). also improves .equal() (#1204). Fixed percent_rank() cume_dist() missing values longer affect denominator (#1132). print.tbl_df() now displays class variables, just don’t fit screen (#1276). also displays duplicated column names correctly (#1159). print.grouped_df() now tells many groups . mutate() can set NULL first column (used segfault, #1329) better protects intermediary results (avoiding random segfaults, #1231). mutate() grouped data handles special case first groups, result consists logical vector NA. can happen condition ifelse NA logical vector (#958). mutate.rowwise_df() handles factors (#886) correctly handles 0-row inputs (#1300). n_distinct() gains na_rm argument (#1052). Progress bar used () now respects global option dplyr.show_progress (default TRUE) can turn globally (@jimhester #1264, #1226). summarise() handles expressions returning heterogenous outputs, e.g. median(), sometimes returns integer, times numeric (#893). slice() silently drops columns corresponding NA (#1235). ungroup.rowwise_df() gives tbl_df (#936). explicit duplicated column name error message (#996). 
“,” already used decimal point (getOption(\"OutDec\")), use “.” thousands separator printing formatted numbers (@ilarischeinin, #988).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"databases-0-4-3","dir":"Changelog","previous_headings":"","what":"Databases","title":"dplyr 0.4.3","text":"db_query_fields.SQLiteConnection uses build_sql rather paste0 (#926, @NikNakk) Improved handling log() (#1330). n_distinct(x) translated COUNT(DISTINCT(x)) (@skparkes, #873). print(n = Inf) now works remote sources (#1310).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"hybrid-evaluation-0-4-3","dir":"Changelog","previous_headings":"","what":"Hybrid evaluation","title":"dplyr 0.4.3","text":"Hybrid evaluation take place objects class (#1237). Improved $ handling (#1134). Simplified code lead() lag() make sure work properly factors (#955). respect default argument (#915). mutate can set NULL first column (used segfault, #1329). filter grouped data handles indices correctly (#880). sum() issues warning integer overflow (#1108).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-042","dir":"Changelog","previous_headings":"","what":"dplyr 0.4.2","title":"dplyr 0.4.2","text":"CRAN release: 2015-06-16 minor release containing fixes number crashes issues identified R CMD CHECK. one new “feature”: dplyr longer complains unrecognised attributes, instead just copies output. lag() lead() grouped data confused indices therefore produced wrong results (#925, #937). lag() overrides lag() instead just default method lag.default(). necessary due changes R CMD check. use lag function provided another package, use pkg::lag. Fixed number memory issues identified valgrind. Improved performance working large number columns (#879). Lists-cols contain data frames now print slightly nicer summary (#1147) Set operations give useful error message incompatible data frames (#903). 
.equal() gives correct result ignore_row_order TRUE (#1065) .equal() correctly handles character missing values (#1095). bind_cols() always produces tbl_df (#779). bind_rows() gains test form data frame corruption (#1074). bind_rows() summarise() now handles complex columns (#933). Workaround using constructor DataFrame unprotected object (#998) Improved performance working large number columns (#879).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-041","dir":"Changelog","previous_headings":"","what":"dplyr 0.4.1","title":"dplyr 0.4.1","text":"CRAN release: 2015-01-14 Don’t assume RPostgreSQL available.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-040","dir":"Changelog","previous_headings":"","what":"dplyr 0.4.0","title":"dplyr 0.4.0","text":"CRAN release: 2015-01-08","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"new-features-0-4-0","dir":"Changelog","previous_headings":"","what":"New features","title":"dplyr 0.4.0","text":"add_rownames() turns row names explicit variable (#639). as_data_frame() efficiently coerces list data frame (#749). bind_rows() bind_cols() efficiently bind list data frames row column. combine() applies coercion rules vectors (works like c() unlist() consistent bind_rows() rules). right_join() (include rows y, matching rows x) full_join() (include rows x y) complete family mutating joins (#96). group_indices() computes unique integer id group (#771). can called grouped_df without arguments data frame arguments group_by().","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"new-vignettes-0-4-0","dir":"Changelog","previous_headings":"","what":"New vignettes","title":"dplyr 0.4.0","text":"vignette(\"data_frames\") describes dplyr functions make easier faster create coerce data frames. subsumes old memory vignette. 
vignette(\"two-table\") describes two-table verbs work dplyr.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"minor-improvements-0-4-0","dir":"Changelog","previous_headings":"","what":"Minor improvements","title":"dplyr 0.4.0","text":"data_frame() (as_data_frame() & tbl_df()) now explicitly forbid columns data frames matrices (#775). columns must either 1d atomic vector 1d list. () uses lazyeval correctly evaluate arguments correct environment (#744), new do_() SE equivalent () (#718). can modify grouped data place: probably bad idea ’s sometimes convenient (#737). () grouped data tables now passes columns (columns except grouping vars) (#735, thanks @kismsu). () database tables longer potentially includes grouping variables twice (#673). Finally, () gives consistent outputs rows groups (#625). first() last() preserve factors, dates times (#509). Overhaul single table verbs data.table backend. now use consistent (simpler) code base. ensures (e.g.) n() now works verbs (#579). *_join(), can now name variables different two tables, e.g. inner_join(x, y, c(\"\", \"b\", \"c\" = \"d\")) (#682). non-join columns , dplyr add .x .y suffixes distinguish source (#655). mutate() handles complex vectors (#436) forbids POSIXlt results (instead crashing) (#670). select() now implements sophisticated algorithm ’re multiples includes excludes without names, ’re likely get expect (#644). ’ll also get better error message supply input doesn’t resolve integer column position (#643). Printing received number small tweaks. print() methods invisibly return input can interleave print() statements pipeline see interim results. print() column names 0 row data frames (#652), never print 20 rows (.e. options(dplyr.print_max) now 20), 100 (#710). Row names never printed since dplyr method guaranteed preserve (#669). glimpse() prints number observations (#692) type_sum() gains data frame method. 
summarise() handles list output columns (#832) slice() works data tables (#717). Documentation clarifies slice can’t work relational databases, examples show achieve results using filter() (#720). dplyr now requires RSQLite >= 1.0. shouldn’t affect code way (except RSQLite now doesn’t need attached) simplify internals (#622). Functions need combine multiple results single column (e.g. join(), bind_rows() summarise()) careful coercion. Joining factors levels order preserves original levels (#675). Joining factors non-identical levels generates warning coerces character (#684). Joining character factor (vice versa) generates warning coerces character. Avoid warnings ensuring data compatible joining. rbind_list() throw error attempt combine integer factor (#751). rbind()ing column full NAs allowed just collects appropriate missing value column type collected (#493). summarise() careful NA, e.g. decision result type delayed first non NA value returned (#599). complain loss precision coercions, can happen expressions return integers groups doubles others (#599). number functions gained new improved hybrid handlers: first(), last(), nth() (#626), lead() & lag() (#683), %% (#126). means use functions dplyr verb, handle C++, rather calling back R, hence improving performance. Hybrid min_rank() correctly handles NaN values (#726). Hybrid implementation nth() falls back R evaluation n length one integer numeric, e.g. ’s expression (#734). Hybrid dense_rank(), min_rank(), cume_dist(), ntile(), row_number() percent_rank() now preserve NAs (#774) filter returns input rows columns (#782). Join functions keep attributes (e.g. 
time zone information) left argument POSIXct Date objects (#819), warn incompatibility (#798).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"bug-fixes-0-4-0","dir":"Changelog","previous_headings":"","what":"Bug fixes","title":"dplyr 0.4.0","text":"[.tbl_df correctly computes row names 0-column data frames, avoiding problems xtable (#656). [.grouped_df silently drop grouping don’t include grouping columns (#733). data_frame() now acts correctly first argument vector recycled. (#680 thanks @jimhester) filter.data.table() works table variable called “V1” (#615). *_join() keeps columns original order (#684). Joining factor character vector doesn’t segfault (#688). *_join functions can now deal multiple encodings (#769), correctly name results (#855). *_join.data.table() works data.table isn’t attached (#786). group_by() data table preserves original order rows (#623). group_by() supports variables 39 characters thanks fix lazyeval (#705). gives meaningful error message variable found data frame (#716). grouped_df() requires vars list symbols (#665). min(.,na.rm = TRUE) works Dates built numeric vectors (#755). rename_() generic gets missing .dots argument (#708). row_number(), min_rank(), percent_rank(), dense_rank(), ntile() cume_dist() handle data frames 0 rows (#762). preserve missing values (#774). row_number() doesn’t segfault giving external variable wrong number variables (#781). group_indices handles edge case variables (#867). 
Removed bogus NAs introduced coercion integer range 32-bit Windows (#2708).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-0301","dir":"Changelog","previous_headings":"","what":"dplyr 0.3.0.1","title":"dplyr 0.3.0.1","text":"CRAN release: 2014-10-08 Fixed problem test script Windows.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-03","dir":"Changelog","previous_headings":"","what":"dplyr 0.3","title":"dplyr 0.3","text":"CRAN release: 2014-10-04","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"new-functions-0-3","dir":"Changelog","previous_headings":"","what":"New functions","title":"dplyr 0.3","text":"() vector function efficiently determines numeric values fall range, translated special form SQL (#503). count() makes even easier (weighted) counts (#358). data_frame() @kevinushey nicer way creating data frames. never coerces column types (stringsAsFactors = FALSE!), never munges column names, never adds row names. can use previously defined columns compute new columns (#376). distinct() returns distinct (unique) rows tbl (#97). Supply additional variables return first row unique combination variables. Set operations, intersect(), union() setdiff() now methods data frames, data tables SQL database tables (#93). pass arguments base functions, ensure raise errors pass two many arguments. Joins (e.g. left_join(), inner_join(), semi_join(), anti_join()) now allow join different variables x y tables supplying named vector . example, = c(\"\" = \"b\") joins x.y.b. n_groups() function tells many groups tbl. returns 1 ungrouped data. (#477) transmute() works like mutate() drops variables didn’t explicitly refer (#302). rename() makes easy rename variables - works similarly select() preserves columns didn’t otherwise touch. slice() allows selecting rows position (#226). 
includes positive integers, drops negative integers can use expression like n().","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"programming-with-dplyr-non-standard-evaluation-0-3","dir":"Changelog","previous_headings":"","what":"Programming with dplyr (non-standard evaluation)","title":"dplyr 0.3","text":"can now program dplyr - every function non-standard evaluation (NSE) standard evaluation (SE) version ending _. powered new lazyeval package provides tools needed implement NSE consistently correctly. See vignette(\"nse\") full details. regroup() deprecated. Please use flexible group_by_() instead. summarise_each_q() mutate_each_q() deprecated. Please use summarise_each_() mutate_each_() instead. funs_q replaced funs_.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"removed-and-deprecated-features-0-3","dir":"Changelog","previous_headings":"","what":"Removed and deprecated features","title":"dplyr 0.3","text":"%.% deprecated: please use %>% instead. chain() defunct. (#518) filter.numeric() removed. Need figure reimplement new lazy eval system. Progress refclass longer exported avoid conflicts shiny. Instead use progress_estimated() (#535). src_monetdb() now implemented MonetDB.R, dplyr. show_sql() explain_sql() matching global options dplyr.show_sql dplyr.explain_sql removed. Instead use show_query() explain().","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"minor-improvements-and-bug-fixes-0-3","dir":"Changelog","previous_headings":"","what":"Minor improvements and bug fixes","title":"dplyr 0.3","text":"Main verbs now individual documentation pages (#519). %>% simply re-exported magrittr, instead creating local copy (#496, thanks @jimhester) Examples now use nycflights13 instead hflights variables better names interlinked tables (#562). Lahman nycflights13 () suggested packages. 
means many examples work unless explicitly install install.packages(c(\"Lahman\", \"nycflights13\")) (#508). dplyr now depends Lahman 3.0.1. number examples updated reflect modified field names (#586). () now displays progress bar used interactive prompts knitting (#428, @jimhester). glimpse() now prints trailing new line (#590). group_by() consistent behaviour grouping constants: creates new column value (#410). renames grouping variables (#410). first argument now .data can create new groups name x (#534). Now instead overriding lag(), dplyr overrides lag.default(), avoid clobbering lag methods added packages. (#277). mutate(data, = NULL) removes variable returned dataset (#462). trunc_mat() hence print.tbl_df() friends gets width argument control default output width. Set options(dplyr.width = Inf) always show columns (#589). select() gains one_of() selector: allows select variables provided character vector (#396). fails immediately give empty pattern starts_with(), ends_with(), contains() matches() (#481, @leondutoit). Fixed buglet select() can now create variables called val (#564). Switched RC R6. tally() top_n() work consistently: neither accidentally evaluates wt param. (#426, @mnel) rename handles grouped data (#640).","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"databases-0-3","dir":"Changelog","previous_headings":"Minor improvements and bug fixes by backend","what":"Databases","title":"dplyr 0.3","text":"Correct SQL generation paste() used collapse parameter targeting Postgres database. (@rbdixon, #1357) db backend system completely overhauled order make possible add backends packages, support much wider range databases. See vignette(\"new-sql-backend\") instruction create (#568). src_mysql() gains method explain(). mutate() creates new variable uses window function, automatically wrap result subquery (#484). Correct SQL generation first() last() (#531). 
order_by() now works conjunction window functions databases support .","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"data-framestbl_df-0-3","dir":"Changelog","previous_headings":"Minor improvements and bug fixes by backend","what":"Data frames/tbl_df","title":"dplyr 0.3","text":"verbs now understand work difftime() (#390) AsIs (#453) objects. check colnames unique (#483), robust columns present (#348, #569, #600). Hybrid evaluation bugs fixed: Call substitution stopped early sub expression contained $ (#502). Handle :: ::: (#412). cumany() cumall() properly handle NA (#408). nth() now correctly preserve class using dates, times factors (#509). longer substitutes within order_by() order_by() needs NSE (#169). [.tbl_df always returns tbl_df (.e. drop = FALSE default) (#587, #610). [.grouped_df preserves important output attributes (#398). arrange() keeps grouping structure grouped data (#491, #605), preserves input classes (#563). contains() accidentally matched regular expressions, now passes fixed = TRUE grep() (#608). filter() asserts variables white listed (#566). mutate() makes rowwise_df given rowwise_df (#463). rbind_all() creates tbl_df objects instead raw data.frames. select() doesn’t match variables, returns 0-column data frame, instead original (#498). longer fails columns named (#492) sample_n() sample_frac() methods data.frames exported. (#405, @alyst) grouped data frame may 0 groups (#486). Grouped df objects gain basic validity checking, prevent crashes related corrupt grouped_df objects made rbind() (#606). coherence joining columns compatible different types, e.g. joining character vector factor (#455), numeric integer (#450) mutate() works zero-row grouped data frame, list columns (#555). LazySubset confused input data size (#452). Internal n_distinct() stricter inputs: requires one symbol must data frame (#567). rbind_*() handle data frames 0 rows (#597). fill character vector columns NA instead blanks (#595). 
work list columns (#463). Improved handling encoding column names (#636). Improved handling hybrid evaluation re $ @ (#645).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"data-tables-0-3","dir":"Changelog","previous_headings":"Minor improvements and bug fixes by backend","what":"Data tables","title":"dplyr 0.3","text":"Fix major omission tbl_dt() grouped_dt() methods - accidentally deep copy every result :( summarise() group_by() now retain -allocation working data.tables (#475, @arunsrinivasan). joining two data.tables now correctly dispatches data table methods, result data table (#470)","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"cubes-0-3","dir":"Changelog","previous_headings":"Minor improvements and bug fixes by backend","what":"Cubes","title":"dplyr 0.3","text":"summarise.tbl_cube() works single grouping variable (#480).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-02","dir":"Changelog","previous_headings":"","what":"dplyr 0.2","title":"dplyr 0.2","text":"CRAN release: 2014-05-21","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"piping-0-2","dir":"Changelog","previous_headings":"","what":"Piping","title":"dplyr 0.2","text":"dplyr now imports %>% magrittr (#330). recommend use instead %.% easier type (since can hold shift key) flexible. %>%, can control argument RHS receives LHS using pronoun .. makes %>% useful base R functions don’t always take data frame first argument. example pipe mtcars xtabs() : Thanks @smbache excellent magrittr package. dplyr provides %>% magrittr, contains many useful functions. use , load magrittr explicitly: library(magrittr). details, see vignette(\"magrittr\"). %.% deprecated future version dplyr, won’t happen . 
’ve also deprecated chain() encourage single style dplyr usage: please use %>% instead.","code":"mtcars %>% xtabs( ~ cyl + vs, data = .)"},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"do-0-2","dir":"Changelog","previous_headings":"","what":"Do","title":"dplyr 0.2","text":"() completely overhauled. now two ways use , either multiple named arguments single unnamed arguments. group_by() + () equivalent plyr::dlply, except always returns data frame. use named arguments, argument becomes list-variable output. list-variable can contain arbitrary R object ’s particularly well suited storing models. use unnamed argument, result data frame. allows apply arbitrary functions group. Note use . pronoun refer data current group. () also automatic progress bar. appears computation takes longer 5 seconds lets know (approximately) much longer job take complete.","code":"library(dplyr) models <- mtcars %>% group_by(cyl) %>% do(lm = lm(mpg ~ wt, data = .)) models %>% summarise(rsq = summary(lm)$r.squared) mtcars %>% group_by(cyl) %>% do(head(., 1))"},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"new-verbs-0-2","dir":"Changelog","previous_headings":"","what":"New verbs","title":"dplyr 0.2","text":"dplyr 0.2 adds three new verbs: glimpse() makes possible see columns tbl, displaying much data variable can fit single line. sample_n() randomly samples fixed number rows tbl; sample_frac() randomly samples fixed fraction rows. works local data frames data tables (#202). summarise_each() mutate_each() make easy apply one functions multiple columns tbl (#178).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"minor-improvements-0-2","dir":"Changelog","previous_headings":"","what":"Minor improvements","title":"dplyr 0.2","text":"load plyr dplyr, ’ll get message suggesting load plyr first (#347). 
.tbl_cube() gains method matrices (#359, @paulstaab) compute() gains temporary argument can control whether results temporary permanent (#382, @cpsievert) group_by() now defaults add = FALSE sets grouping variables rather adding existing list. think people expected group_by work anyway, ’s unlikely cause problems (#385). Support MonetDB tables src_monetdb() (#8, thanks @hannesmuehleisen). New vignettes: memory vignette discusses dplyr minimises memory usage local data frames (#198). new-sql-backend vignette discusses add new SQL backend/source dplyr. changes() output clearly distinguishes columns added deleted. explain() now generic. dplyr careful setting keys data tables, never accidentally modifies object doesn’t . also avoids unnecessary key setting negatively affected performance. (#193, #255). print() methods tbl_df, tbl_dt tbl_sql gain n argument control number rows printed (#362). also works better columns containing lists complex objects. row_number() can called without arguments, case returns 1:n() (#303). \"comment\" attribute allowed (white listed) well names (#346). hybrid versions min, max, mean, var, sd sum handle na.rm argument (#168). yield substantial performance improvements functions. Special case call arrange() grouped data frame arguments. (#369)","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"bug-fixes-0-2","dir":"Changelog","previous_headings":"","what":"Bug fixes","title":"dplyr 0.2","text":"Code adapted Rcpp > 0.11.1 internal DataDots class protects missing variables verbs (#314), including case ... missing. (#338) .equal.data.frame base longer bypassed. now .equal.tbl_df .equal.tbl_dt methods (#332). arrange() correctly handles NA numeric vectors (#331) 0 row data frames (#289). copy_to.src_mysql() now works windows (#323) *_join() doesn’t reorder column names (#324). rbind_all() stricter accepts list data frames (#288) rbind_* propagates time zone information POSIXct columns (#298). 
rbind_* less strict type promotion. numeric Collecter allows collection integer logical vectors. integer Collecter also collects logical values (#321). internal sum correctly handles integer (/)flow (#308). summarise() checks consistency outputs (#300) drops names attribute output columns (#357). join functions throw error instead crashing common variables data frames, also give better error message one data frame variable (#371). top_n() returns n rows instead n - 1 (@leondutoit, #367). SQL translation always evaluates subsetting operators ($, [, [[) locally. (#318). select() now renames variables remote sql tbls (#317) implicitly adds grouping variables (#170). internal grouped_df_impl function errors variables group (#398). n_distinct treat NA correctly numeric case #384. compiler warnings triggered -Wall -pedantic eliminated. group_by creates one group NA (#401). Hybrid evaluator evaluate expression correct environment (#403).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-013","dir":"Changelog","previous_headings":"","what":"dplyr 0.1.3","title":"dplyr 0.1.3","text":"CRAN release: 2014-03-15","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"bug-fixes-0-1-3","dir":"Changelog","previous_headings":"","what":"Bug fixes","title":"dplyr 0.1.3","text":"select() actually renames columns data table (#284). rbind_all() rbind_list() now handle missing values factors (#279). SQL joins now work better names duplicated x y tables (#310). Builds Rcpp 0.11.1 select() correctly works vars attribute (#309). Internal code stricter deciding data frame grouped (#308): avoids number situations previously caused problems. 
data frame joins work missing values keys (#306).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-012","dir":"Changelog","previous_headings":"","what":"dplyr 0.1.2","title":"dplyr 0.1.2","text":"CRAN release: 2014-02-24","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"new-features-0-1-2","dir":"Changelog","previous_headings":"","what":"New features","title":"dplyr 0.1.2","text":"select() substantially powerful. can use named arguments rename existing variables, new functions starts_with(), ends_with(), contains(), matches() num_range() select variables based names. now also makes shallow copy, substantially reducing memory impact (#158, #172, #192, #232). summarize() added alias summarise() people countries don’t don’t spell things correctly ;) (#245)","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"bug-fixes-0-1-2","dir":"Changelog","previous_headings":"","what":"Bug fixes","title":"dplyr 0.1.2","text":"filter() now fails given anything logical vector, correctly handles missing values (#249). filter.numeric() proxies stats::filter() can continue use filter() function numeric inputs (#264). summarise() correctly uses newly created variables (#259). mutate() correctly propagates attributes (#265) mutate.data.frame() correctly mutates variable repeatedly (#243). lead() lag() preserve attributes, now work dates, times factors (#166). n() never accepts arguments (#223). row_number() gives correct results (#227). rbind_all() silently ignores data frames 0 rows 0 columns (#274). group_by() orders result (#242). also checks columns supported types (#233, #276). hybrid evaluator handle expressions correctly, example (n() > 5) 1 else 2 subexpression n() substituted correctly. also correctly processes $ (#278). arrange() checks columns supported types (#266). also handles list columns (#282). Working towards Solaris compatibility. 
Benchmarking vignette temporarily disabled due microbenchmark problems reported BDR.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-011","dir":"Changelog","previous_headings":"","what":"dplyr 0.1.1","title":"dplyr 0.1.1","text":"CRAN release: 2014-01-29","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"improvements-0-1-1","dir":"Changelog","previous_headings":"","what":"Improvements","title":"dplyr 0.1.1","text":"new location() changes() functions provide information data frames stored memory can see gets copied. renamed explain_tbl() explain() (#182). tally() gains sort argument sort output highest counts come first (#173). ungroup.grouped_df(), tbl_df(), .data.frame.tbl_df() now make shallow copies inputs (#191). benchmark-baseball vignette now contains fairer (including grouping times) comparisons data.table. (#222)","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"bug-fixes-0-1-1","dir":"Changelog","previous_headings":"","what":"Bug fixes","title":"dplyr 0.1.1","text":"filter() (#221) summarise() (#194) correctly propagate attributes. summarise() throws error asked summarise unknown variable instead crashing (#208). group_by() handles factors missing values (#183). filter() handles scalar results (#217) better handles scoping, e.g. filter(., variable) variable defined function calls filter. also handles T F aliases TRUE FALSE T F variables data scope. 
select.grouped_df fails grouping variables included selected variables (#170) .equal.data.frame() handles corner case data frame NULL names (#217) mutate() gives informative error message unsupported types (#179) dplyr source package longer includes pandas benchmark, reducing download size 2.8 MB 0.5 MB.","code":""}] +[{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/CODE_OF_CONDUCT.html","id":"our-pledge","dir":"","previous_headings":"","what":"Our Pledge","title":"Contributor Covenant Code of Conduct","text":"members, contributors, leaders pledge make participation community harassment-free experience everyone, regardless age, body size, visible invisible disability, ethnicity, sex characteristics, gender identity expression, level experience, education, socio-economic status, nationality, personal appearance, race, caste, color, religion, sexual identity orientation. pledge act interact ways contribute open, welcoming, diverse, inclusive, healthy community.","code":""},{"path":"https://dplyr.tidyverse.org/dev/CODE_OF_CONDUCT.html","id":"our-standards","dir":"","previous_headings":"","what":"Our Standards","title":"Contributor Covenant Code of Conduct","text":"Examples behavior contributes positive environment community include: Demonstrating empathy kindness toward people respectful differing opinions, viewpoints, experiences Giving gracefully accepting constructive feedback Accepting responsibility apologizing affected mistakes, learning experience Focusing best just us individuals, overall community Examples unacceptable behavior include: use sexualized language imagery, sexual attention advances kind Trolling, insulting derogatory comments, personal political attacks Public private harassment Publishing others’ private information, physical email address, without explicit permission conduct reasonably considered inappropriate professional 
setting","code":""},{"path":"https://dplyr.tidyverse.org/dev/CODE_OF_CONDUCT.html","id":"enforcement-responsibilities","dir":"","previous_headings":"","what":"Enforcement Responsibilities","title":"Contributor Covenant Code of Conduct","text":"Community leaders responsible clarifying enforcing standards acceptable behavior take appropriate fair corrective action response behavior deem inappropriate, threatening, offensive, harmful. Community leaders right responsibility remove, edit, reject comments, commits, code, wiki edits, issues, contributions aligned Code Conduct, communicate reasons moderation decisions appropriate.","code":""},{"path":"https://dplyr.tidyverse.org/dev/CODE_OF_CONDUCT.html","id":"scope","dir":"","previous_headings":"","what":"Scope","title":"Contributor Covenant Code of Conduct","text":"Code Conduct applies within community spaces, also applies individual officially representing community public spaces. Examples representing community include using official e-mail address, posting via official social media account, acting appointed representative online offline event.","code":""},{"path":"https://dplyr.tidyverse.org/dev/CODE_OF_CONDUCT.html","id":"enforcement","dir":"","previous_headings":"","what":"Enforcement","title":"Contributor Covenant Code of Conduct","text":"Instances abusive, harassing, otherwise unacceptable behavior may reported community leaders responsible enforcement codeofconduct@posit.co. complaints reviewed investigated promptly fairly. 
community leaders obligated respect privacy security reporter incident.","code":""},{"path":"https://dplyr.tidyverse.org/dev/CODE_OF_CONDUCT.html","id":"enforcement-guidelines","dir":"","previous_headings":"","what":"Enforcement Guidelines","title":"Contributor Covenant Code of Conduct","text":"Community leaders follow Community Impact Guidelines determining consequences action deem violation Code Conduct:","code":""},{"path":"https://dplyr.tidyverse.org/dev/CODE_OF_CONDUCT.html","id":"id_1-correction","dir":"","previous_headings":"Enforcement Guidelines","what":"1. Correction","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Use inappropriate language behavior deemed unprofessional unwelcome community. Consequence: private, written warning community leaders, providing clarity around nature violation explanation behavior inappropriate. public apology may requested.","code":""},{"path":"https://dplyr.tidyverse.org/dev/CODE_OF_CONDUCT.html","id":"id_2-warning","dir":"","previous_headings":"Enforcement Guidelines","what":"2. Warning","title":"Contributor Covenant Code of Conduct","text":"Community Impact: violation single incident series actions. Consequence: warning consequences continued behavior. interaction people involved, including unsolicited interaction enforcing Code Conduct, specified period time. includes avoiding interactions community spaces well external channels like social media. Violating terms may lead temporary permanent ban.","code":""},{"path":"https://dplyr.tidyverse.org/dev/CODE_OF_CONDUCT.html","id":"id_3-temporary-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"3. Temporary Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: serious violation community standards, including sustained inappropriate behavior. Consequence: temporary ban sort interaction public communication community specified period time. 
public private interaction people involved, including unsolicited interaction enforcing Code Conduct, allowed period. Violating terms may lead permanent ban.","code":""},{"path":"https://dplyr.tidyverse.org/dev/CODE_OF_CONDUCT.html","id":"id_4-permanent-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"4. Permanent Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Demonstrating pattern violation community standards, including sustained inappropriate behavior, harassment individual, aggression toward disparagement classes individuals. Consequence: permanent ban sort public interaction within community.","code":""},{"path":"https://dplyr.tidyverse.org/dev/CODE_OF_CONDUCT.html","id":"attribution","dir":"","previous_headings":"","what":"Attribution","title":"Contributor Covenant Code of Conduct","text":"Code Conduct adapted Contributor Covenant, version 2.1, available https://www.contributor-covenant.org/version/2/1/code_of_conduct.html. Community Impact Guidelines inspired [Mozilla’s code conduct enforcement ladder][https://github.com/mozilla/inclusion]. answers common questions code conduct, see FAQ https://www.contributor-covenant.org/faq. Translations available https://www.contributor-covenant.org/translations.","code":""},{"path":"https://dplyr.tidyverse.org/dev/CONTRIBUTING.html","id":null,"dir":"","previous_headings":"","what":"Contributing to dplyr","title":"Contributing to dplyr","text":"outlines propose change dplyr. detailed info contributing , tidyverse packages, please see development contributing guide.","code":""},{"path":"https://dplyr.tidyverse.org/dev/CONTRIBUTING.html","id":"fixing-typos","dir":"","previous_headings":"","what":"Fixing typos","title":"Contributing to dplyr","text":"Small typos grammatical errors documentation may edited directly using GitHub web interface, long changes made source file. YES: edit roxygen comment .R file R/. 
: edit .Rd file man/.","code":""},{"path":"https://dplyr.tidyverse.org/dev/CONTRIBUTING.html","id":"prerequisites","dir":"","previous_headings":"","what":"Prerequisites","title":"Contributing to dplyr","text":"make substantial pull request, always file issue make sure someone team agrees ’s problem. ’ve found bug, create associated issue illustrate bug minimal reprex.","code":""},{"path":"https://dplyr.tidyverse.org/dev/CONTRIBUTING.html","id":"pull-request-process","dir":"","previous_headings":"","what":"Pull request process","title":"Contributing to dplyr","text":"recommend create Git branch pull request (PR). Look Travis AppVeyor build status making changes. README contain badges continuous integration services used package. New code follow tidyverse style guide. can use styler package apply styles, please don’t restyle code nothing PR. use roxygen2, Markdown syntax, documentation. use testthat. Contributions test cases included easier accept. user-facing changes, add bullet top NEWS.md current development version header describing changes made followed GitHub username, links relevant issue(s)/PR(s).","code":""},{"path":"https://dplyr.tidyverse.org/dev/CONTRIBUTING.html","id":"code-of-conduct","dir":"","previous_headings":"","what":"Code of Conduct","title":"Contributing to dplyr","text":"Please note project released Contributor Code Conduct. 
participating project agree abide terms.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2023 dplyr authors Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"https://dplyr.tidyverse.org/dev/SUPPORT.html","id":null,"dir":"","previous_headings":"","what":"Getting help with dplyr","title":"Getting help with dplyr","text":"Thanks using dplyr. filing issue, places explore pieces put together make process smooth possible. Start making minimal reproducible example using reprex package. haven’t heard used reprex , ’re treat! Seriously, reprex make R-question-asking endeavors easier (pretty insane ROI five ten minutes ’ll take learn ’s ). additional reprex pointers, check Get help! section tidyverse site. Armed reprex, next step figure ask. ’s question: start community.rstudio.com, /StackOverflow. people answer questions. ’s bug: ’re right place, file issue. ’re sure: let community help figure ! problem bug feature request, can easily return report . opening new issue, sure search issues pull requests make sure bug hasn’t reported /already fixed development version. default, search pre-populated :issue :open. can edit qualifiers (e.g. 
:pr, :closed) needed. example, ’d simply remove :open search issues repo, open closed. right place, need file issue, please review “File issues” paragraph tidyverse contributing guidelines. Thanks help!","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/base.html","id":"overview","dir":"Articles","previous_headings":"","what":"Overview","title":"dplyr <-> base R","text":"code dplyr verbs input output data frames. contrasts base R functions frequently work individual vectors. dplyr relies heavily “non-standard evaluation” don’t need use $ refer columns “current” data frame. behaviour inspired base functions subset() transform(). dplyr solutions tend use variety single purpose verbs, base R solutions typically tend use [ variety ways, depending task hand. Multiple dplyr verbs often strung together pipeline %>%. base R, ’ll typically save intermediate results variable either discard, repeatedly overwrite. dplyr verbs handle “grouped” data frames code perform computation per-group looks similar code works whole data frame. base R, per-group operations tend varied forms.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/base.html","id":"one-table-verbs","dir":"Articles","previous_headings":"","what":"One table verbs","title":"dplyr <-> base R","text":"following table shows condensed translation dplyr verbs base R equivalents. following sections describe operation detail. ’ll learn dplyr verbs documentation vignette(\"dplyr\"). 
begin, ’ll load dplyr convert mtcars iris tibbles can easily show abbreviated output operation.","code":"library(dplyr) mtcars <- as_tibble(mtcars) iris <- as_tibble(iris)"},{"path":"https://dplyr.tidyverse.org/dev/articles/base.html","id":"arrange-arrange-rows-by-variables","dir":"Articles","previous_headings":"One table verbs","what":"arrange(): Arrange rows by variables","title":"dplyr <-> base R","text":"dplyr::arrange() orders rows data frame values one columns: desc() helper allows order selected variables descending order: can replicate base R using [ order(): Note use drop = FALSE. forget , input data frame single column, output vector, data frame. source subtle bugs. Base R provide convenient general way sort individual variables descending order, two options: numeric variables, can use -x. can request order() sort variables descending order.","code":"mtcars %>% arrange(cyl, disp) #> # A tibble: 32 × 11 #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 33.9 4 71.1 65 4.22 1.84 19.9 1 1 4 1 #> 2 30.4 4 75.7 52 4.93 1.62 18.5 1 1 4 2 #> 3 32.4 4 78.7 66 4.08 2.2 19.5 1 1 4 1 #> 4 27.3 4 79 66 4.08 1.94 18.9 1 1 4 1 #> # ℹ 28 more rows mtcars %>% arrange(desc(cyl), desc(disp)) #> # A tibble: 32 × 11 #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 10.4 8 472 205 2.93 5.25 18.0 0 0 3 4 #> 2 10.4 8 460 215 3 5.42 17.8 0 0 3 4 #> 3 14.7 8 440 230 3.23 5.34 17.4 0 0 3 4 #> 4 19.2 8 400 175 3.08 3.84 17.0 0 0 3 2 #> # ℹ 28 more rows mtcars[order(mtcars$cyl, mtcars$disp), , drop = FALSE] #> # A tibble: 32 × 11 #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 33.9 4 71.1 65 4.22 1.84 19.9 1 1 4 1 #> 2 30.4 4 75.7 52 4.93 1.62 18.5 1 1 4 2 #> 3 32.4 4 78.7 66 4.08 2.2 19.5 1 1 4 1 #> 4 27.3 4 79 66 4.08 1.94 18.9 1 1 4 1 #> # ℹ 28 more rows mtcars[order(mtcars$cyl, mtcars$disp, decreasing = TRUE), , drop = FALSE] mtcars[order(-mtcars$cyl, -mtcars$disp), , drop = 
FALSE]"},{"path":"https://dplyr.tidyverse.org/dev/articles/base.html","id":"distinct-select-distinctunique-rows","dir":"Articles","previous_headings":"One table verbs","what":"distinct(): Select distinct/unique rows","title":"dplyr <-> base R","text":"dplyr::distinct() selects unique rows: two equivalents base R, depending whether want whole data frame, just selected variables:","code":"df <- tibble( x = sample(10, 100, rep = TRUE), y = sample(10, 100, rep = TRUE) ) df %>% distinct(x) # selected columns #> # A tibble: 10 × 1 #> x #> #> 1 7 #> 2 5 #> 3 6 #> 4 4 #> # ℹ 6 more rows df %>% distinct(x, .keep_all = TRUE) # whole data frame #> # A tibble: 10 × 2 #> x y #> #> 1 7 4 #> 2 5 2 #> 3 6 9 #> 4 4 2 #> # ℹ 6 more rows unique(df[\"x\"]) # selected columns #> # A tibble: 10 × 1 #> x #> #> 1 7 #> 2 5 #> 3 6 #> 4 4 #> # ℹ 6 more rows df[!duplicated(df$x), , drop = FALSE] # whole data frame #> # A tibble: 10 × 2 #> x y #> #> 1 7 4 #> 2 5 2 #> 3 6 9 #> 4 4 2 #> # ℹ 6 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/base.html","id":"filter-return-rows-with-matching-conditions","dir":"Articles","previous_headings":"One table verbs","what":"filter(): Return rows with matching conditions","title":"dplyr <-> base R","text":"dplyr::filter() selects rows expression TRUE: closest base equivalent (inspiration filter()) subset(): can also use [ also requires use () remove NAs:","code":"starwars %>% filter(species == \"Human\") #> # A tibble: 35 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Skyw… 172 77 blond fair blue 19 male #> 2 Darth Vad… 202 136 none white yellow 41.9 male #> 3 Leia Orga… 150 49 brown light brown 19 fema… #> 4 Owen Lars 178 120 brown, gr… light blue 52 male #> # ℹ 31 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships starwars %>% filter(mass > 1000) #> # A tibble: 1 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Jabba Des… 175 
1358 NA green-tan… orange 600 herm… #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships starwars %>% filter(hair_color == \"none\" & eye_color == \"black\") #> # A tibble: 9 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Nien Nunb 160 68 none grey black NA male #> 2 Gasgano 122 NA none white, blue black NA male #> 3 Kit Fisto 196 87 none green black NA male #> 4 Plo Koon 188 80 none orange black 22 male #> # ℹ 5 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships subset(starwars, species == \"Human\") #> # A tibble: 35 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Skyw… 172 77 blond fair blue 19 male #> 2 Darth Vad… 202 136 none white yellow 41.9 male #> 3 Leia Orga… 150 49 brown light brown 19 fema… #> 4 Owen Lars 178 120 brown, gr… light blue 52 male #> # ℹ 31 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships subset(starwars, mass > 1000) #> # A tibble: 1 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Jabba Des… 175 1358 NA green-tan… orange 600 herm… #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships subset(starwars, hair_color == \"none\" & eye_color == \"black\") #> # A tibble: 9 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Nien Nunb 160 68 none grey black NA male #> 2 Gasgano 122 NA none white, blue black NA male #> 3 Kit Fisto 196 87 none green black NA male #> 4 Plo Koon 188 80 none orange black 22 male #> # ℹ 5 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships starwars[which(starwars$species == \"Human\"), , drop = FALSE] #> # A tibble: 35 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Skyw… 172 77 blond fair blue 19 male #> 2 Darth Vad… 202 136 none white 
yellow 41.9 male #> 3 Leia Orga… 150 49 brown light brown 19 fema… #> 4 Owen Lars 178 120 brown, gr… light blue 52 male #> # ℹ 31 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships starwars[which(starwars$mass > 1000), , drop = FALSE] #> # A tibble: 1 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Jabba Des… 175 1358 NA green-tan… orange 600 herm… #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships starwars[which(starwars$hair_color == \"none\" & starwars$eye_color == \"black\"), , drop = FALSE] #> # A tibble: 9 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Nien Nunb 160 68 none grey black NA male #> 2 Gasgano 122 NA none white, blue black NA male #> 3 Kit Fisto 196 87 none green black NA male #> 4 Plo Koon 188 80 none orange black 22 male #> # ℹ 5 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships "},{"path":"https://dplyr.tidyverse.org/dev/articles/base.html","id":"mutate-create-or-transform-variables","dir":"Articles","previous_headings":"One table verbs","what":"mutate(): Create or transform variables","title":"dplyr <-> base R","text":"dplyr::mutate() creates new variables existing variables: closest base equivalent transform(), note use freshly created variables: Alternatively, can use $<-: applied grouped data frame, dplyr::mutate() computes new variable per group: replicate base R, can use ave():","code":"df %>% mutate(z = x + y, z2 = z ^ 2) #> # A tibble: 100 × 4 #> x y z z2 #> #> 1 7 4 11 121 #> 2 5 2 7 49 #> 3 6 9 15 225 #> 4 4 2 6 36 #> # ℹ 96 more rows head(transform(df, z = x + y, z2 = (x + y) ^ 2)) #> x y z z2 #> 1 7 4 11 121 #> 2 5 2 7 49 #> 3 6 9 15 225 #> 4 4 2 6 36 #> 5 6 3 9 81 #> 6 9 3 12 144 mtcars$cyl2 <- mtcars$cyl * 2 mtcars$cyl4 <- mtcars$cyl2 * 2 gf <- tibble(g = c(1, 1, 2, 2), x = c(0.5, 1.5, 2.5, 3.5)) gf %>% group_by(g) %>% 
mutate(x_mean = mean(x), x_rank = rank(x)) #> # A tibble: 4 × 4 #> # Groups: g [2] #> g x x_mean x_rank #> #> 1 1 0.5 1 1 #> 2 1 1.5 1 2 #> 3 2 2.5 3 1 #> 4 2 3.5 3 2 transform(gf, x_mean = ave(x, g, FUN = mean), x_rank = ave(x, g, FUN = rank) ) #> g x x_mean x_rank #> 1 1 0.5 1 1 #> 2 1 1.5 1 2 #> 3 2 2.5 3 1 #> 4 2 3.5 3 2"},{"path":"https://dplyr.tidyverse.org/dev/articles/base.html","id":"pull-pull-out-a-single-variable","dir":"Articles","previous_headings":"One table verbs","what":"pull(): Pull out a single variable","title":"dplyr <-> base R","text":"dplyr::pull() extracts variable either name position: equivalent [[ positions $ names:","code":"mtcars %>% pull(1) #> [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 #> [15] 10.4 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4 #> [29] 15.8 19.7 15.0 21.4 mtcars %>% pull(cyl) #> [1] 6 6 4 6 8 6 8 4 4 6 6 8 8 8 8 8 8 4 4 4 4 8 8 8 8 4 4 4 8 6 8 4 mtcars[[1]] #> [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 #> [15] 10.4 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4 #> [29] 15.8 19.7 15.0 21.4 mtcars$cyl #> [1] 6 6 4 6 8 6 8 4 4 6 6 8 8 8 8 8 8 4 4 4 4 8 8 8 8 4 4 4 8 6 8 4"},{"path":"https://dplyr.tidyverse.org/dev/articles/base.html","id":"relocate-change-column-order","dir":"Articles","previous_headings":"One table verbs","what":"relocate(): Change column order","title":"dplyr <-> base R","text":"dplyr::relocate() makes easy move set columns new position (default, front): can replicate base R little set manipulation: Moving columns somewhere middle requires little set twiddling.","code":"# to front mtcars %>% relocate(gear, carb) #> # A tibble: 32 × 13 #> gear carb mpg cyl disp hp drat wt qsec vs am cyl2 #> #> 1 4 4 21 6 160 110 3.9 2.62 16.5 0 1 12 #> 2 4 4 21 6 160 110 3.9 2.88 17.0 0 1 12 #> 3 4 1 22.8 4 108 93 3.85 2.32 18.6 1 1 8 #> 4 3 1 21.4 6 258 110 3.08 3.22 19.4 1 0 12 #> # ℹ 28 more rows #> # ℹ 1 more variable: 
cyl4 # to back mtcars %>% relocate(mpg, cyl, .after = last_col()) #> # A tibble: 32 × 13 #> disp hp drat wt qsec vs am gear carb cyl2 cyl4 mpg #> #> 1 160 110 3.9 2.62 16.5 0 1 4 4 12 24 21 #> 2 160 110 3.9 2.88 17.0 0 1 4 4 12 24 21 #> 3 108 93 3.85 2.32 18.6 1 1 4 1 8 16 22.8 #> 4 258 110 3.08 3.22 19.4 1 0 3 1 12 24 21.4 #> # ℹ 28 more rows #> # ℹ 1 more variable: cyl mtcars[union(c(\"gear\", \"carb\"), names(mtcars))] #> # A tibble: 32 × 13 #> gear carb mpg cyl disp hp drat wt qsec vs am cyl2 #> #> 1 4 4 21 6 160 110 3.9 2.62 16.5 0 1 12 #> 2 4 4 21 6 160 110 3.9 2.88 17.0 0 1 12 #> 3 4 1 22.8 4 108 93 3.85 2.32 18.6 1 1 8 #> 4 3 1 21.4 6 258 110 3.08 3.22 19.4 1 0 12 #> # ℹ 28 more rows #> # ℹ 1 more variable: cyl4 to_back <- c(\"mpg\", \"cyl\") mtcars[c(setdiff(names(mtcars), to_back), to_back)] #> # A tibble: 32 × 13 #> disp hp drat wt qsec vs am gear carb cyl2 cyl4 mpg #> #> 1 160 110 3.9 2.62 16.5 0 1 4 4 12 24 21 #> 2 160 110 3.9 2.88 17.0 0 1 4 4 12 24 21 #> 3 108 93 3.85 2.32 18.6 1 1 4 1 8 16 22.8 #> 4 258 110 3.08 3.22 19.4 1 0 3 1 12 24 21.4 #> # ℹ 28 more rows #> # ℹ 1 more variable: cyl "},{"path":"https://dplyr.tidyverse.org/dev/articles/base.html","id":"rename-rename-variables-by-name","dir":"Articles","previous_headings":"One table verbs","what":"rename(): Rename variables by name","title":"dplyr <-> base R","text":"dplyr::rename() allows rename variables name position: Renaming variables position straight forward base R: Renaming variables name requires bit work:","code":"iris %>% rename(sepal_length = Sepal.Length, sepal_width = 2) #> # A tibble: 150 × 5 #> sepal_length sepal_width Petal.Length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> # ℹ 146 more rows iris2 <- iris names(iris2)[2] <- \"sepal_width\" names(iris2)[names(iris2) == \"Sepal.Length\"] <- 
\"sepal_length\""},{"path":"https://dplyr.tidyverse.org/dev/articles/base.html","id":"rename_with-rename-variables-with-a-function","dir":"Articles","previous_headings":"One table verbs","what":"rename_with(): Rename variables with a function","title":"dplyr <-> base R","text":"dplyr::rename_with() transform column names function: similar effect can achieved setNames() base R:","code":"iris %>% rename_with(toupper) #> # A tibble: 150 × 5 #> SEPAL.LENGTH SEPAL.WIDTH PETAL.LENGTH PETAL.WIDTH SPECIES #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> # ℹ 146 more rows setNames(iris, toupper(names(iris))) #> # A tibble: 150 × 5 #> SEPAL.LENGTH SEPAL.WIDTH PETAL.LENGTH PETAL.WIDTH SPECIES #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> # ℹ 146 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/base.html","id":"select-select-variables-by-name","dir":"Articles","previous_headings":"One table verbs","what":"select(): Select variables by name","title":"dplyr <-> base R","text":"dplyr::select() subsets columns position, name, function name, property: Subsetting variables position straightforward base R: two options subset name: Subsetting function name requires bit work grep(): can use Filter() subset type:","code":"iris %>% select(1:3) #> # A tibble: 150 × 3 #> Sepal.Length Sepal.Width Petal.Length #> #> 1 5.1 3.5 1.4 #> 2 4.9 3 1.4 #> 3 4.7 3.2 1.3 #> 4 4.6 3.1 1.5 #> # ℹ 146 more rows iris %>% select(Species, Sepal.Length) #> # A tibble: 150 × 2 #> Species Sepal.Length #> #> 1 setosa 5.1 #> 2 setosa 4.9 #> 3 setosa 4.7 #> 4 setosa 4.6 #> # ℹ 146 more rows iris %>% select(starts_with(\"Petal\")) #> # A tibble: 150 × 2 #> Petal.Length Petal.Width #> #> 1 1.4 0.2 #> 2 1.4 0.2 #> 3 1.3 0.2 #> 4 1.5 0.2 #> # ℹ 146 more rows iris %>% select(where(is.factor)) #> # A tibble: 150 × 1 #> Species #> #> 1 setosa #> 2 setosa #> 3 setosa #> 4 
setosa #> # ℹ 146 more rows iris[1:3] # single argument selects columns; never drops #> # A tibble: 150 × 3 #> Sepal.Length Sepal.Width Petal.Length #> #> 1 5.1 3.5 1.4 #> 2 4.9 3 1.4 #> 3 4.7 3.2 1.3 #> 4 4.6 3.1 1.5 #> # ℹ 146 more rows iris[1:3, , drop = FALSE] #> # A tibble: 3 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa iris[c(\"Species\", \"Sepal.Length\")] #> # A tibble: 150 × 2 #> Species Sepal.Length #> #> 1 setosa 5.1 #> 2 setosa 4.9 #> 3 setosa 4.7 #> 4 setosa 4.6 #> # ℹ 146 more rows subset(iris, select = c(Species, Sepal.Length)) #> # A tibble: 150 × 2 #> Species Sepal.Length #> #> 1 setosa 5.1 #> 2 setosa 4.9 #> 3 setosa 4.7 #> 4 setosa 4.6 #> # ℹ 146 more rows iris[grep(\"^Petal\", names(iris))] #> # A tibble: 150 × 2 #> Petal.Length Petal.Width #> #> 1 1.4 0.2 #> 2 1.4 0.2 #> 3 1.3 0.2 #> 4 1.5 0.2 #> # ℹ 146 more rows Filter(is.factor, iris) #> # A tibble: 150 × 1 #> Species #> #> 1 setosa #> 2 setosa #> 3 setosa #> 4 setosa #> # ℹ 146 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/base.html","id":"summarise-reduce-multiple-values-down-to-a-single-value","dir":"Articles","previous_headings":"One table verbs","what":"summarise(): Reduce multiple values down to a single value","title":"dplyr <-> base R","text":"dplyr::summarise() computes one summaries group: think closest base R equivalent uses (). Unfortunately () returns list data frames, can combine back together .call() rbind(): aggregate() comes close providing elegant answer: unfortunately looks like disp.mean disp.n columns, ’s actually single matrix column: can see variety options https://gist.github.com/hadley/c430501804349d382ce90754936ab8ec.","code":"mtcars %>% group_by(cyl) %>% summarise(mean = mean(disp), n = n()) #> # A tibble: 3 × 3 #> cyl mean n #> #> 1 4 105. 11 #> 2 6 183. 7 #> 3 8 353. 
14 mtcars_by <- by(mtcars, mtcars$cyl, function(df) { with(df, data.frame(cyl = cyl[[1]], mean = mean(disp), n = nrow(df))) }) do.call(rbind, mtcars_by) #> cyl mean n #> 4 4 105.1364 11 #> 6 6 183.3143 7 #> 8 8 353.1000 14 agg <- aggregate(disp ~ cyl, mtcars, function(x) c(mean = mean(x), n = length(x))) agg #> cyl disp.mean disp.n #> 1 4 105.1364 11.0000 #> 2 6 183.3143 7.0000 #> 3 8 353.1000 14.0000 str(agg) #> 'data.frame': 3 obs. of 2 variables: #> $ cyl : num 4 6 8 #> $ disp: num [1:3, 1:2] 105 183 353 11 7 ... #> ..- attr(*, \"dimnames\")=List of 2 #> .. ..$ : NULL #> .. ..$ : chr [1:2] \"mean\" \"n\""},{"path":"https://dplyr.tidyverse.org/dev/articles/base.html","id":"slice-choose-rows-by-position","dir":"Articles","previous_headings":"One table verbs","what":"slice(): Choose rows by position","title":"dplyr <-> base R","text":"slice() selects rows location: straightforward replicate [:","code":"slice(mtcars, 25:n()) #> # A tibble: 8 × 13 #> mpg cyl disp hp drat wt qsec vs am gear carb cyl2 #> #> 1 19.2 8 400 175 3.08 3.84 17.0 0 0 3 2 16 #> 2 27.3 4 79 66 4.08 1.94 18.9 1 1 4 1 8 #> 3 26 4 120. 91 4.43 2.14 16.7 0 1 5 2 8 #> 4 30.4 4 95.1 113 3.77 1.51 16.9 1 1 5 2 8 #> # ℹ 4 more rows #> # ℹ 1 more variable: cyl4 mtcars[25:nrow(mtcars), , drop = FALSE] #> # A tibble: 8 × 13 #> mpg cyl disp hp drat wt qsec vs am gear carb cyl2 #> #> 1 19.2 8 400 175 3.08 3.84 17.0 0 0 3 2 16 #> 2 27.3 4 79 66 4.08 1.94 18.9 1 1 4 1 8 #> 3 26 4 120. 91 4.43 2.14 16.7 0 1 5 2 8 #> 4 30.4 4 95.1 113 3.77 1.51 16.9 1 1 5 2 8 #> # ℹ 4 more rows #> # ℹ 1 more variable: cyl4 "},{"path":"https://dplyr.tidyverse.org/dev/articles/base.html","id":"two-table-verbs","dir":"Articles","previous_headings":"","what":"Two-table verbs","title":"dplyr <-> base R","text":"want merge two data frames, x y), variety different ways bring together. Various base R merge() calls replaced variety dplyr join() functions. 
information two-table verbs, see vignette(\"two-table\").","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/base.html","id":"mutating-joins","dir":"Articles","previous_headings":"Two-table verbs","what":"Mutating joins","title":"dplyr <-> base R","text":"dplyr’s inner_join(), left_join(), right_join(), full_join() add new columns y x, matching rows based set “keys”, differ missing matches handled. equivalent calls merge() various settings , .x, .y arguments. main difference order rows: dplyr preserves order x data frame. merge() sorts key columns.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/base.html","id":"filtering-joins","dir":"Articles","previous_headings":"Two-table verbs","what":"Filtering joins","title":"dplyr <-> base R","text":"dplyr’s semi_join() anti_join() affect rows, columns: can replicated base R [ %%: Semi anti joins multiple key variables considerably challenging implement.","code":"band_members %>% semi_join(band_instruments) #> Joining with `by = join_by(name)` #> # A tibble: 2 × 2 #> name band #> #> 1 John Beatles #> 2 Paul Beatles band_members %>% anti_join(band_instruments) #> Joining with `by = join_by(name)` #> # A tibble: 1 × 2 #> name band #> #> 1 Mick Stones band_members[band_members$name %in% band_instruments$name, , drop = FALSE] #> # A tibble: 2 × 2 #> name band #> #> 1 John Beatles #> 2 Paul Beatles band_members[!band_members$name %in% band_instruments$name, , drop = FALSE] #> # A tibble: 1 × 2 #> name band #> #> 1 Mick Stones"},{"path":"https://dplyr.tidyverse.org/dev/articles/colwise.html","id":"basic-usage","dir":"Articles","previous_headings":"","what":"Basic usage","title":"Column-wise operations","text":"across() two primary arguments: first argument, .cols, selects columns want operate . uses tidy selection (like select()) can pick variables position, name, type. second argument, .fns, function list functions apply column. can also purrr style formula (list formulas) like ~ .x / 2. 
(argument optional, can omit just want get underlying data; ’ll see technique used vignette(\"rowwise\").) couple examples across() conjunction favourite verb, summarise(). can use across() dplyr verb, ’ll see little later. across() usually used combination summarise() mutate(), doesn’t select grouping variables order avoid accidentally modifying :","code":"starwars %>% summarise(across(where(is.character), n_distinct)) #> # A tibble: 1 × 8 #> name hair_color skin_color eye_color sex gender homeworld species #> #> 1 87 12 31 15 5 3 49 38 starwars %>% group_by(species) %>% filter(n() > 1) %>% summarise(across(c(sex, gender, homeworld), n_distinct)) #> # A tibble: 9 × 4 #> species sex gender homeworld #> #> 1 Droid 1 2 3 #> 2 Gungan 1 1 1 #> 3 Human 2 2 15 #> 4 Kaminoan 2 2 1 #> # ℹ 5 more rows starwars %>% group_by(homeworld) %>% filter(n() > 1) %>% summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE))) #> # A tibble: 10 × 4 #> homeworld height mass birth_year #> #> 1 Alderaan 176. 64 43 #> 2 Corellia 175 78.5 25 #> 3 Coruscant 174. 50 91 #> 4 Kamino 208. 83.1 31.5 #> # ℹ 6 more rows df <- data.frame(g = c(1, 1, 2), x = c(-1, 1, 3), y = c(-1, -4, -9)) df %>% group_by(g) %>% summarise(across(where(is.numeric), sum)) #> # A tibble: 2 × 3 #> g x y #> #> 1 1 0 -5 #> 2 2 3 -9"},{"path":"https://dplyr.tidyverse.org/dev/articles/colwise.html","id":"multiple-functions","dir":"Articles","previous_headings":"Basic usage","what":"Multiple functions","title":"Column-wise operations","text":"can transform variable one function supplying named list functions lambda functions second argument: Control names created .names argument takes glue spec: ’d prefer summaries function grouped together, ’ll expand calls : (One day might become argument across() ’re yet sure work.) however use (.numeric) last case second across() pick variables newly created (“min_height”, “min_mass” “min_birth_year”). 
can work around combining calls across() single expression returns tibble: Alternatively reorganize results relocate():","code":"min_max <- list( min = ~min(.x, na.rm = TRUE), max = ~max(.x, na.rm = TRUE) ) starwars %>% summarise(across(where(is.numeric), min_max)) #> # A tibble: 1 × 6 #> height_min height_max mass_min mass_max birth_year_min birth_year_max #> #> 1 66 264 15 1358 8 896 starwars %>% summarise(across(c(height, mass, birth_year), min_max)) #> # A tibble: 1 × 6 #> height_min height_max mass_min mass_max birth_year_min birth_year_max #> #> 1 66 264 15 1358 8 896 starwars %>% summarise(across(where(is.numeric), min_max, .names = \"{.fn}.{.col}\")) #> # A tibble: 1 × 6 #> min.height max.height min.mass max.mass min.birth_year max.birth_year #> #> 1 66 264 15 1358 8 896 starwars %>% summarise(across(c(height, mass, birth_year), min_max, .names = \"{.fn}.{.col}\")) #> # A tibble: 1 × 6 #> min.height max.height min.mass max.mass min.birth_year max.birth_year #> #> 1 66 264 15 1358 8 896 starwars %>% summarise( across(c(height, mass, birth_year), ~min(.x, na.rm = TRUE), .names = \"min_{.col}\"), across(c(height, mass, birth_year), ~max(.x, na.rm = TRUE), .names = \"max_{.col}\") ) #> # A tibble: 1 × 6 #> min_height min_mass min_birth_year max_height max_mass max_birth_year #> #> 1 66 15 8 264 1358 896 starwars %>% summarise( tibble( across(where(is.numeric), ~min(.x, na.rm = TRUE), .names = \"min_{.col}\"), across(where(is.numeric), ~max(.x, na.rm = TRUE), .names = \"max_{.col}\") ) ) #> # A tibble: 1 × 6 #> min_height min_mass min_birth_year max_height max_mass max_birth_year #> #> 1 66 15 8 264 1358 896 starwars %>% summarise(across(where(is.numeric), min_max, .names = \"{.fn}.{.col}\")) %>% relocate(starts_with(\"min\")) #> # A tibble: 1 × 6 #> min.height min.mass min.birth_year max.height max.mass max.birth_year #> #> 1 66 15 8 264 1358 
896"},{"path":"https://dplyr.tidyverse.org/dev/articles/colwise.html","id":"current-column","dir":"Articles","previous_headings":"Basic usage","what":"Current column","title":"Column-wise operations","text":"need , can access name “current” column inside calling cur_column(). can useful want perform sort context dependent transformation ’s already encoded vector:","code":"df <- tibble(x = 1:3, y = 3:5, z = 5:7) mult <- list(x = 1, y = 10, z = 100) df %>% mutate(across(all_of(names(mult)), ~ .x * mult[[cur_column()]])) #> # A tibble: 3 × 3 #> x y z #> #> 1 1 30 500 #> 2 2 40 600 #> 3 3 50 700"},{"path":"https://dplyr.tidyverse.org/dev/articles/colwise.html","id":"gotchas","dir":"Articles","previous_headings":"Basic usage","what":"Gotchas","title":"Column-wise operations","text":"careful combining numeric summaries (.numeric): n becomes NA n numeric, across() computes standard deviation, standard deviation 3 (constant) NA. probably want compute n() last avoid problem: Alternatively, explicitly exclude n columns operate : Another approach combine call n() across() single expression returns tibble:","code":"df <- data.frame(x = c(1, 2, 3), y = c(1, 4, 9)) df %>% summarise(n = n(), across(where(is.numeric), sd)) #> n x y #> 1 NA 1 4.041452 df %>% summarise(across(where(is.numeric), sd), n = n()) #> x y n #> 1 1 4.041452 3 df %>% summarise(n = n(), across(where(is.numeric) & !n, sd)) #> n x y #> 1 3 1 4.041452 df %>% summarise( tibble(n = n(), across(where(is.numeric), sd)) ) #> n x y #> 1 3 1 4.041452"},{"path":"https://dplyr.tidyverse.org/dev/articles/colwise.html","id":"other-verbs","dir":"Articles","previous_headings":"Basic usage","what":"Other verbs","title":"Column-wise operations","text":"far ’ve focused use across() summarise(), works dplyr verb uses data masking: Rescale numeric variables range 0-1: verbs, like group_by(), count() distinct(), don’t need supply summary function, can useful use tidy-selection dynamically select set columns. 
cases, recommend using complement across(), pick(), works like across() doesn’t apply functions instead returns data frame containing selected columns. Find distinct Count combinations variables given pattern: across() doesn’t work select() rename() already use tidy select syntax; want transform column names function, can use rename_with().","code":"rescale01 <- function(x) { rng <- range(x, na.rm = TRUE) (x - rng[1]) / (rng[2] - rng[1]) } df <- tibble(x = 1:4, y = rnorm(4)) df %>% mutate(across(where(is.numeric), rescale01)) #> # A tibble: 4 × 2 #> x y #> #> 1 0 0.385 #> 2 0.333 1 #> 3 0.667 0 #> 4 1 0.903 starwars %>% distinct(pick(contains(\"color\"))) #> # A tibble: 67 × 3 #> hair_color skin_color eye_color #> #> 1 blond fair blue #> 2 NA gold yellow #> 3 NA white, blue red #> 4 none white yellow #> # ℹ 63 more rows starwars %>% count(pick(contains(\"color\")), sort = TRUE) #> # A tibble: 67 × 4 #> hair_color skin_color eye_color n #> #> 1 brown light brown 6 #> 2 brown fair blue 4 #> 3 none grey black 4 #> 4 black dark brown 3 #> # ℹ 63 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/colwise.html","id":"filter","dir":"Articles","previous_headings":"Basic usage","what":"filter()","title":"Column-wise operations","text":"directly use across() filter() need extra step combine results. 
end, filter() two special purpose companion functions: if_any() keeps rows predicate true least one selected column: if_all() keeps rows predicate true selected columns:","code":"starwars %>% filter(if_any(everything(), ~ !is.na(.x))) #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Skyw… 172 77 blond fair blue 19 male #> 2 C-3PO 167 75 NA gold yellow 112 none #> 3 R2-D2 96 32 NA white, bl… red 33 none #> 4 Darth Vad… 202 136 none white yellow 41.9 male #> # ℹ 83 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships starwars %>% filter(if_all(everything(), ~ !is.na(.x))) #> # A tibble: 29 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Skyw… 172 77 blond fair blue 19 male #> 2 Darth Vad… 202 136 none white yellow 41.9 male #> 3 Leia Orga… 150 49 brown light brown 19 fema… #> 4 Owen Lars 178 120 brown, gr… light blue 52 male #> # ℹ 25 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships "},{"path":"https://dplyr.tidyverse.org/dev/articles/colwise.html","id":"if-_at-_all","dir":"Articles","previous_headings":"","what":"_if, _at, _all","title":"Column-wise operations","text":"Prior versions dplyr allowed apply function multiple columns different way: using functions _if, _at, _all() suffixes. functions solved pressing need used many people, now superseded. means ’ll stay around, won’t receive new features get critical bug fixes.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/colwise.html","id":"why-do-we-like-across","dir":"Articles","previous_headings":"_if, _at, _all","what":"Why do we like across()?","title":"Column-wise operations","text":"decide move away functions favour across()? across() makes possible express useful summaries previously impossible: across() reduces number functions dplyr needs provide. 
makes dplyr easier use (fewer functions remember) easier us implement new verbs (since need implement one function, four). across() unifies _if _at semantics can select position, name, type, can now create compound selections previously impossible. example, can now transform numeric columns whose name begins “x”: across((.numeric) & starts_with(\"x\")). across() doesn’t need use vars(). _at() functions place dplyr manually quote variable names, makes little weird hence harder remember.","code":"df %>% group_by(g1, g2) %>% summarise( across(where(is.numeric), mean), across(where(is.factor), nlevels), n = n(), )"},{"path":"https://dplyr.tidyverse.org/dev/articles/colwise.html","id":"why-did-it-take-so-long-to-discover-across","dir":"Articles","previous_headings":"_if, _at, _all","what":"Why did it take so long to discover across()?","title":"Column-wise operations","text":"’s disappointing didn’t discover across() earlier, instead worked several false starts (first realising common problem, _each() functions, recently _if()/_at()/_all() functions). across() couldn’t work without three recent discoveries: can column data frame data frame. something provided base R, ’s well documented, took see useful, just theoretical curiosity. can use data frames allow summary functions return multiple columns. can use absence outer name convention want unpack data frame column individual columns.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/colwise.html","id":"how-do-you-convert-existing-code","dir":"Articles","previous_headings":"_if, _at, _all","what":"How do you convert existing code?","title":"Column-wise operations","text":"Fortunately, ’s generally straightforward translate existing code use across(): Strip _if(), _at() _all() suffix function. Call across(). first argument : _if(), old second argument wrapped (). _at(), old second argument, call vars() removed. _all(), everything(). subsequent arguments can copied . 
example: exceptions rule: rename_*() select_*() follow different pattern. already select semantics, generally used different way doesn’t direct equivalent across(); use new rename_with() instead. Previously, filter_*() paired all_vars() any_vars() helpers. new helpers if_any() if_all() can used inside filter() keep rows predicate true least one, selected columns: used mutate(), transformations performed across() applied . different behaviour mutate_if(), mutate_at(), mutate_all(), apply transformations one time. expect ’ll generally find new behaviour less surprising:","code":"df %>% mutate_if(is.numeric, ~mean(.x, na.rm = TRUE)) # -> df %>% mutate(across(where(is.numeric), ~mean(.x, na.rm = TRUE))) df %>% mutate_at(vars(c(x, starts_with(\"y\"))), mean) # -> df %>% mutate(across(c(x, starts_with(\"y\")), mean)) df %>% mutate_all(mean) # -> df %>% mutate(across(everything(), mean)) df <- tibble(x = c(\"a\", \"b\"), y = c(1, 1), z = c(-1, 1)) # Find all rows where EVERY numeric variable is greater than zero df %>% filter(if_all(where(is.numeric), ~ .x > 0)) #> # A tibble: 1 × 3 #> x y z #> #> 1 b 1 1 # Find all rows where ANY numeric variable is greater than zero df %>% filter(if_any(where(is.numeric), ~ .x > 0)) #> # A tibble: 2 × 3 #> x y z #> #> 1 a 1 -1 #> 2 b 1 1 df <- tibble(x = 2, y = 4, z = 8) df %>% mutate_all(~ .x / y) #> # A tibble: 1 × 3 #> x y z #> #> 1 0.5 1 8 df %>% mutate(across(everything(), ~ .x / y)) #> # A tibble: 1 × 3 #> x y z #> #> 1 0.5 1 2"},{"path":"https://dplyr.tidyverse.org/dev/articles/dplyr.html","id":"data-starwars","dir":"Articles","previous_headings":"","what":"Data: starwars","title":"Introduction to dplyr","text":"explore basic data manipulation verbs dplyr, ’ll use dataset starwars. dataset contains 87 characters comes Star Wars API, documented ?starwars Note starwars tibble, modern reimagining data frame. ’s particularly useful large datasets prints first rows. 
can learn tibbles https://tibble.tidyverse.org; particular can convert data frames tibbles as_tibble().","code":"dim(starwars) #> [1] 87 14 starwars #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Skyw… 172 77 blond fair blue 19 male #> 2 C-3PO 167 75 NA gold yellow 112 none #> 3 R2-D2 96 32 NA white, bl… red 33 none #> 4 Darth Vad… 202 136 none white yellow 41.9 male #> # ℹ 83 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships "},{"path":"https://dplyr.tidyverse.org/dev/articles/dplyr.html","id":"single-table-verbs","dir":"Articles","previous_headings":"","what":"Single table verbs","title":"Introduction to dplyr","text":"dplyr aims provide function basic verb data manipulation. verbs can organised three categories based component dataset work : filter() chooses rows based column values. slice() chooses rows based location. arrange() changes order rows. select() changes whether column included. rename() changes name columns. mutate() changes values columns creates new columns. relocate() changes order columns. summarise() collapses group single row.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/dplyr.html","id":"the-pipe","dir":"Articles","previous_headings":"Single table verbs","what":"The pipe","title":"Introduction to dplyr","text":"dplyr functions take data frame (tibble) first argument. Rather forcing user either save intermediate objects nest functions, dplyr provides %>% operator magrittr. x %>% f(y) turns f(x, y) result one step “piped” next step. can use pipe rewrite multiple operations can read left--right, top--bottom (reading pipe operator “”).","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/dplyr.html","id":"filter-rows-with-filter","dir":"Articles","previous_headings":"Single table verbs","what":"Filter rows with filter()","title":"Introduction to dplyr","text":"filter() allows select subset rows data frame. 
Like single verbs, first argument tibble (data frame). second subsequent arguments refer variables within data frame, selecting rows expression TRUE. example, can select character light skin color brown eyes : roughly equivalent base R code:","code":"starwars %>% filter(skin_color == \"light\", eye_color == \"brown\") #> # A tibble: 7 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Leia Orga… 150 49 brown light brown 19 fema… #> 2 Biggs Dar… 183 84 black light brown 24 male #> 3 Padmé Ami… 185 45 brown light brown 46 fema… #> 4 Cordé 157 NA brown light brown NA NA #> # ℹ 3 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships starwars[starwars$skin_color == \"light\" & starwars$eye_color == \"brown\", ]"},{"path":"https://dplyr.tidyverse.org/dev/articles/dplyr.html","id":"arrange-rows-with-arrange","dir":"Articles","previous_headings":"Single table verbs","what":"Arrange rows with arrange()","title":"Introduction to dplyr","text":"arrange() works similarly filter() except instead filtering selecting rows, reorders . takes data frame, set column names (complicated expressions) order . 
provide one column name, additional column used break ties values preceding columns: Use desc() order column descending order:","code":"starwars %>% arrange(height, mass) #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Yoda 66 17 white green brown 896 male #> 2 Ratts Tye… 79 15 none grey, blue unknown NA male #> 3 Wicket Sy… 88 20 brown brown brown 8 male #> 4 Dud Bolt 94 45 none blue, grey yellow NA male #> # ℹ 83 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships starwars %>% arrange(desc(height)) #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Yarael Po… 264 NA none white yellow NA male #> 2 Tarfful 234 136 brown brown blue NA male #> 3 Lama Su 229 88 none grey black NA male #> 4 Chewbacca 228 112 brown unknown blue 200 male #> # ℹ 83 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships "},{"path":"https://dplyr.tidyverse.org/dev/articles/dplyr.html","id":"choose-rows-using-their-position-with-slice","dir":"Articles","previous_headings":"Single table verbs","what":"Choose rows using their position with slice()","title":"Introduction to dplyr","text":"slice() lets index rows (integer) locations. allows select, remove, duplicate rows. can get characters row numbers 5 10. accompanied number helpers common use cases: slice_head() slice_tail() select first last rows. slice_sample() randomly selects rows. Use option prop choose certain proportion cases. Use replace = TRUE perform bootstrap sample. needed, can weight sample weight argument. slice_min() slice_max() select rows highest lowest values variable. 
Note first must choose values NA.","code":"starwars %>% slice(5:10) #> # A tibble: 6 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Leia Orga… 150 49 brown light brown 19 fema… #> 2 Owen Lars 178 120 brown, gr… light blue 52 male #> 3 Beru Whit… 165 75 brown light blue 47 fema… #> 4 R5-D4 97 32 NA white, red red NA none #> # ℹ 2 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships starwars %>% slice_head(n = 3) #> # A tibble: 3 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Skyw… 172 77 blond fair blue 19 male #> 2 C-3PO 167 75 NA gold yellow 112 none #> 3 R2-D2 96 32 NA white, bl… red 33 none #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships starwars %>% slice_sample(n = 5) #> # A tibble: 5 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Ayla Secu… 178 55 none blue hazel 48 fema… #> 2 Bossk 190 113 none green red 53 male #> 3 San Hill 191 NA none grey gold NA male #> 4 Luminara … 170 56.2 black yellow blue 58 fema… #> # ℹ 1 more row #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships starwars %>% slice_sample(prop = 0.1) #> # A tibble: 8 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Qui-Gon J… 193 89 brown fair blue 92 male #> 2 Jango Fett 183 79 black tan brown 66 male #> 3 Jocasta Nu 167 NA white fair blue NA fema… #> 4 Zam Wesell 168 55 blonde fair, gre… yellow NA fema… #> # ℹ 4 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships starwars %>% filter(!is.na(height)) %>% slice_max(height, n = 3) #> # A tibble: 3 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Yarael Po… 264 NA none white yellow NA male #> 2 Tarfful 234 136 brown brown blue NA male #> 3 Lama Su 229 88 none grey black NA male #> # ℹ 6 more variables: gender , 
homeworld , species , #> # films , vehicles , starships "},{"path":"https://dplyr.tidyverse.org/dev/articles/dplyr.html","id":"select-columns-with-select","dir":"Articles","previous_headings":"Single table verbs","what":"Select columns with select()","title":"Introduction to dplyr","text":"Often work large datasets many columns actually interest . select() allows rapidly zoom useful subset using operations usually work numeric variable positions: number helper functions can use within select(), like starts_with(), ends_with(), matches() contains(). let quickly match larger blocks variables meet criterion. See ?select details. can rename variables select() using named arguments: select() drops variables explicitly mentioned, ’s useful. Instead, use rename():","code":"# Select columns by name starwars %>% select(hair_color, skin_color, eye_color) #> # A tibble: 87 × 3 #> hair_color skin_color eye_color #> #> 1 blond fair blue #> 2 NA gold yellow #> 3 NA white, blue red #> 4 none white yellow #> # ℹ 83 more rows # Select all columns between hair_color and eye_color (inclusive) starwars %>% select(hair_color:eye_color) #> # A tibble: 87 × 3 #> hair_color skin_color eye_color #> #> 1 blond fair blue #> 2 NA gold yellow #> 3 NA white, blue red #> 4 none white yellow #> # ℹ 83 more rows # Select all columns except those from hair_color to eye_color (inclusive) starwars %>% select(!(hair_color:eye_color)) #> # A tibble: 87 × 11 #> name height mass birth_year sex gender homeworld species films #> #> 1 Luke Skywa… 172 77 19 male mascu… Tatooine Human #> 2 C-3PO 167 75 112 none mascu… Tatooine Droid #> 3 R2-D2 96 32 33 none mascu… Naboo Droid #> 4 Darth Vader 202 136 41.9 male mascu… Tatooine Human #> # ℹ 83 more rows #> # ℹ 2 more variables: vehicles , starships # Select all columns ending with color starwars %>% select(ends_with(\"color\")) #> # A tibble: 87 × 3 #> hair_color skin_color eye_color #> #> 1 blond fair blue #> 2 NA gold yellow #> 3 NA white, blue red #> 4 none 
white yellow #> # ℹ 83 more rows starwars %>% select(home_world = homeworld) #> # A tibble: 87 × 1 #> home_world #> #> 1 Tatooine #> 2 Tatooine #> 3 Naboo #> 4 Tatooine #> # ℹ 83 more rows starwars %>% rename(home_world = homeworld) #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Skyw… 172 77 blond fair blue 19 male #> 2 C-3PO 167 75 NA gold yellow 112 none #> 3 R2-D2 96 32 NA white, bl… red 33 none #> 4 Darth Vad… 202 136 none white yellow 41.9 male #> # ℹ 83 more rows #> # ℹ 6 more variables: gender , home_world , species , #> # films , vehicles , starships "},{"path":"https://dplyr.tidyverse.org/dev/articles/dplyr.html","id":"add-new-columns-with-mutate","dir":"Articles","previous_headings":"Single table verbs","what":"Add new columns with mutate()","title":"Introduction to dplyr","text":"Besides selecting sets existing columns, ’s often useful add new columns functions existing columns. job mutate(): can’t see height meters just calculated, can fix using select command. 
dplyr::mutate() similar base transform(), allows refer columns ’ve just created: want keep new variables, use .keep = \"none\":","code":"starwars %>% mutate(height_m = height / 100) #> # A tibble: 87 × 15 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Skyw… 172 77 blond fair blue 19 male #> 2 C-3PO 167 75 NA gold yellow 112 none #> 3 R2-D2 96 32 NA white, bl… red 33 none #> 4 Darth Vad… 202 136 none white yellow 41.9 male #> # ℹ 83 more rows #> # ℹ 7 more variables: gender , homeworld , species , #> # films , vehicles , starships , height_m starwars %>% mutate(height_m = height / 100) %>% select(height_m, height, everything()) #> # A tibble: 87 × 15 #> height_m height name mass hair_color skin_color eye_color birth_year #> #> 1 1.72 172 Luke S… 77 blond fair blue 19 #> 2 1.67 167 C-3PO 75 NA gold yellow 112 #> 3 0.96 96 R2-D2 32 NA white, bl… red 33 #> 4 2.02 202 Darth … 136 none white yellow 41.9 #> # ℹ 83 more rows #> # ℹ 7 more variables: sex , gender , homeworld , #> # species , films , vehicles , starships starwars %>% mutate( height_m = height / 100, BMI = mass / (height_m^2) ) %>% select(BMI, everything()) #> # A tibble: 87 × 16 #> BMI name height mass hair_color skin_color eye_color birth_year #> #> 1 26.0 Luke Skyw… 172 77 blond fair blue 19 #> 2 26.9 C-3PO 167 75 NA gold yellow 112 #> 3 34.7 R2-D2 96 32 NA white, bl… red 33 #> 4 33.3 Darth Vad… 202 136 none white yellow 41.9 #> # ℹ 83 more rows #> # ℹ 8 more variables: sex , gender , homeworld , #> # species , films , vehicles , starships , #> # height_m starwars %>% mutate( height_m = height / 100, BMI = mass / (height_m^2), .keep = \"none\" ) #> # A tibble: 87 × 2 #> height_m BMI #> #> 1 1.72 26.0 #> 2 1.67 26.9 #> 3 0.96 34.7 #> 4 2.02 33.3 #> # ℹ 83 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/dplyr.html","id":"change-column-order-with-relocate","dir":"Articles","previous_headings":"Single table verbs","what":"Change column order with 
relocate()","title":"Introduction to dplyr","text":"Use similar syntax select() move blocks columns ","code":"starwars %>% relocate(sex:homeworld, .before = height) #> # A tibble: 87 × 14 #> name sex gender homeworld height mass hair_color skin_color #> #> 1 Luke Skywalker male mascu… Tatooine 172 77 blond fair #> 2 C-3PO none mascu… Tatooine 167 75 NA gold #> 3 R2-D2 none mascu… Naboo 96 32 NA white, bl… #> 4 Darth Vader male mascu… Tatooine 202 136 none white #> # ℹ 83 more rows #> # ℹ 6 more variables: eye_color , birth_year , species , #> # films , vehicles , starships "},{"path":"https://dplyr.tidyverse.org/dev/articles/dplyr.html","id":"summarise-values-with-summarise","dir":"Articles","previous_headings":"Single table verbs","what":"Summarise values with summarise()","title":"Introduction to dplyr","text":"last verb summarise(). collapses data frame single row. ’s useful learn group_by() verb .","code":"starwars %>% summarise(height = mean(height, na.rm = TRUE)) #> # A tibble: 1 × 1 #> height #> #> 1 175."},{"path":"https://dplyr.tidyverse.org/dev/articles/dplyr.html","id":"commonalities","dir":"Articles","previous_headings":"Single table verbs","what":"Commonalities","title":"Introduction to dplyr","text":"may noticed syntax function verbs similar: first argument data frame. subsequent arguments describe data frame. can refer columns data frame directly without using $. result new data frame Together properties make easy chain together multiple simple steps achieve complex result. five functions provide basis language data manipulation. 
basic level, can alter tidy data frame five useful ways: can reorder rows (arrange()), pick observations variables interest (filter() select()), add new variables functions existing variables (mutate()), collapse many values summary (summarise()).","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/dplyr.html","id":"combining-functions-with","dir":"Articles","previous_headings":"","what":"Combining functions with %>%","title":"Introduction to dplyr","text":"dplyr API functional sense function calls don’t side-effects. must always save results. doesn’t lead particularly elegant code, especially want many operations . either step--step: don’t want name intermediate results, need wrap function calls inside : difficult read order operations inside . Thus, arguments long way away function. get around problem, dplyr provides %>% operator magrittr. x %>% f(y) turns f(x, y) can use rewrite multiple operations can read left--right, top--bottom (reading pipe operator “”):","code":"a1 <- group_by(starwars, species, sex) a2 <- select(a1, height, mass) a3 <- summarise(a2, height = mean(height, na.rm = TRUE), mass = mean(mass, na.rm = TRUE) ) summarise( select( group_by(starwars, species, sex), height, mass ), height = mean(height, na.rm = TRUE), mass = mean(mass, na.rm = TRUE) ) #> Adding missing grouping variables: `species`, `sex` #> `summarise()` has grouped output by 'species'. You can override using the #> `.groups` argument. 
#> # A tibble: 41 × 4 #> # Groups: species [38] #> species sex height mass #> #> 1 Aleena male 79 15 #> 2 Besalisk male 198 102 #> 3 Cerean male 198 82 #> 4 Chagrian male 196 NaN #> # ℹ 37 more rows starwars %>% group_by(species, sex) %>% select(height, mass) %>% summarise( height = mean(height, na.rm = TRUE), mass = mean(mass, na.rm = TRUE) )"},{"path":"https://dplyr.tidyverse.org/dev/articles/dplyr.html","id":"patterns-of-operations","dir":"Articles","previous_headings":"","what":"Patterns of operations","title":"Introduction to dplyr","text":"dplyr verbs can classified type operations accomplish (sometimes speak semantics, .e., meaning). ’s helpful good grasp difference select mutate operations.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/dplyr.html","id":"selecting-operations","dir":"Articles","previous_headings":"Patterns of operations","what":"Selecting operations","title":"Introduction to dplyr","text":"One appealing features dplyr can refer columns tibble regular variables. However, syntactic uniformity referring bare column names hides semantical differences across verbs. column symbol supplied select() meaning symbol supplied mutate(). Selecting operations expect column names positions. Hence, call select() bare variable names, actually represent positions tibble. following calls completely equivalent dplyr’s point view: token, means refer variables surrounding context name one columns. following example, height still represents 2, 5: One useful subtlety applies bare names selecting calls like c(height, mass) height:mass. cases, columns data frame put scope. allows refer contextual variables selection helpers: semantics usually intuitive. note subtle difference: first argument, name represents position 1. second argument, name evaluated surrounding context represents fifth column. long time, select() used understand column positions. Counting dplyr 0.6, now understands column names well. 
makes bit easier program select():","code":"# `name` represents the integer 1 select(starwars, name) #> # A tibble: 87 × 1 #> name #> #> 1 Luke Skywalker #> 2 C-3PO #> 3 R2-D2 #> 4 Darth Vader #> # ℹ 83 more rows select(starwars, 1) #> # A tibble: 87 × 1 #> name #> #> 1 Luke Skywalker #> 2 C-3PO #> 3 R2-D2 #> 4 Darth Vader #> # ℹ 83 more rows height <- 5 select(starwars, height) #> # A tibble: 87 × 1 #> height #> #> 1 172 #> 2 167 #> 3 96 #> 4 202 #> # ℹ 83 more rows name <- \"color\" select(starwars, ends_with(name)) #> # A tibble: 87 × 3 #> hair_color skin_color eye_color #> #> 1 blond fair blue #> 2 NA gold yellow #> 3 NA white, blue red #> 4 none white yellow #> # ℹ 83 more rows name <- 5 select(starwars, name, identity(name)) #> # A tibble: 87 × 2 #> name skin_color #> #> 1 Luke Skywalker fair #> 2 C-3PO gold #> 3 R2-D2 white, blue #> 4 Darth Vader white #> # ℹ 83 more rows vars <- c(\"name\", \"height\") select(starwars, all_of(vars), \"mass\") #> # A tibble: 87 × 3 #> name height mass #> #> 1 Luke Skywalker 172 77 #> 2 C-3PO 167 75 #> 3 R2-D2 96 32 #> 4 Darth Vader 202 136 #> # ℹ 83 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/dplyr.html","id":"mutating-operations","dir":"Articles","previous_headings":"Patterns of operations","what":"Mutating operations","title":"Introduction to dplyr","text":"Mutate semantics quite different selection semantics. Whereas select() expects column names positions, mutate() expects column vectors. set smaller tibble use examples. use select(), bare column names stand positions tibble. mutate() hand, column symbols represent actual column vectors stored tibble. Consider happens give string number mutate(): mutate() gets length-1 vectors interprets new columns data frame. vectors recycled match number rows. ’s doesn’t make sense supply expressions like \"height\" + 10 mutate(). amounts adding 10 string! correct expression : way, can unquote values context values represent valid column. 
must either length 1 (get recycled) length number rows. following example create new vector add data frame: case point group_by(). might think select semantics, actually mutate semantics. quite handy allows group modified column: can’t supply column name group_by(). amounts creating new column containing string recycled number rows:","code":"df <- starwars %>% select(name, height, mass) mutate(df, \"height\", 2) #> # A tibble: 87 × 5 #> name height mass `\"height\"` `2` #> #> 1 Luke Skywalker 172 77 height 2 #> 2 C-3PO 167 75 height 2 #> 3 R2-D2 96 32 height 2 #> 4 Darth Vader 202 136 height 2 #> # ℹ 83 more rows mutate(df, height + 10) #> # A tibble: 87 × 4 #> name height mass `height + 10` #> #> 1 Luke Skywalker 172 77 182 #> 2 C-3PO 167 75 177 #> 3 R2-D2 96 32 106 #> 4 Darth Vader 202 136 212 #> # ℹ 83 more rows var <- seq(1, nrow(df)) mutate(df, new = var) #> # A tibble: 87 × 4 #> name height mass new #> #> 1 Luke Skywalker 172 77 1 #> 2 C-3PO 167 75 2 #> 3 R2-D2 96 32 3 #> 4 Darth Vader 202 136 4 #> # ℹ 83 more rows group_by(starwars, sex) #> # A tibble: 87 × 14 #> # Groups: sex [5] #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Skyw… 172 77 blond fair blue 19 male #> 2 C-3PO 167 75 NA gold yellow 112 none #> 3 R2-D2 96 32 NA white, bl… red 33 none #> 4 Darth Vad… 202 136 none white yellow 41.9 male #> # ℹ 83 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships group_by(starwars, sex = as.factor(sex)) #> # A tibble: 87 × 14 #> # Groups: sex [5] #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Skyw… 172 77 blond fair blue 19 male #> 2 C-3PO 167 75 NA gold yellow 112 none #> 3 R2-D2 96 32 NA white, bl… red 33 none #> 4 Darth Vad… 202 136 none white yellow 41.9 male #> # ℹ 83 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships group_by(starwars, height_binned = cut(height, 3)) #> # A tibble: 87 × 15 #> # 
Groups: height_binned [4] #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Skyw… 172 77 blond fair blue 19 male #> 2 C-3PO 167 75 NA gold yellow 112 none #> 3 R2-D2 96 32 NA white, bl… red 33 none #> 4 Darth Vad… 202 136 none white yellow 41.9 male #> # ℹ 83 more rows #> # ℹ 7 more variables: gender , homeworld , species , #> # films , vehicles , starships , height_binned group_by(df, \"month\") #> # A tibble: 87 × 4 #> # Groups: \"month\" [1] #> name height mass `\"month\"` #> #> 1 Luke Skywalker 172 77 month #> 2 C-3PO 167 75 month #> 3 R2-D2 96 32 month #> 4 Darth Vader 202 136 month #> # ℹ 83 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/grouping.html","id":"group_by","dir":"Articles","previous_headings":"","what":"group_by()","title":"Grouped data","text":"important grouping verb group_by(): takes data frame one variables group : can see grouping print data: use tally() count number rows group. sort argument useful want see largest groups front. well grouping existing variables, can group function existing variables. 
equivalent performing mutate() group_by():","code":"by_species <- starwars %>% group_by(species) by_sex_gender <- starwars %>% group_by(sex, gender) by_species #> # A tibble: 87 × 14 #> # Groups: species [38] #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Skyw… 172 77 blond fair blue 19 male #> 2 C-3PO 167 75 NA gold yellow 112 none #> 3 R2-D2 96 32 NA white, bl… red 33 none #> 4 Darth Vad… 202 136 none white yellow 41.9 male #> # ℹ 83 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships by_sex_gender #> # A tibble: 87 × 14 #> # Groups: sex, gender [6] #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Skyw… 172 77 blond fair blue 19 male #> 2 C-3PO 167 75 NA gold yellow 112 none #> 3 R2-D2 96 32 NA white, bl… red 33 none #> 4 Darth Vad… 202 136 none white yellow 41.9 male #> # ℹ 83 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships by_species %>% tally() #> # A tibble: 38 × 2 #> species n #> #> 1 Aleena 1 #> 2 Besalisk 1 #> 3 Cerean 1 #> 4 Chagrian 1 #> # ℹ 34 more rows by_sex_gender %>% tally(sort = TRUE) #> # A tibble: 6 × 3 #> # Groups: sex [5] #> sex gender n #> #> 1 male masculine 60 #> 2 female feminine 16 #> 3 none masculine 5 #> 4 NA NA 4 #> # ℹ 2 more rows bmi_breaks <- c(0, 18.5, 25, 30, Inf) starwars %>% group_by(bmi_cat = cut(mass/(height/100)^2, breaks=bmi_breaks)) %>% tally() #> # A tibble: 5 × 2 #> bmi_cat n #> #> 1 (0,18.5] 10 #> 2 (18.5,25] 24 #> 3 (25,30] 13 #> 4 (30,Inf] 12 #> # ℹ 1 more row"},{"path":"https://dplyr.tidyverse.org/dev/articles/grouping.html","id":"group-metadata","dir":"Articles","previous_headings":"","what":"Group metadata","title":"Grouped data","text":"can see underlying group data group_keys(). 
one row group one column grouping variable: can see group row belongs group_indices(): rows group contains group_rows(): Use group_vars() just want names grouping variables:","code":"by_species %>% group_keys() #> # A tibble: 38 × 1 #> species #> #> 1 Aleena #> 2 Besalisk #> 3 Cerean #> 4 Chagrian #> # ℹ 34 more rows by_sex_gender %>% group_keys() #> # A tibble: 6 × 2 #> sex gender #> #> 1 female feminine #> 2 hermaphroditic masculine #> 3 male masculine #> 4 none feminine #> # ℹ 2 more rows by_species %>% group_indices() #> [1] 11 6 6 11 11 11 11 6 11 11 11 11 34 11 24 12 11 38 36 11 11 6 31 #> [24] 11 11 18 11 11 8 26 11 21 11 11 10 10 10 11 30 7 11 11 37 32 32 1 #> [47] 33 35 29 11 3 20 37 27 13 23 16 4 38 38 11 9 17 17 11 11 11 11 5 #> [70] 2 15 15 11 6 25 19 28 14 34 11 38 22 11 11 11 6 11 by_species %>% group_rows() %>% head() #> [6]> #> [[1]] #> [1] 46 #> #> [[2]] #> [1] 70 #> #> [[3]] #> [1] 51 #> #> [[4]] #> [1] 58 #> #> [[5]] #> [1] 69 #> #> [[6]] #> [1] 2 3 8 22 74 86 by_species %>% group_vars() #> [1] \"species\" by_sex_gender %>% group_vars() #> [1] \"sex\" \"gender\""},{"path":"https://dplyr.tidyverse.org/dev/articles/grouping.html","id":"changing-and-adding-to-grouping-variables","dir":"Articles","previous_headings":"Group metadata","what":"Changing and adding to grouping variables","title":"Grouped data","text":"apply group_by() already grouped dataset, overwrite existing grouping variables. example, following code groups homeworld instead species: augment grouping, using .add = TRUE1. 
example, following code groups species homeworld:","code":"by_species %>% group_by(homeworld) %>% tally() #> # A tibble: 49 × 2 #> homeworld n #> #> 1 Alderaan 3 #> 2 Aleen Minor 1 #> 3 Bespin 1 #> 4 Bestine IV 1 #> # ℹ 45 more rows by_species %>% group_by(homeworld, .add = TRUE) %>% tally() #> # A tibble: 57 × 3 #> # Groups: species [38] #> species homeworld n #> #> 1 Aleena Aleen Minor 1 #> 2 Besalisk Ojom 1 #> 3 Cerean Cerea 1 #> 4 Chagrian Champala 1 #> # ℹ 53 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/grouping.html","id":"removing-grouping-variables","dir":"Articles","previous_headings":"Group metadata","what":"Removing grouping variables","title":"Grouped data","text":"remove grouping variables, use ungroup(): can also choose selectively ungroup listing variables want remove:","code":"by_species %>% ungroup() %>% tally() #> # A tibble: 1 × 1 #> n #> #> 1 87 by_sex_gender %>% ungroup(sex) %>% tally() #> # A tibble: 3 × 2 #> gender n #> #> 1 feminine 17 #> 2 masculine 66 #> 3 NA 4"},{"path":"https://dplyr.tidyverse.org/dev/articles/grouping.html","id":"verbs","dir":"Articles","previous_headings":"","what":"Verbs","title":"Grouped data","text":"following sections describe grouping affects main dplyr verbs.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/grouping.html","id":"summarise","dir":"Articles","previous_headings":"Verbs","what":"summarise()","title":"Grouped data","text":"summarise() computes summary group. means starts group_keys(), adding summary variables right hand side: .groups= argument controls grouping structure output. historical behaviour removing right hand side grouping variable corresponds .groups = \"drop_last\" without message .groups = NULL message (default). Since version 1.0.0 groups may also kept (.groups = \"keep\") dropped (.groups = \"drop\"). output longer grouping variables, becomes ungrouped (.e. 
regular tibble).","code":"by_species %>% summarise( n = n(), height = mean(height, na.rm = TRUE) ) #> # A tibble: 38 × 3 #> species n height #> #> 1 Aleena 1 79 #> 2 Besalisk 1 198 #> 3 Cerean 1 198 #> 4 Chagrian 1 196 #> # ℹ 34 more rows by_sex_gender %>% summarise(n = n()) %>% group_vars() #> `summarise()` has grouped output by 'sex'. You can override using the #> `.groups` argument. #> [1] \"sex\" by_sex_gender %>% summarise(n = n(), .groups = \"drop_last\") %>% group_vars() #> [1] \"sex\" by_sex_gender %>% summarise(n = n(), .groups = \"keep\") %>% group_vars() #> [1] \"sex\" \"gender\" by_sex_gender %>% summarise(n = n(), .groups = \"drop\") %>% group_vars() #> character(0)"},{"path":"https://dplyr.tidyverse.org/dev/articles/grouping.html","id":"select-rename-and-relocate","dir":"Articles","previous_headings":"Verbs","what":"select(), rename(), and relocate()","title":"Grouped data","text":"rename() relocate() behave identically grouped ungrouped data affect name position existing columns. Grouped select() almost identical ungrouped select, except always includes grouping variables: don’t want grouping variables, ’ll first ungroup(). (design possibly mistake, ’re stuck now.)","code":"by_species %>% select(mass) #> Adding missing grouping variables: `species` #> # A tibble: 87 × 2 #> # Groups: species [38] #> species mass #> #> 1 Human 77 #> 2 Droid 75 #> 3 Droid 32 #> 4 Human 136 #> # ℹ 83 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/grouping.html","id":"arrange","dir":"Articles","previous_headings":"Verbs","what":"arrange()","title":"Grouped data","text":"Grouped arrange() ungrouped arrange(), unless set .by_group = TRUE, case order first grouping variables. 
Note second example sorted species (group_by() statement) mass (within species).","code":"by_species %>% arrange(desc(mass)) %>% relocate(species, mass) #> # A tibble: 87 × 14 #> # Groups: species [38] #> species mass name height hair_color skin_color eye_color birth_year #> #> 1 Hutt 1358 Jabba D… 175 NA green-tan… orange 600 #> 2 Kaleesh 159 Grievous 216 none brown, wh… green, y… NA #> 3 Droid 140 IG-88 200 none metal red 15 #> 4 Human 136 Darth V… 202 none white yellow 41.9 #> # ℹ 83 more rows #> # ℹ 6 more variables: sex , gender , homeworld , #> # films , vehicles , starships by_species %>% arrange(desc(mass), .by_group = TRUE) %>% relocate(species, mass) #> # A tibble: 87 × 14 #> # Groups: species [38] #> species mass name height hair_color skin_color eye_color birth_year #> #> 1 Aleena 15 Ratts … 79 none grey, blue unknown NA #> 2 Besalisk 102 Dexter… 198 none brown yellow NA #> 3 Cerean 82 Ki-Adi… 198 white pale yellow 92 #> 4 Chagrian NA Mas Am… 196 none blue blue NA #> # ℹ 83 more rows #> # ℹ 6 more variables: sex , gender , homeworld , #> # films , vehicles , starships "},{"path":"https://dplyr.tidyverse.org/dev/articles/grouping.html","id":"mutate","dir":"Articles","previous_headings":"Verbs","what":"mutate()","title":"Grouped data","text":"simple cases vectorised functions, grouped ungrouped mutate() give results. 
differ used summary functions: window functions like min_rank():","code":"# Subtract off global mean starwars %>% select(name, homeworld, mass) %>% mutate(standard_mass = mass - mean(mass, na.rm = TRUE)) #> # A tibble: 87 × 4 #> name homeworld mass standard_mass #> #> 1 Luke Skywalker Tatooine 77 -20.3 #> 2 C-3PO Tatooine 75 -22.3 #> 3 R2-D2 Naboo 32 -65.3 #> 4 Darth Vader Tatooine 136 38.7 #> # ℹ 83 more rows # Subtract off homeworld mean starwars %>% select(name, homeworld, mass) %>% group_by(homeworld) %>% mutate(standard_mass = mass - mean(mass, na.rm = TRUE)) #> # A tibble: 87 × 4 #> # Groups: homeworld [49] #> name homeworld mass standard_mass #> #> 1 Luke Skywalker Tatooine 77 -8.38 #> 2 C-3PO Tatooine 75 -10.4 #> 3 R2-D2 Naboo 32 -32.2 #> 4 Darth Vader Tatooine 136 50.6 #> # ℹ 83 more rows # Overall rank starwars %>% select(name, homeworld, height) %>% mutate(rank = min_rank(height)) #> # A tibble: 87 × 4 #> name homeworld height rank #> #> 1 Luke Skywalker Tatooine 172 28 #> 2 C-3PO Tatooine 167 20 #> 3 R2-D2 Naboo 96 5 #> 4 Darth Vader Tatooine 202 72 #> # ℹ 83 more rows # Rank per homeworld starwars %>% select(name, homeworld, height) %>% group_by(homeworld) %>% mutate(rank = min_rank(height)) #> # A tibble: 87 × 4 #> # Groups: homeworld [49] #> name homeworld height rank #> #> 1 Luke Skywalker Tatooine 172 5 #> 2 C-3PO Tatooine 167 4 #> 3 R2-D2 Naboo 96 1 #> 4 Darth Vader Tatooine 202 10 #> # ℹ 83 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/grouping.html","id":"filter","dir":"Articles","previous_headings":"Verbs","what":"filter()","title":"Grouped data","text":"grouped filter() effectively mutate() generate logical variable, keeps rows variable TRUE. means grouped filters can used summary functions. example, can find tallest character species: can also use filter() remove entire groups. 
example, following code eliminates groups single member:","code":"by_species %>% select(name, species, height) %>% filter(height == max(height)) #> # A tibble: 36 × 3 #> # Groups: species [36] #> name species height #> #> 1 Greedo Rodian 173 #> 2 Jabba Desilijic Tiure Hutt 175 #> 3 Yoda Yoda's species 66 #> 4 Bossk Trandoshan 190 #> # ℹ 32 more rows by_species %>% filter(n() != 1) %>% tally() #> # A tibble: 9 × 2 #> species n #> #> 1 Droid 6 #> 2 Gungan 3 #> 3 Human 35 #> 4 Kaminoan 2 #> # ℹ 5 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/grouping.html","id":"slice-and-friends","dir":"Articles","previous_headings":"Verbs","what":"slice() and friends","title":"Grouped data","text":"slice() friends (slice_head(), slice_tail(), slice_sample(), slice_min() slice_max()) select rows within group. example, can select first observation within species: Similarly, can use slice_min() select smallest n values variable:","code":"by_species %>% relocate(species) %>% slice(1) #> # A tibble: 38 × 14 #> # Groups: species [38] #> species name height mass hair_color skin_color eye_color birth_year #> #> 1 Aleena Ratts … 79 15 none grey, blue unknown NA #> 2 Besalisk Dexter… 198 102 none brown yellow NA #> 3 Cerean Ki-Adi… 198 82 white pale yellow 92 #> 4 Chagrian Mas Am… 196 NA none blue blue NA #> # ℹ 34 more rows #> # ℹ 6 more variables: sex , gender , homeworld , #> # films , vehicles , starships by_species %>% filter(!is.na(height)) %>% slice_min(height, n = 2) #> # A tibble: 47 × 14 #> # Groups: species [38] #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Ratts Tye… 79 15 none grey, blue unknown NA male #> 2 Dexter Je… 198 102 none brown yellow NA male #> 3 Ki-Adi-Mu… 198 82 white pale yellow 92 male #> 4 Mas Amedda 196 NA none blue blue NA male #> # ℹ 43 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships 
"},{"path":"https://dplyr.tidyverse.org/dev/articles/in-packages.html","id":"join-helpers","dir":"Articles","previous_headings":"","what":"Join helpers","title":"Using dplyr in packages","text":"dplyr 1.1.0, ’ve introduced join_by() along 4 helpers performing various types joins: closest() () within() overlaps() join_by() implements domain specific language (DSL) joins, internally interprets calls functions. ’ll notice dplyr::closest() isn’t exported function dplyr (dplyr::() base::within() happen preexisting functions). use closest() package, cause R CMD check note letting know ’ve used symbol doesn’t belong package. silence , place utils::globalVariables(\"closest\") source file package (outside function). dbplyr similar thing SQL functions, can see example . may also add utils package Imports, even though base package. can easily usethis::use_package(\"utils\").","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/in-packages.html","id":"data-masking-and-tidy-selection-notes","dir":"Articles","previous_headings":"","what":"Data masking and tidy selection NOTEs","title":"Using dplyr in packages","text":"’re writing package function uses data masking tidy selection: ’ll get NOTE R CMD check doesn’t know dplyr functions use tidy evaluation: eliminate note: data masking, import .data rlang use .data$var instead var. tidy selection, use \"var\" instead var. 
yields: programming dplyr, see vignette(\"programming\", package = \"dplyr\").","code":"my_summary_function <- function(data) { data %>% select(grp, x, y) %>% filter(x > 0) %>% group_by(grp) %>% summarise(y = mean(y), n = n()) } N checking R code for possible problems my_summary_function: no visible binding for global variable ‘grp’, ‘x’, ‘y’ Undefined global functions or variables: grp x y #' @importFrom rlang .data my_summary_function <- function(data) { data %>% select(\"grp\", \"x\", \"y\") %>% filter(.data$x > 0) %>% group_by(.data$grp) %>% summarise(y = mean(.data$y), n = n()) }"},{"path":"https://dplyr.tidyverse.org/dev/articles/in-packages.html","id":"deprecation","dir":"Articles","previous_headings":"","what":"Deprecation","title":"Using dplyr in packages","text":"section focused updating package code deal backwards incompatible changes dplyr. try minimize backward incompatible changes much possible, sometimes necessary order radically simplify existing code, unlock lot potential value future. start general advice supporting multiple versions dplyr , discuss specific changes dplyr.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/in-packages.html","id":"multiple-dplyr-versions","dir":"Articles","previous_headings":"Deprecation","what":"Multiple dplyr versions","title":"Using dplyr in packages","text":"Ideally, introduce breaking change ’ll want make sure package works released version development version dplyr. typically little bit work, two big advantages: ’s convenient users, since package work regardless version dplyr installed. ’s easier CRAN since doesn’t require massive coordinated release multiple packages. break package, typically send pull request implements patch releasing next version dplyr. time, patch backwards compatible older versions dplyr well. Ideally, ’ll accept patch submit new version package CRAN new version dplyr released. 
make code work multiple versions package, first tool simple statement: Always condition > current-version, >= next-version ensure branch also used development version package. example, current release version \"0.5.0\", development version \"0.5.0.9000\". typically works well branch “new version” introduces new argument slightly different return value. doesn’t work ’ve introduced new function need switch , like: case, checks run dplyr 1.0.10 ’ll get warning using function dplyr doesn’t exist (reframe()) even though branch never run. can get around using utils::getFromNamespace() indirectly call new dplyr function: soon next version dplyr actually CRAN (1.1.0 case), feel free remove code unconditionally use reframe() long also require dplyr (>= 1.1.0) DESCRIPTION file. typically painful users, ’d already updating package run requirement, updating one package along way generally easy. also helps get latest bug fixes features dplyr. Sometimes, isn’t possible avoid call @importFrom. example might importing generic can define method , generic moved packages. 
case, can take advantage little-known feature NAMESPACE file: can include raw statements.","code":"if (utils::packageVersion(\"dplyr\") > \"0.5.0\") { # code for new version } else { # code for old version } if (utils::packageVersion(\"dplyr\") > \"1.0.10\") { dplyr::reframe(df, x = unique(x)) } else { dplyr::summarise(df, x = unique(x)) } if (utils::packageVersion(\"dplyr\") > \"1.0.10\") { utils::getFromNamespace(\"reframe\", \"dplyr\")(df, x = unique(x)) } else { dplyr::summarise(df, x = unique(x)) } #' @rawNamespace #' if (utils::packageVersion(\"dplyr\") > \"0.5.0\") { #' importFrom(\"dbplyr\", \"build_sql\") #' } else { #' importFrom(\"dplyr\", \"build_sql\") #' }"},{"path":"https://dplyr.tidyverse.org/dev/articles/in-packages.html","id":"deprecation-of-mutate_-and-summarise_","dir":"Articles","previous_headings":"Deprecation","what":"Deprecation of mutate_*() and summarise_*()","title":"Using dplyr in packages","text":"following mutate() summarise() variants deprecated dplyr 0.7.0: mutate_each(), summarise_each() following variants superseded dplyr 1.0.0: mutate_all(), summarise_all() mutate_if(), summarise_if() mutate_at(), summarise_at() replaced using mutate() summarise() combination across(), introduced dplyr 1.0.0. 
used mutate_all() mutate_each() without supplying selection, update use across(everything()): provided selection mutate_at() mutate_each(), can switch across() selection: used predicates mutate_if(), can switch using across() combination ():","code":"starwars %>% mutate_each(funs(as.character)) starwars %>% mutate_all(funs(as.character)) starwars %>% mutate(across(everything(), as.character)) starwars %>% mutate_each(funs(as.character), height, mass) starwars %>% mutate_at(vars(height, mass), as.character) starwars %>% mutate(across(c(height, mass), as.character)) starwars %>% mutate_if(is.factor, as.character) starwars %>% mutate(across(where(is.factor), as.character))"},{"path":"https://dplyr.tidyverse.org/dev/articles/in-packages.html","id":"data-frame-subclasses","dir":"Articles","previous_headings":"","what":"Data frame subclasses","title":"Using dplyr in packages","text":"package author extending dplyr work new data frame subclass, encourage read documentation ?dplyr_extending. contains advice implement minimal number extension generics possible get maximal compatibility across dplyr’s verbs.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/programming.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"Programming with dplyr","text":"dplyr verbs use tidy evaluation way. Tidy evaluation special type non-standard evaluation used throughout tidyverse. two basic forms found dplyr: arrange(), count(), filter(), group_by(), mutate(), summarise() use data masking can use data variables variables environment (.e. write my_variable df$my_variable). across(), relocate(), rename(), select(), pull() use tidy selection can easily choose variables based position, name, type (e.g. starts_with(\"x\") .numeric). determine whether function argument uses data masking tidy selection, look documentation: arguments list, ’ll see . 
Data masking tidy selection make interactive data exploration fast fluid, add new challenges attempt use indirectly loop function. vignette shows overcome challenges. ’ll first go basics data masking tidy selection, talk use indirectly, show number recipes solve common problems. vignette give minimum knowledge need effective programmer tidy evaluation. ’d like learn underlying theory, precisely ’s different non-standard evaluation, recommend read Metaprogramming chapters Advanced R.","code":"library(dplyr)"},{"path":"https://dplyr.tidyverse.org/dev/articles/programming.html","id":"data-masking","dir":"Articles","previous_headings":"","what":"Data masking","title":"Programming with dplyr","text":"Data masking makes data manipulation faster requires less typing. (all1) base R functions need refer variables $, leading code repeats name data frame many times: dplyr equivalent code concise data masking allows need type starwars :","code":"starwars[starwars$homeworld == \"Naboo\" & starwars$species == \"Human\", ,] starwars %>% filter(homeworld == \"Naboo\", species == \"Human\")"},{"path":"https://dplyr.tidyverse.org/dev/articles/programming.html","id":"data--and-env-variables","dir":"Articles","previous_headings":"Data masking","what":"Data- and env-variables","title":"Programming with dplyr","text":"key idea behind data masking blurs line two different meanings word “variable”: env-variables “programming” variables live environment. usually created <-. data-variables “statistical” variables live data frame. usually come data files (e.g. .csv, .xls), created manipulating existing variables. make definitions little concrete, take piece code: creates env-variable, df, contains two data-variables, x y. extracts data-variable x env-variable df using $. think blurring meaning “variable” really nice feature interactive data analysis allows refer data-vars , without prefix. seems fairly intuitive since many newer R users attempt write diamonds[x == 0 | y == 0, ]. 
Unfortunately, benefit come free. start program tools, ’re going grapple distinction. hard ’ve never think , ’ll take brain learn new concepts categories. However, ’ve teased apart idea “variable” data-variable env-variable, think ’ll find fairly straightforward use.","code":"df <- data.frame(x = runif(3), y = runif(3)) df$x #> [1] 0.08075014 0.83433304 0.60076089"},{"path":"https://dplyr.tidyverse.org/dev/articles/programming.html","id":"indirection","dir":"Articles","previous_headings":"Data masking","what":"Indirection","title":"Programming with dplyr","text":"main challenge programming functions use data masking arises introduce indirection, .e. want get data-variable env-variable instead directly typing data-variable’s name. two main cases: data-variable function argument (.e. env-variable holds promise2), need embrace argument surrounding doubled braces, like filter(df, {{ var }}). following function uses embracing create wrapper around summarise() computes minimum maximum values variable, well number observations summarised: env-variable character vector, need index .data pronoun [[, like summarise(df, mean = mean(.data[[var]])). following example uses .data count number unique values variable mtcars: Note .data data frame; ’s special construct, pronoun, allows access current variables either directly, .data$x indirectly .data[[var]]. Don’t expect functions work .","code":"var_summary <- function(data, var) { data %>% summarise(n = n(), min = min({{ var }}), max = max({{ var }})) } mtcars %>% group_by(cyl) %>% var_summary(mpg) for (var in names(mtcars)) { mtcars %>% count(.data[[var]]) %>% print() }"},{"path":"https://dplyr.tidyverse.org/dev/articles/programming.html","id":"name-injection","dir":"Articles","previous_headings":"Data masking","what":"Name injection","title":"Programming with dplyr","text":"Many data masking functions also use dynamic dots, gives another useful feature: generating names programmatically using := instead =. 
two basics forms, illustrated tibble(): name env-variable, can use glue syntax interpolate : name derived data-variable argument, can use embracing syntax: Learn ?rlang::`dyn-dots`.","code":"name <- \"susan\" tibble(\"{name}\" := 2) #> # A tibble: 1 × 1 #> susan #> #> 1 2 my_df <- function(x) { tibble(\"{{x}}_2\" := x * 2) } my_var <- 10 my_df(my_var) #> # A tibble: 1 × 1 #> my_var_2 #> #> 1 20"},{"path":"https://dplyr.tidyverse.org/dev/articles/programming.html","id":"tidy-selection","dir":"Articles","previous_headings":"","what":"Tidy selection","title":"Programming with dplyr","text":"Data masking makes easy compute values within dataset. Tidy selection complementary tool makes easy work columns dataset.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/programming.html","id":"the-tidyselect-dsl","dir":"Articles","previous_headings":"Tidy selection","what":"The tidyselect DSL","title":"Programming with dplyr","text":"Underneath functions use tidy selection tidyselect package. provides miniature domain specific language makes easy select columns name, position, type. example: select(df, 1) selects first column; select(df, last_col()) selects last column. select(df, c(, b, c)) selects columns , b, c. select(df, starts_with(\"\")) selects columns whose name starts “”; select(df, ends_with(\"z\")) selects columns whose name ends “z”. select(df, (.numeric)) selects numeric columns. can see details ?dplyr_tidy_select.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/programming.html","id":"indirection-1","dir":"Articles","previous_headings":"Tidy selection","what":"Indirection","title":"Programming with dplyr","text":"data masking, tidy selection makes common task easier cost making less common task harder. want use tidy select indirectly column specification stored intermediate variable, ’ll need learn new tools. 
, two forms indirection: data-variable env-variable function argument, use technique data masking: embrace argument surrounding doubled braces. following function summarises data frame computing mean variables selected user: env-variable character vector, need use all_of() any_of() depending whether want function error variable found. following code uses all_of() select variables found character vector; ! plus all_of() select variables found character vector:","code":"summarise_mean <- function(data, vars) { data %>% summarise(n = n(), across({{ vars }}, mean)) } mtcars %>% group_by(cyl) %>% summarise_mean(where(is.numeric)) vars <- c(\"mpg\", \"vs\") mtcars %>% select(all_of(vars)) mtcars %>% select(!all_of(vars))"},{"path":"https://dplyr.tidyverse.org/dev/articles/programming.html","id":"how-tos","dir":"Articles","previous_headings":"","what":"How-tos","title":"Programming with dplyr","text":"following examples solve grab bag common problems. show minimum amount code can get basic idea; real problems require code combining multiple techniques.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/programming.html","id":"user-supplied-data","dir":"Articles","previous_headings":"How-tos","what":"User-supplied data","title":"Programming with dplyr","text":"check documentation, ’ll see .data never uses data masking tidy select. 
means don’t need anything special function:","code":"mutate_y <- function(data) { mutate(data, y = a + x) }"},{"path":"https://dplyr.tidyverse.org/dev/articles/programming.html","id":"one-or-more-user-supplied-expressions","dir":"Articles","previous_headings":"How-tos","what":"One or more user-supplied expressions","title":"Programming with dplyr","text":"want user supply expression ’s passed onto argument uses data masking tidy select, embrace argument: generalises straightforward way want use one user-supplied expression multiple places: want user provide multiple expressions, embrace : want use name variable output, can embrace variable name left-hand side := {{:","code":"my_summarise <- function(data, group_var) { data %>% group_by({{ group_var }}) %>% summarise(mean = mean(mass)) } my_summarise2 <- function(data, expr) { data %>% summarise( mean = mean({{ expr }}), sum = sum({{ expr }}), n = n() ) } my_summarise3 <- function(data, mean_var, sd_var) { data %>% summarise(mean = mean({{ mean_var }}), sd = sd({{ sd_var }})) } my_summarise4 <- function(data, expr) { data %>% summarise( \"mean_{{expr}}\" := mean({{ expr }}), \"sum_{{expr}}\" := sum({{ expr }}), \"n_{{expr}}\" := n() ) } my_summarise5 <- function(data, mean_var, sd_var) { data %>% summarise( \"mean_{{mean_var}}\" := mean({{ mean_var }}), \"sd_{{sd_var}}\" := sd({{ sd_var }}) ) }"},{"path":"https://dplyr.tidyverse.org/dev/articles/programming.html","id":"any-number-of-user-supplied-expressions","dir":"Articles","previous_headings":"How-tos","what":"Any number of user-supplied expressions","title":"Programming with dplyr","text":"want take arbitrary number user supplied expressions, use .... often useful want give user full control single part pipeline, like group_by() mutate(). use ... way, make sure arguments start . reduce chances argument clashes; see https://design.tidyverse.org/dots-prefix.html details.","code":"my_summarise <- function(.data, ...) { .data %>% group_by(...) 
%>% summarise(mass = mean(mass, na.rm = TRUE), height = mean(height, na.rm = TRUE)) } starwars %>% my_summarise(homeworld) #> # A tibble: 49 × 3 #> homeworld mass height #> #> 1 Alderaan 64 176. #> 2 Aleen Minor 15 79 #> 3 Bespin 79 175 #> 4 Bestine IV 110 180 #> # ℹ 45 more rows starwars %>% my_summarise(sex, gender) #> `summarise()` has grouped output by 'sex'. You can override using the #> `.groups` argument. #> # A tibble: 6 × 4 #> # Groups: sex [5] #> sex gender mass height #> #> 1 female feminine 54.7 172. #> 2 hermaphroditic masculine 1358 175 #> 3 male masculine 80.2 179. #> 4 none feminine NaN 96 #> # ℹ 2 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/programming.html","id":"creating-multiple-columns","dir":"Articles","previous_headings":"How-tos","what":"Creating multiple columns","title":"Programming with dplyr","text":"Sometimes can useful single expression return multiple columns. can returning unnamed data frame: sort function useful inside summarise() mutate() allow add multiple columns returning data frame: Notice set .unpack = TRUE inside across(). tells across() unpack data frame returned quantile_df() respective columns, combining column names original columns (x y) column names returned function (val quant). function returns multiple rows per group, ’ll need switch summarise() reframe(). 
summarise() restricted returning 1 row summaries per group, reframe() lifts restriction:","code":"quantile_df <- function(x, probs = c(0.25, 0.5, 0.75)) { tibble( val = quantile(x, probs), quant = probs ) } x <- 1:5 quantile_df(x) #> # A tibble: 3 × 2 #> val quant #> #> 1 2 0.25 #> 2 3 0.5 #> 3 4 0.75 df <- tibble( grp = rep(1:3, each = 10), x = runif(30), y = rnorm(30) ) df %>% group_by(grp) %>% summarise(quantile_df(x, probs = .5)) #> # A tibble: 3 × 3 #> grp val quant #> #> 1 1 0.361 0.5 #> 2 2 0.541 0.5 #> 3 3 0.456 0.5 df %>% group_by(grp) %>% summarise(across(x:y, ~ quantile_df(.x, probs = .5), .unpack = TRUE)) #> # A tibble: 3 × 5 #> grp x_val x_quant y_val y_quant #> #> 1 1 0.361 0.5 0.174 0.5 #> 2 2 0.541 0.5 -0.0110 0.5 #> 3 3 0.456 0.5 0.0583 0.5 df %>% group_by(grp) %>% reframe(across(x:y, quantile_df, .unpack = TRUE)) #> # A tibble: 9 × 5 #> grp x_val x_quant y_val y_quant #> #> 1 1 0.219 0.25 -0.710 0.25 #> 2 1 0.361 0.5 0.174 0.5 #> 3 1 0.674 0.75 0.524 0.75 #> 4 2 0.315 0.25 -0.690 0.25 #> # ℹ 5 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/programming.html","id":"transforming-user-supplied-variables","dir":"Articles","previous_headings":"How-tos","what":"Transforming user-supplied variables","title":"Programming with dplyr","text":"want user provide set data-variables transformed, use across() pick(): can use idea multiple sets input data-variables: Use .names argument across() control names output.","code":"my_summarise <- function(data, summary_vars) { data %>% summarise(across({{ summary_vars }}, ~ mean(., na.rm = TRUE))) } starwars %>% group_by(species) %>% my_summarise(c(mass, height)) #> # A tibble: 38 × 3 #> species mass height #> #> 1 Aleena 15 79 #> 2 Besalisk 102 198 #> 3 Cerean 82 198 #> 4 Chagrian NaN 196 #> # ℹ 34 more rows my_summarise <- function(data, group_var, summarise_var) { data %>% group_by(pick({{ group_var }})) %>% summarise(across({{ summarise_var }}, mean)) } my_summarise <- function(data, group_var, 
summarise_var) { data %>% group_by(pick({{ group_var }})) %>% summarise(across({{ summarise_var }}, mean, .names = \"mean_{.col}\")) }"},{"path":"https://dplyr.tidyverse.org/dev/articles/programming.html","id":"loop-over-multiple-variables","dir":"Articles","previous_headings":"How-tos","what":"Loop over multiple variables","title":"Programming with dplyr","text":"character vector variable names, want operate loop, index special .data pronoun: technique works loop alternatives like base R apply() family purrr map() family: (Note x .data[[x]] always treated env-variable; never come data.)","code":"for (var in names(mtcars)) { mtcars %>% count(.data[[var]]) %>% print() } mtcars %>% names() %>% purrr::map(~ count(mtcars, .data[[.x]]))"},{"path":"https://dplyr.tidyverse.org/dev/articles/programming.html","id":"use-a-variable-from-an-shiny-input","dir":"Articles","previous_headings":"How-tos","what":"Use a variable from an Shiny input","title":"Programming with dplyr","text":"Many Shiny input controls return character vectors, can use approach : .data[[input$var]]. See https://mastering-shiny.org/action-tidy.html details case studies.","code":"library(shiny) ui <- fluidPage( selectInput(\"var\", \"Variable\", choices = names(diamonds)), tableOutput(\"output\") ) server <- function(input, output, session) { data <- reactive(filter(diamonds, .data[[input$var]] > 0)) output$output <- renderTable(head(data())) }"},{"path":"https://dplyr.tidyverse.org/dev/articles/rowwise.html","id":"creating","dir":"Articles","previous_headings":"","what":"Creating","title":"Row-wise operations","text":"Row-wise operations require special type grouping group consists single row. create rowwise(): Like group_by(), rowwise() doesn’t really anything ; just changes verbs work. example, compare results mutate() following code: use mutate() regular data frame, computes mean x, y, z across rows. apply row-wise data frame, computes mean row. 
can optionally supply “identifier” variables call rowwise(). variables preserved call summarise(), behave somewhat similarly grouping variables passed group_by(): rowwise() just special form grouping, want remove data frame, just call ungroup().","code":"df <- tibble(x = 1:2, y = 3:4, z = 5:6) df %>% rowwise() #> # A tibble: 2 × 3 #> # Rowwise: #> x y z #> #> 1 1 3 5 #> 2 2 4 6 df %>% mutate(m = mean(c(x, y, z))) #> # A tibble: 2 × 4 #> x y z m #> #> 1 1 3 5 3.5 #> 2 2 4 6 3.5 df %>% rowwise() %>% mutate(m = mean(c(x, y, z))) #> # A tibble: 2 × 4 #> # Rowwise: #> x y z m #> #> 1 1 3 5 3 #> 2 2 4 6 4 df <- tibble(name = c(\"Mara\", \"Hadley\"), x = 1:2, y = 3:4, z = 5:6) df %>% rowwise() %>% summarise(m = mean(c(x, y, z))) #> # A tibble: 2 × 1 #> m #> #> 1 3 #> 2 4 df %>% rowwise(name) %>% summarise(m = mean(c(x, y, z))) #> `summarise()` has grouped output by 'name'. You can override using the #> `.groups` argument. #> # A tibble: 2 × 2 #> # Groups: name [2] #> name m #> #> 1 Mara 3 #> 2 Hadley 4"},{"path":"https://dplyr.tidyverse.org/dev/articles/rowwise.html","id":"per-row-summary-statistics","dir":"Articles","previous_headings":"","what":"Per row summary statistics","title":"Row-wise operations","text":"dplyr::summarise() makes really easy summarise values across rows within one column. combined rowwise() also makes easy summarise values across columns within one row. see , ’ll start making little dataset: Let’s say want compute sum w, x, y, z row. start making row-wise data frame: can use mutate() add new column row, summarise() return just one summary: course, lot variables, ’s going tedious type every variable name. 
Instead, can use c_across() uses tidy selection syntax can succinctly select many variables: combine column-wise operations (see vignette(\"colwise\") details) compute proportion total column:","code":"df <- tibble(id = 1:6, w = 10:15, x = 20:25, y = 30:35, z = 40:45) df #> # A tibble: 6 × 5 #> id w x y z #> #> 1 1 10 20 30 40 #> 2 2 11 21 31 41 #> 3 3 12 22 32 42 #> 4 4 13 23 33 43 #> # ℹ 2 more rows rf <- df %>% rowwise(id) rf %>% mutate(total = sum(c(w, x, y, z))) #> # A tibble: 6 × 6 #> # Rowwise: id #> id w x y z total #> #> 1 1 10 20 30 40 100 #> 2 2 11 21 31 41 104 #> 3 3 12 22 32 42 108 #> 4 4 13 23 33 43 112 #> # ℹ 2 more rows rf %>% summarise(total = sum(c(w, x, y, z))) #> `summarise()` has grouped output by 'id'. You can override using the #> `.groups` argument. #> # A tibble: 6 × 2 #> # Groups: id [6] #> id total #> #> 1 1 100 #> 2 2 104 #> 3 3 108 #> 4 4 112 #> # ℹ 2 more rows rf %>% mutate(total = sum(c_across(w:z))) #> # A tibble: 6 × 6 #> # Rowwise: id #> id w x y z total #> #> 1 1 10 20 30 40 100 #> 2 2 11 21 31 41 104 #> 3 3 12 22 32 42 108 #> 4 4 13 23 33 43 112 #> # ℹ 2 more rows rf %>% mutate(total = sum(c_across(where(is.numeric)))) #> # A tibble: 6 × 6 #> # Rowwise: id #> id w x y z total #> #> 1 1 10 20 30 40 100 #> 2 2 11 21 31 41 104 #> 3 3 12 22 32 42 108 #> 4 4 13 23 33 43 112 #> # ℹ 2 more rows rf %>% mutate(total = sum(c_across(w:z))) %>% ungroup() %>% mutate(across(w:z, ~ . / total)) #> # A tibble: 6 × 6 #> id w x y z total #> #> 1 1 0.1 0.2 0.3 0.4 100 #> 2 2 0.106 0.202 0.298 0.394 104 #> 3 3 0.111 0.204 0.296 0.389 108 #> 4 4 0.116 0.205 0.295 0.384 112 #> # ℹ 2 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/rowwise.html","id":"row-wise-summary-functions","dir":"Articles","previous_headings":"Per row summary statistics","what":"Row-wise summary functions","title":"Row-wise operations","text":"rowwise() approach work summary function. need greater speed, ’s worth looking built-row-wise variant summary function. 
efficient operate data frame whole; don’t split rows, compute summary, join results back together . NB: use df (rf) pick() (c_across()) rowMeans() rowSums() take multi-row data frame input. Also note -id needed avoid selecting id pick(). wasn’t required rowwise data frame specified id identifier original call rowwise(), preventing selected grouping column.","code":"df %>% mutate(total = rowSums(pick(where(is.numeric), -id))) #> # A tibble: 6 × 6 #> id w x y z total #> #> 1 1 10 20 30 40 100 #> 2 2 11 21 31 41 104 #> 3 3 12 22 32 42 108 #> 4 4 13 23 33 43 112 #> # ℹ 2 more rows df %>% mutate(mean = rowMeans(pick(where(is.numeric), -id))) #> # A tibble: 6 × 6 #> id w x y z mean #> #> 1 1 10 20 30 40 25 #> 2 2 11 21 31 41 26 #> 3 3 12 22 32 42 27 #> 4 4 13 23 33 43 28 #> # ℹ 2 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/rowwise.html","id":"list-columns","dir":"Articles","previous_headings":"","what":"List-columns","title":"Row-wise operations","text":"rowwise() operations natural pairing list-columns. allow avoid explicit loops /functions apply() purrr::map() families.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/rowwise.html","id":"motivation","dir":"Articles","previous_headings":"List-columns","what":"Motivation","title":"Row-wise operations","text":"Imagine data frame, want count lengths element: might try calling length(): returns length column, length individual values. ’re R documentation aficionado, might know ’s already base R function just purpose: ’re experienced R programmer, might know apply function element list using sapply(), vapply(), one purrr map() functions: wouldn’t nice just write length(x) dplyr figure wanted compute length element inside x? 
Since ’re , might already guessing answer: just another application row-wise pattern.","code":"df <- tibble( x = list(1, 2:3, 4:6) ) df %>% mutate(l = length(x)) #> # A tibble: 3 × 2 #> x l #> #> 1 3 #> 2 3 #> 3 3 df %>% mutate(l = lengths(x)) #> # A tibble: 3 × 2 #> x l #> #> 1 1 #> 2 2 #> 3 3 df %>% mutate(l = sapply(x, length)) #> # A tibble: 3 × 2 #> x l #> #> 1 1 #> 2 2 #> 3 3 df %>% mutate(l = purrr::map_int(x, length)) #> # A tibble: 3 × 2 #> x l #> #> 1 1 #> 2 2 #> 3 3 df %>% rowwise() %>% mutate(l = length(x)) #> # A tibble: 3 × 2 #> # Rowwise: #> x l #> #> 1 1 #> 2 2 #> 3 3"},{"path":"https://dplyr.tidyverse.org/dev/articles/rowwise.html","id":"subsetting","dir":"Articles","previous_headings":"List-columns","what":"Subsetting","title":"Row-wise operations","text":"continue , wanted briefly mention magic makes work. isn’t something ’ll generally need think (’ll just work), ’s useful know something goes wrong. ’s important difference grouped data frame group happens one row, row-wise data frame every group always one row. Take two data frames: compute properties y, ’ll notice results look different: key difference mutate() slices columns pass length(y) grouped mutate uses [ row-wise mutate uses [[. following code gives flavour differences used loop: Note magic applies ’re referring existing columns, ’re creating new rows. 
potentially confusing, ’re fairly confident ’s least worst solution, particularly given hint error message.","code":"df <- tibble(g = 1:2, y = list(1:3, \"a\")) gf <- df %>% group_by(g) rf <- df %>% rowwise(g) gf %>% mutate(type = typeof(y), length = length(y)) #> # A tibble: 2 × 4 #> # Groups: g [2] #> g y type length #> #> 1 1 list 1 #> 2 2 list 1 rf %>% mutate(type = typeof(y), length = length(y)) #> # A tibble: 2 × 4 #> # Rowwise: g #> g y type length #> #> 1 1 integer 3 #> 2 2 character 1 # grouped out1 <- integer(2) for (i in 1:2) { out1[[i]] <- length(df$y[i]) } out1 #> [1] 1 1 # rowwise out2 <- integer(2) for (i in 1:2) { out2[[i]] <- length(df$y[[i]]) } out2 #> [1] 3 1 gf %>% mutate(y2 = y) #> # A tibble: 2 × 3 #> # Groups: g [2] #> g y y2 #> #> 1 1 #> 2 2 rf %>% mutate(y2 = y) #> Error in `mutate()`: #> ℹ In argument: `y2 = y`. #> ℹ In row 1. #> Caused by error: #> ! `y2` must be size 1, not 3. #> ℹ Did you mean: `y2 = list(y)` ? rf %>% mutate(y2 = list(y)) #> # A tibble: 2 × 3 #> # Rowwise: g #> g y y2 #> #> 1 1 #> 2 2 "},{"path":"https://dplyr.tidyverse.org/dev/articles/rowwise.html","id":"modelling","dir":"Articles","previous_headings":"List-columns","what":"Modelling","title":"Row-wise operations","text":"rowwise() data frames allow solve variety modelling problems think particularly elegant way. ’ll start creating nested data frame: little different usual group_by() output: visibly changed structure data. Now three rows (one group), list-col, data, stores data group. Also note output rowwise(); important ’s going make working list data frames much easier. 
one data frame per row, ’s straightforward make one model per row: supplement one set predictions per row: summarise model variety ways: easily access parameters model:","code":"by_cyl <- mtcars %>% nest_by(cyl) by_cyl #> # A tibble: 3 × 2 #> # Rowwise: cyl #> cyl data #> #> 1 4 #> 2 6 #> 3 8 mods <- by_cyl %>% mutate(mod = list(lm(mpg ~ wt, data = data))) mods #> # A tibble: 3 × 3 #> # Rowwise: cyl #> cyl data mod #> #> 1 4 #> 2 6 #> 3 8 mods <- mods %>% mutate(pred = list(predict(mod, data))) mods #> # A tibble: 3 × 4 #> # Rowwise: cyl #> cyl data mod pred #> #> 1 4 #> 2 6 #> 3 8 mods %>% summarise(rmse = sqrt(mean((pred - data$mpg) ^ 2))) #> `summarise()` has grouped output by 'cyl'. You can override using the #> `.groups` argument. #> # A tibble: 3 × 2 #> # Groups: cyl [3] #> cyl rmse #> #> 1 4 3.01 #> 2 6 0.985 #> 3 8 1.87 mods %>% summarise(rsq = summary(mod)$r.squared) #> `summarise()` has grouped output by 'cyl'. You can override using the #> `.groups` argument. #> # A tibble: 3 × 2 #> # Groups: cyl [3] #> cyl rsq #> #> 1 4 0.509 #> 2 6 0.465 #> 3 8 0.423 mods %>% summarise(broom::glance(mod)) #> `summarise()` has grouped output by 'cyl'. You can override using the #> `.groups` argument. 
#> # A tibble: 3 × 13 #> # Groups: cyl [3] #> cyl r.squared adj.r.squared sigma statistic p.value df logLik AIC #> #> 1 4 0.509 0.454 3.33 9.32 0.0137 1 -27.7 61.5 #> 2 6 0.465 0.357 1.17 4.34 0.0918 1 -9.83 25.7 #> 3 8 0.423 0.375 2.02 8.80 0.0118 1 -28.7 63.3 #> # ℹ 4 more variables: BIC , deviance , df.residual , #> # nobs mods %>% reframe(broom::tidy(mod)) #> # A tibble: 6 × 6 #> cyl term estimate std.error statistic p.value #> #> 1 4 (Intercept) 39.6 4.35 9.10 0.00000777 #> 2 4 wt -5.65 1.85 -3.05 0.0137 #> 3 6 (Intercept) 28.4 4.18 6.79 0.00105 #> 4 6 wt -2.78 1.33 -2.08 0.0918 #> # ℹ 2 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/rowwise.html","id":"repeated-function-calls","dir":"Articles","previous_headings":"","what":"Repeated function calls","title":"Row-wise operations","text":"rowwise() doesn’t just work functions return length-1 vector (aka summary functions); can work function result list. means rowwise() mutate() provide elegant way call function many times varying arguments, storing outputs alongside inputs.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/rowwise.html","id":"simulations","dir":"Articles","previous_headings":"Repeated function calls","what":"Simulations","title":"Row-wise operations","text":"think particularly elegant way perform simulations, lets store simulated values along parameters generated . example, imagine following data frame describes properties 3 samples uniform distribution: can supply parameters runif() using rowwise() mutate(): Note use list() - runif() returns multiple values mutate() expression return something length 1. list() means ’ll get list column row list containing multiple values. 
forget use list(), dplyr give hint:","code":"df <- tribble( ~ n, ~ min, ~ max, 1, 0, 1, 2, 10, 100, 3, 100, 1000, ) df %>% rowwise() %>% mutate(data = list(runif(n, min, max))) #> # A tibble: 3 × 4 #> # Rowwise: #> n min max data #> #> 1 1 0 1 #> 2 2 10 100 #> 3 3 100 1000 df %>% rowwise() %>% mutate(data = runif(n, min, max)) #> Error in `mutate()`: #> ℹ In argument: `data = runif(n, min, max)`. #> ℹ In row 2. #> Caused by error: #> ! `data` must be size 1, not 2. #> ℹ Did you mean: `data = list(runif(n, min, max))` ?"},{"path":"https://dplyr.tidyverse.org/dev/articles/rowwise.html","id":"multiple-combinations","dir":"Articles","previous_headings":"Repeated function calls","what":"Multiple combinations","title":"Row-wise operations","text":"want call function every combination inputs? can use expand.grid() (tidyr::expand_grid()) generate data frame repeat pattern :","code":"df <- expand.grid(mean = c(-1, 0, 1), sd = c(1, 10, 100)) df %>% rowwise() %>% mutate(data = list(rnorm(10, mean, sd))) #> # A tibble: 9 × 3 #> # Rowwise: #> mean sd data #> #> 1 -1 1 #> 2 0 1 #> 3 1 1 #> 4 -1 10 #> # ℹ 5 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/rowwise.html","id":"varying-functions","dir":"Articles","previous_headings":"Repeated function calls","what":"Varying functions","title":"Row-wise operations","text":"complicated problems, might also want vary function called. tends bit awkward fit approach columns input tibble less regular. 
’s still possible, ’s natural place use .call():","code":"df <- tribble( ~rng, ~params, \"runif\", list(n = 10), \"rnorm\", list(n = 20), \"rpois\", list(n = 10, lambda = 5), ) %>% rowwise() df %>% mutate(data = list(do.call(rng, params))) #> # A tibble: 3 × 3 #> # Rowwise: #> rng params data #> #> 1 runif #> 2 rnorm #> 3 rpois "},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/articles/rowwise.html","id":"rowwise","dir":"Articles","previous_headings":"Previously","what":"rowwise()","title":"Row-wise operations","text":"rowwise() also questioning quite time, partly didn’t appreciate many people needed native ability compute summaries across multiple variables row. alternative, recommended performing row-wise operations purrr map() functions. However, challenging needed pick map function based number arguments varying type result, required quite knowledge purrr functions. also resistant rowwise() felt like automatically switching [ [[ magical way automatically list()-ing results made () magical. ’ve now persuaded row-wise magic good magic partly people find distinction [ [[ mystifying rowwise() means don’t need think . Since rowwise() clearly useful longer questioning, expect around long term.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/rowwise.html","id":"do","dir":"Articles","previous_headings":"Previously","what":"do()","title":"Row-wise operations","text":"’ve questioned need () quite time, never felt similar dplyr verbs. two main modes operation: Without argument names: call functions input output data frames using . refer “current” group. example, following code gets first row group: superseded pick() plus reframe(), variant summarise() can create multiple rows columns per group. arguments: worked like mutate() automatically wrapped every element list: now believe behaviour magical useful, can replaced summarise() pick(). needed (unlike ), can wrap results list . 
addition pick()/across() increased scope summarise()/reframe() means () longer needed, now superseded.","code":"mtcars %>% group_by(cyl) %>% do(head(., 1)) #> # A tibble: 3 × 11 #> # Groups: cyl [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 2 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 3 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 mtcars %>% group_by(cyl) %>% reframe(head(pick(everything()), 1)) #> # A tibble: 3 × 11 #> cyl mpg disp hp drat wt qsec vs am gear carb #> #> 1 4 22.8 108 93 3.85 2.32 18.6 1 1 4 1 #> 2 6 21 160 110 3.9 2.62 16.5 0 1 4 4 #> 3 8 18.7 360 175 3.15 3.44 17.0 0 0 3 2 mtcars %>% group_by(cyl) %>% do(nrows = nrow(.)) #> # A tibble: 3 × 2 #> # Rowwise: #> cyl nrows #> #> 1 4 #> 2 6 #> 3 8 mtcars %>% group_by(cyl) %>% summarise(nrows = nrow(pick(everything()))) #> # A tibble: 3 × 2 #> cyl nrows #> #> 1 4 11 #> 2 6 7 #> 3 8 14"},{"path":"https://dplyr.tidyverse.org/dev/articles/two-table.html","id":"mutating-joins","dir":"Articles","previous_headings":"","what":"Mutating joins","title":"Two-table verbs","text":"Mutating joins allow combine variables multiple tables. example, consider flights airlines data nycflights13 package. one table flight information abbreviation carrier, another mapping abbreviations full names. can use join add carrier names flight data:","code":"library(nycflights13) # Drop unimportant variables so it's easier to understand the join results. 
flights2 <- flights %>% select(year:day, hour, origin, dest, tailnum, carrier) flights2 %>% left_join(airlines) #> Joining with `by = join_by(carrier)` #> # A tibble: 336,776 × 9 #> year month day hour origin dest tailnum carrier name #> #> 1 2013 1 1 5 EWR IAH N14228 UA United Air Lines I… #> 2 2013 1 1 5 LGA IAH N24211 UA United Air Lines I… #> 3 2013 1 1 5 JFK MIA N619AA AA American Airlines … #> 4 2013 1 1 5 JFK BQN N804JB B6 JetBlue Airways #> 5 2013 1 1 6 LGA ATL N668DN DL Delta Air Lines In… #> # ℹ 336,771 more rows"},{"path":"https://dplyr.tidyverse.org/dev/articles/two-table.html","id":"controlling-how-the-tables-are-matched","dir":"Articles","previous_headings":"Mutating joins","what":"Controlling how the tables are matched","title":"Two-table verbs","text":"well x y, mutating join takes argument controls variables used match observations two tables. ways specify , illustrate various tables nycflights13: NULL, default. dplyr use variables appear tables, natural join. example, flights weather tables match common variables: year, month, day, hour origin. character vector, = \"x\". Like natural join, uses common variables. example, flights planes year columns, mean different things want join tailnum. Note year columns output disambiguated suffix. named character vector: = c(\"x\" = \"\"). match variable x table x variable table y. variables use used output. 
flight origin destination airport, need specify one want join :","code":"flights2 %>% left_join(weather) #> Joining with `by = join_by(year, month, day, hour, origin)` #> # A tibble: 336,776 × 18 #> year month day hour origin dest tailnum carrier temp dewp humid #> #> 1 2013 1 1 5 EWR IAH N14228 UA 39.0 28.0 64.4 #> 2 2013 1 1 5 LGA IAH N24211 UA 39.9 25.0 54.8 #> 3 2013 1 1 5 JFK MIA N619AA AA 39.0 27.0 61.6 #> 4 2013 1 1 5 JFK BQN N804JB B6 39.0 27.0 61.6 #> 5 2013 1 1 6 LGA ATL N668DN DL 39.9 25.0 54.8 #> # ℹ 336,771 more rows #> # ℹ 7 more variables: wind_dir , wind_speed , wind_gust , #> # precip , pressure , visib , time_hour flights2 %>% left_join(planes, by = \"tailnum\") #> # A tibble: 336,776 × 16 #> year.x month day hour origin dest tailnum carrier year.y type #> #> 1 2013 1 1 5 EWR IAH N14228 UA 1999 Fixed wing… #> 2 2013 1 1 5 LGA IAH N24211 UA 1998 Fixed wing… #> 3 2013 1 1 5 JFK MIA N619AA AA 1990 Fixed wing… #> 4 2013 1 1 5 JFK BQN N804JB B6 2012 Fixed wing… #> 5 2013 1 1 6 LGA ATL N668DN DL 1991 Fixed wing… #> # ℹ 336,771 more rows #> # ℹ 6 more variables: manufacturer , model , engines , #> # seats , speed , engine flights2 %>% left_join(airports, c(\"dest\" = \"faa\")) #> # A tibble: 336,776 × 15 #> year month day hour origin dest tailnum carrier name lat lon #> #> 1 2013 1 1 5 EWR IAH N14228 UA George… 30.0 -95.3 #> 2 2013 1 1 5 LGA IAH N24211 UA George… 30.0 -95.3 #> 3 2013 1 1 5 JFK MIA N619AA AA Miami … 25.8 -80.3 #> 4 2013 1 1 5 JFK BQN N804JB B6 NA NA NA #> 5 2013 1 1 6 LGA ATL N668DN DL Hartsf… 33.6 -84.4 #> # ℹ 336,771 more rows #> # ℹ 4 more variables: alt , tz , dst , tzone flights2 %>% left_join(airports, c(\"origin\" = \"faa\")) #> # A tibble: 336,776 × 15 #> year month day hour origin dest tailnum carrier name lat lon #> #> 1 2013 1 1 5 EWR IAH N14228 UA Newark… 40.7 -74.2 #> 2 2013 1 1 5 LGA IAH N24211 UA La Gua… 40.8 -73.9 #> 3 2013 1 1 5 JFK MIA N619AA AA John F… 40.6 -73.8 #> 4 2013 1 1 5 JFK BQN N804JB B6 John F… 40.6 -73.8 #> 5 
2013 1 1 6 LGA ATL N668DN DL La Gua… 40.8 -73.9 #> # ℹ 336,771 more rows #> # ℹ 4 more variables: alt , tz , dst , tzone "},{"path":"https://dplyr.tidyverse.org/dev/articles/two-table.html","id":"types-of-join","dir":"Articles","previous_headings":"Mutating joins","what":"Types of join","title":"Two-table verbs","text":"four types mutating join, differ behaviour match found. ’ll illustrate simple example: inner_join(x, y) includes observations match x y. left_join(x, y) includes observations x, regardless whether match . commonly used join ensures don’t lose observations primary table. right_join(x, y) includes observations y. ’s equivalent left_join(y, x), columns rows ordered differently. full_join() includes observations x y. left, right full joins collectively know outer joins. row doesn’t match outer join, new variables filled missing values.","code":"df1 <- tibble(x = c(1, 2), y = 2:1) df2 <- tibble(x = c(3, 1), a = 10, b = \"a\") df1 %>% inner_join(df2) %>% knitr::kable() #> Joining with `by = join_by(x)` df1 %>% left_join(df2) #> Joining with `by = join_by(x)` #> # A tibble: 2 × 4 #> x y a b #> #> 1 1 2 10 a #> 2 2 1 NA NA df1 %>% right_join(df2) #> Joining with `by = join_by(x)` #> # A tibble: 2 × 4 #> x y a b #> #> 1 1 2 10 a #> 2 3 NA 10 a df2 %>% left_join(df1) #> Joining with `by = join_by(x)` #> # A tibble: 2 × 4 #> x a b y #> #> 1 3 10 a NA #> 2 1 10 a 2 df1 %>% full_join(df2) #> Joining with `by = join_by(x)` #> # A tibble: 3 × 4 #> x y a b #> #> 1 1 2 10 a #> 2 2 1 NA NA #> 3 3 NA 10 a"},{"path":"https://dplyr.tidyverse.org/dev/articles/two-table.html","id":"observations","dir":"Articles","previous_headings":"Mutating joins","what":"Observations","title":"Two-table verbs","text":"mutating joins primarily used add new variables, can also generate new observations. 
match unique, join add possible combinations (Cartesian product) matching observations:","code":"df1 <- tibble(x = c(1, 1, 2), y = 1:3) df2 <- tibble(x = c(1, 1, 2), z = c(\"a\", \"b\", \"a\")) df1 %>% left_join(df2) #> Joining with `by = join_by(x)` #> Warning in left_join(., df2): Detected an unexpected many-to-many relationship between `x` and `y`. #> ℹ Row 1 of `x` matches multiple rows in `y`. #> ℹ Row 1 of `y` matches multiple rows in `x`. #> ℹ If a many-to-many relationship is expected, set `relationship = #> \"many-to-many\"` to silence this warning. #> # A tibble: 5 × 3 #> x y z #> #> 1 1 1 a #> 2 1 1 b #> 3 1 2 a #> 4 1 2 b #> 5 2 3 a"},{"path":"https://dplyr.tidyverse.org/dev/articles/two-table.html","id":"filtering-joins","dir":"Articles","previous_headings":"","what":"Filtering joins","title":"Two-table verbs","text":"Filtering joins match observations way mutating joins, affect observations, variables. two types: semi_join(x, y) keeps observations x match y. anti_join(x, y) drops observations x match y. useful diagnosing join mismatches. example, many flights nycflights13 dataset don’t matching tail number planes table: ’re worried observations joins match, start semi_join() anti_join(). semi_join() anti_join() never duplicate; ever remove observations.","code":"library(\"nycflights13\") flights %>% anti_join(planes, by = \"tailnum\") %>% count(tailnum, sort = TRUE) #> # A tibble: 722 × 2 #> tailnum n #> #> 1 NA 2512 #> 2 N725MQ 575 #> 3 N722MQ 513 #> 4 N723MQ 507 #> 5 N713MQ 483 #> # ℹ 717 more rows df1 <- tibble(x = c(1, 1, 3, 4), y = 1:4) df2 <- tibble(x = c(1, 1, 2), z = c(\"a\", \"b\", \"a\")) # Four rows to start with: df1 %>% nrow() #> [1] 4 # And we get four rows after the join df1 %>% inner_join(df2, by = \"x\") %>% nrow() #> Warning in inner_join(., df2, by = \"x\"): Detected an unexpected many-to-many relationship between `x` and `y`. #> ℹ Row 1 of `x` matches multiple rows in `y`. #> ℹ Row 1 of `y` matches multiple rows in `x`. 
#> ℹ If a many-to-many relationship is expected, set `relationship = #> \"many-to-many\"` to silence this warning. #> [1] 4 # But only two rows actually match df1 %>% semi_join(df2, by = \"x\") %>% nrow() #> [1] 2"},{"path":"https://dplyr.tidyverse.org/dev/articles/two-table.html","id":"set-operations","dir":"Articles","previous_headings":"","what":"Set operations","title":"Two-table verbs","text":"final type two-table verb set operations. expect x y inputs variables, treat observations like sets: intersect(x, y): return observations x y union(x, y): return unique observations x y setdiff(x, y): return observations x, y. Given simple data: four possibilities :","code":"(df1 <- tibble(x = 1:2, y = c(1L, 1L))) #> # A tibble: 2 × 2 #> x y #> #> 1 1 1 #> 2 2 1 (df2 <- tibble(x = 1:2, y = 1:2)) #> # A tibble: 2 × 2 #> x y #> #> 1 1 1 #> 2 2 2 intersect(df1, df2) #> # A tibble: 1 × 2 #> x y #> #> 1 1 1 # Note that we get 3 rows, not 4 union(df1, df2) #> # A tibble: 3 × 2 #> x y #> #> 1 1 1 #> 2 2 1 #> 3 2 2 setdiff(df1, df2) #> # A tibble: 1 × 2 #> x y #> #> 1 2 1 setdiff(df2, df1) #> # A tibble: 1 × 2 #> x y #> #> 1 2 2"},{"path":"https://dplyr.tidyverse.org/dev/articles/two-table.html","id":"multiple-table-verbs","dir":"Articles","previous_headings":"","what":"Multiple-table verbs","title":"Two-table verbs","text":"dplyr provide functions working three tables. Instead use purrr::reduce() Reduce(), described Advanced R, iteratively combine two-table verbs handle many tables need.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/window-functions.html","id":"types-of-window-functions","dir":"Articles","previous_headings":"","what":"Types of window functions","title":"Window functions","text":"five main families window functions. Two families unrelated aggregation functions: Ranking ordering functions: row_number(), min_rank(), dense_rank(), cume_dist(), percent_rank(), ntile(). functions take vector order , return various types ranks. 
Offsets lead() lag() allow access previous next values vector, making easy compute differences trends. three families variations familiar aggregate functions: Cumulative aggregates: cumsum(), cummin(), cummax() (base R), cumall(), cumany(), cummean() (dplyr). Rolling aggregates operate fixed width window. won’t find base R dplyr, many implementations packages, RcppRoll. Recycled aggregates, aggregate repeated match length input. needed R vector recycling automatically recycles aggregates needed. important SQL, presence aggregation function usually tells database return one row per group. family described detail , focussing general goals use dplyr. details, refer individual function documentation.","code":""},{"path":"https://dplyr.tidyverse.org/dev/articles/window-functions.html","id":"ranking-functions","dir":"Articles","previous_headings":"","what":"Ranking functions","title":"Window functions","text":"ranking functions variations theme, differing handle ties: ’re familiar R, may recognise row_number() min_rank() can computed base rank() function various values ties.method argument. functions provided save little typing, make easier convert R SQL. Two ranking functions return numbers 0 1. percent_rank() gives percentage rank; cume_dist() gives proportion values less equal current value. useful want select (example) top 10% records within group. example: Finally, ntile() divides data n evenly sized buckets. ’s coarse ranking, can used mutate() divide data buckets summary. example, use ntile() divide players within team four ranked groups, calculate average number games within group. ranking functions rank lowest highest small input values get small ranks. 
Use desc() rank highest lowest.","code":"x <- c(1, 1, 2, 2, 2) row_number(x) #> [1] 1 2 3 4 5 min_rank(x) #> [1] 1 1 3 3 3 dense_rank(x) #> [1] 1 1 2 2 2 cume_dist(x) #> [1] 0.4 0.4 1.0 1.0 1.0 percent_rank(x) #> [1] 0.0 0.0 0.5 0.5 0.5 filter(players, cume_dist(desc(G)) < 0.1) #> # A tibble: 1,090 × 7 #> # Groups: playerID [995] #> playerID yearID teamID G AB R H #> #> 1 aaronha01 1963 ML1 161 631 121 201 #> 2 aaronha01 1968 ATL 160 606 84 174 #> 3 abbotji01 1991 CAL 34 0 0 0 #> 4 abernte02 1965 CHN 84 18 1 3 #> # ℹ 1,086 more rows by_team_player <- group_by(batting, teamID, playerID) by_team <- summarise(by_team_player, G = sum(G)) #> `summarise()` has grouped output by 'teamID'. You can override using the #> `.groups` argument. by_team_quartile <- group_by(by_team, quartile = ntile(G, 4)) summarise(by_team_quartile, mean(G)) #> # A tibble: 4 × 2 #> quartile `mean(G)` #> #> 1 1 22.7 #> 2 2 91.8 #> 3 3 253. #> 4 4 961."},{"path":"https://dplyr.tidyverse.org/dev/articles/window-functions.html","id":"lead-and-lag","dir":"Articles","previous_headings":"","what":"Lead and lag","title":"Window functions","text":"lead() lag() produce offset versions input vector either ahead behind original vector. can use : Compute differences percent changes. Using lag() convenient diff() n inputs diff() returns n - 1 outputs. Find value changes. lead() lag() optional argument order_by. set, instead using row order determine value comes another, use another variable. important already sorted data, want sort one way lag another. 
’s simple example happens don’t specify order_by need :","code":"x <- 1:5 lead(x) #> [1] 2 3 4 5 NA lag(x) #> [1] NA 1 2 3 4 # Compute the relative change in games played mutate(players, G_delta = G - lag(G)) # Find when a player changed teams filter(players, teamID != lag(teamID)) df <- data.frame(year = 2000:2005, value = (0:5) ^ 2) scrambled <- df[sample(nrow(df)), ] wrong <- mutate(scrambled, prev_value = lag(value)) arrange(wrong, year) #> year value prev_value #> 1 2000 0 4 #> 2 2001 1 0 #> 3 2002 4 9 #> 4 2003 9 16 #> 5 2004 16 NA #> 6 2005 25 1 right <- mutate(scrambled, prev_value = lag(value, order_by = year)) arrange(right, year) #> year value prev_value #> 1 2000 0 NA #> 2 2001 1 0 #> 3 2002 4 1 #> 4 2003 9 4 #> 5 2004 16 9 #> 6 2005 25 16"},{"path":"https://dplyr.tidyverse.org/dev/articles/window-functions.html","id":"cumulative-aggregates","dir":"Articles","previous_headings":"","what":"Cumulative aggregates","title":"Window functions","text":"Base R provides cumulative sum (cumsum()), cumulative min (cummin()), cumulative max (cummax()). (also provides cumprod() rarely useful). common accumulating functions cumany() cumall(), cumulative versions || &&, cummean(), cumulative mean. included base R, efficient versions provided dplyr. cumany() cumall() useful selecting rows , rows , condition true first (last) time. example, can use cumany() find records player played year 150 games: Like lead lag, may want control order accumulation occurs. None built functions order_by argument dplyr provides helper: order_by(). 
give variable want order , call window function: function uses bit non-standard evaluation, wouldn’t recommend using inside another function; use simpler less concise with_order() instead.","code":"filter(players, cumany(G > 150)) x <- 1:10 y <- 10:1 order_by(y, cumsum(x)) #> [1] 55 54 52 49 45 40 34 27 19 10"},{"path":"https://dplyr.tidyverse.org/dev/articles/window-functions.html","id":"recycled-aggregates","dir":"Articles","previous_headings":"","what":"Recycled aggregates","title":"Window functions","text":"R’s vector recycling makes easy select values higher lower summary. call recycled aggregate value aggregate recycled length original vector. Recycled aggregates useful want find records greater mean less median: SQL databases don’t equivalent median() quantile(), filtering can achieve effect ntile(). example, x > median(x) equivalent ntile(x, 2) == 2; x > quantile(x, 75) equivalent ntile(x, 100) > 75 ntile(x, 4) > 3. can also use idea select records highest (x == max(x)) lowest value (x == min(x)) field, ranking functions give control ties, allow select number records. Recycled aggregates also useful conjunction mutate(). 
example, batting data, compute “career year”, number years player played since entered league: , introductory example, compute z-score:","code":"filter(players, G > mean(G)) filter(players, G < median(G)) filter(players, ntile(G, 2) == 2) mutate(players, career_year = yearID - min(yearID) + 1) #> # A tibble: 20,874 × 8 #> # Groups: playerID [1,436] #> playerID yearID teamID G AB R H career_year #> #> 1 aaronha01 1954 ML1 122 468 58 131 1 #> 2 aaronha01 1955 ML1 153 602 105 189 2 #> 3 aaronha01 1956 ML1 153 609 106 200 3 #> 4 aaronha01 1957 ML1 151 615 118 198 4 #> # ℹ 20,870 more rows mutate(players, G_z = (G - mean(G)) / sd(G)) #> # A tibble: 20,874 × 8 #> # Groups: playerID [1,436] #> playerID yearID teamID G AB R H G_z #> #> 1 aaronha01 1954 ML1 122 468 58 131 -1.16 #> 2 aaronha01 1955 ML1 153 602 105 189 0.519 #> 3 aaronha01 1956 ML1 153 609 106 200 0.519 #> 4 aaronha01 1957 ML1 151 615 118 198 0.411 #> # ℹ 20,870 more rows"},{"path":"https://dplyr.tidyverse.org/dev/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Hadley Wickham. Author, maintainer. Romain François. Author. Lionel Henry. Author. Kirill Müller. Author. Davis Vaughan. Author. . Copyright holder, funder.","code":""},{"path":"https://dplyr.tidyverse.org/dev/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Wickham H, François R, Henry L, Müller K, Vaughan D (2024). dplyr: Grammar Data Manipulation. 
R package version 1.1.4.9000, https://github.com/tidyverse/dplyr, https://dplyr.tidyverse.org.","code":"@Manual{, title = {dplyr: A Grammar of Data Manipulation}, author = {Hadley Wickham and Romain François and Lionel Henry and Kirill Müller and Davis Vaughan}, year = {2024}, note = {R package version 1.1.4.9000, https://github.com/tidyverse/dplyr}, url = {https://dplyr.tidyverse.org}, }"},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/index.html","id":"overview","dir":"","previous_headings":"","what":"Overview","title":"A Grammar of Data Manipulation","text":"dplyr grammar data manipulation, providing consistent set verbs help solve common data manipulation challenges: mutate() adds new variables functions existing variables select() picks variables based names. filter() picks cases based values. summarise() reduces multiple values single summary. arrange() changes ordering rows. combine naturally group_by() allows perform operation “group”. can learn vignette(\"dplyr\"). well single-table verbs, dplyr also provides variety two-table verbs, can learn vignette(\"two-table\"). new dplyr, best place start data transformation chapter R Data Science.","code":""},{"path":"https://dplyr.tidyverse.org/dev/index.html","id":"backends","dir":"","previous_headings":"","what":"Backends","title":"A Grammar of Data Manipulation","text":"addition data frames/tibbles, dplyr makes working computational backends accessible efficient. list alternative backends: arrow larger--memory datasets, including remote cloud storage like AWS S3, using Apache Arrow C++ engine, Acero. dtplyr large, -memory datasets. Translates dplyr code high performance data.table code. dbplyr data stored relational database. Translates dplyr code SQL. duckplyr using duckdb large, -memory datasets zero extra copies. Translates dplyr code high performance duckdb queries automatic R fallback translation isn’t possible. duckdb large datasets still small enough fit computer. 
sparklyr large datasets stored Apache Spark.","code":""},{"path":"https://dplyr.tidyverse.org/dev/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"A Grammar of Data Manipulation","text":"","code":"# The easiest way to get dplyr is to install the whole tidyverse: install.packages(\"tidyverse\") # Alternatively, install just dplyr: install.packages(\"dplyr\")"},{"path":"https://dplyr.tidyverse.org/dev/index.html","id":"development-version","dir":"","previous_headings":"Installation","what":"Development version","title":"A Grammar of Data Manipulation","text":"get bug fix use feature development version, can install development version dplyr GitHub.","code":"# install.packages(\"pak\") pak::pak(\"tidyverse/dplyr\")"},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/index.html","id":"usage","dir":"","previous_headings":"","what":"Usage","title":"A Grammar of Data Manipulation","text":"","code":"library(dplyr) starwars %>% filter(species == \"Droid\") #> # A tibble: 6 × 14 #> name height mass hair_color skin_color eye_color birth_year sex gender #> #> 1 C-3PO 167 75 gold yellow 112 none masculi… #> 2 R2-D2 96 32 white, blue red 33 none masculi… #> 3 R5-D4 97 32 white, red red NA none masculi… #> 4 IG-88 200 140 none metal red 15 none masculi… #> 5 R4-P17 96 NA none silver, red red, blue NA none feminine #> # ℹ 1 more row #> # ℹ 5 more variables: homeworld , species , films , #> # vehicles , starships starwars %>% select(name, ends_with(\"color\")) #> # A tibble: 87 × 4 #> name hair_color skin_color eye_color #> #> 1 Luke Skywalker blond fair blue #> 2 C-3PO gold yellow #> 3 R2-D2 white, blue red #> 4 Darth Vader none white yellow #> 5 Leia Organa brown light brown #> # ℹ 82 more rows starwars %>% mutate(name, bmi = mass / ((height / 100) ^ 2)) %>% select(name:mass, bmi) #> # A tibble: 87 × 4 #> name height mass bmi #> #> 1 Luke Skywalker 172 77 26.0 #> 2 C-3PO 167 75 26.9 #> 3 R2-D2 96 32 34.7 #> 4 Darth Vader 202 136 
33.3 #> 5 Leia Organa 150 49 21.8 #> # ℹ 82 more rows starwars %>% arrange(desc(mass)) #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex gender #> #> 1 Jabba De… 175 1358 green-tan… orange 600 herm… mascu… #> 2 Grievous 216 159 none brown, wh… green, y… NA male mascu… #> 3 IG-88 200 140 none metal red 15 none mascu… #> 4 Darth Va… 202 136 none white yellow 41.9 male mascu… #> 5 Tarfful 234 136 brown brown blue NA male mascu… #> # ℹ 82 more rows #> # ℹ 5 more variables: homeworld , species , films , #> # vehicles , starships starwars %>% group_by(species) %>% summarise( n = n(), mass = mean(mass, na.rm = TRUE) ) %>% filter( n > 1, mass > 50 ) #> # A tibble: 9 × 3 #> species n mass #> #> 1 Droid 6 69.8 #> 2 Gungan 3 74 #> 3 Human 35 81.3 #> 4 Kaminoan 2 88 #> 5 Mirialan 2 53.1 #> # ℹ 4 more rows"},{"path":"https://dplyr.tidyverse.org/dev/index.html","id":"getting-help","dir":"","previous_headings":"","what":"Getting help","title":"A Grammar of Data Manipulation","text":"encounter clear bug, please file issue minimal reproducible example GitHub. questions discussion, please use forum.posit.co.","code":""},{"path":"https://dplyr.tidyverse.org/dev/index.html","id":"code-of-conduct","dir":"","previous_headings":"","what":"Code of conduct","title":"A Grammar of Data Manipulation","text":"Please note project released Contributor Code Conduct. participating project agree abide terms.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/across.html","id":null,"dir":"Reference","previous_headings":"","what":"Apply a function (or functions) across multiple columns — across","title":"Apply a function (or functions) across multiple columns — across","text":"across() makes easy apply transformation multiple columns, allowing use select() semantics inside \"data-masking\" functions like summarise() mutate(). See vignette(\"colwise\") details. 
if_any() if_all() apply predicate function selection columns combine results single logical vector: if_any() TRUE predicate TRUE selected columns, if_all() TRUE predicate TRUE selected columns. just need select columns without applying transformation , probably want use pick() instead. across() supersedes family \"scoped variants\" like summarise_at(), summarise_if(), summarise_all().","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/across.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Apply a function (or functions) across multiple columns — across","text":"","code":"across(.cols, .fns, ..., .names = NULL, .unpack = FALSE) if_any(.cols, .fns, ..., .names = NULL) if_all(.cols, .fns, ..., .names = NULL)"},{"path":"https://dplyr.tidyverse.org/dev/reference/across.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Apply a function (or functions) across multiple columns — across","text":".cols Columns transform. select grouping columns already automatically handled verb (.e. summarise() mutate()). .fns Functions apply selected columns. Possible values : function, e.g. mean. purrr-style lambda, e.g. ~ mean(.x, na.rm = TRUE) named list functions lambdas, e.g. list(mean = mean, n_miss = ~ sum(.na(.x)). function applied column, output named combining function name column name using glue specification .names. Within functions can use cur_column() cur_group() access current column grouping keys respectively. ... Additional arguments function calls .fns longer accepted ... clear evaluated: per across() per group? Instead supply additional arguments directly .fns using lambda. example, instead across(:b, mean, na.rm = TRUE) write across(:b, ~ mean(.x, na.rm = TRUE)). .names glue specification describes name output columns. can use {.col} stand selected column name, {.fn} stand name function applied. 
default (NULL) equivalent \"{.col}\" single function case \"{.col}_{.fn}\" case list used .fns. .unpack Optionally unpack data frames returned functions .fns, expands df-columns individual columns, retaining number rows data frame. FALSE, default, unpacking done. TRUE, unpacking done default glue specification \"{outer}_{inner}\". Otherwise, single glue specification can supplied describe name unpacked columns. can use {outer} refer name originally generated .names, {inner} refer names data frame unpacking.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/across.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Apply a function (or functions) across multiple columns — across","text":"across() typically returns tibble one column column .cols function .fns. .unpack used, columns may returned depending results .fns unpacked. if_any() if_all() return logical vector.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/across.html","id":"timing-of-evaluation","dir":"Reference","previous_headings":"","what":"Timing of evaluation","title":"Apply a function (or functions) across multiple columns — across","text":"R code dplyr verbs generally evaluated per group. Inside across() however, code evaluated combination columns groups. 
evaluation timing important, example generating random variables, think happen place code consequence.","code":"gdf <- tibble(g = c(1, 1, 2, 3), v1 = 10:13, v2 = 20:23) %>% group_by(g) set.seed(1) # Outside: 1 normal variate n <- rnorm(1) gdf %>% mutate(across(v1:v2, ~ .x + n)) #> # A tibble: 4 x 3 #> # Groups: g [3] #> g v1 v2 #> #> 1 1 9.37 19.4 #> 2 1 10.4 20.4 #> 3 2 11.4 21.4 #> 4 3 12.4 22.4 # Inside a verb: 3 normal variates (ngroup) gdf %>% mutate(n = rnorm(1), across(v1:v2, ~ .x + n)) #> # A tibble: 4 x 4 #> # Groups: g [3] #> g v1 v2 n #> #> 1 1 10.2 20.2 0.184 #> 2 1 11.2 21.2 0.184 #> 3 2 11.2 21.2 -0.836 #> 4 3 14.6 24.6 1.60 # Inside `across()`: 6 normal variates (ncol * ngroup) gdf %>% mutate(across(v1:v2, ~ .x + rnorm(1))) #> # A tibble: 4 x 3 #> # Groups: g [3] #> g v1 v2 #> #> 1 1 10.3 20.7 #> 2 1 11.3 21.7 #> 3 2 11.2 22.6 #> 4 3 13.5 22.7"},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/across.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Apply a function (or functions) across multiple columns — across","text":"","code":"# For better printing iris <- as_tibble(iris) # across() ----------------------------------------------------------------- # Different ways to select the same set of columns # See for details iris %>% mutate(across(c(Sepal.Length, Sepal.Width), round)) #> # A tibble: 150 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5 4 1.4 0.2 setosa #> 2 5 3 1.4 0.2 setosa #> 3 5 3 1.3 0.2 setosa #> 4 5 3 1.5 0.2 setosa #> 5 5 4 1.4 0.2 setosa #> 6 5 4 1.7 0.4 setosa #> 7 5 3 1.4 0.3 setosa #> 8 5 3 1.5 0.2 setosa #> 9 4 3 1.4 0.2 setosa #> 10 5 3 1.5 0.1 setosa #> # ℹ 140 more rows iris %>% mutate(across(c(1, 2), round)) #> # A tibble: 150 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5 4 1.4 0.2 setosa #> 2 5 3 1.4 0.2 setosa #> 3 5 3 1.3 0.2 setosa #> 4 5 3 1.5 0.2 setosa #> 5 5 4 1.4 0.2 setosa #> 6 5 4 1.7 0.4 setosa #> 7 
5 3 1.4 0.3 setosa #> 8 5 3 1.5 0.2 setosa #> 9 4 3 1.4 0.2 setosa #> 10 5 3 1.5 0.1 setosa #> # ℹ 140 more rows iris %>% mutate(across(1:Sepal.Width, round)) #> # A tibble: 150 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5 4 1.4 0.2 setosa #> 2 5 3 1.4 0.2 setosa #> 3 5 3 1.3 0.2 setosa #> 4 5 3 1.5 0.2 setosa #> 5 5 4 1.4 0.2 setosa #> 6 5 4 1.7 0.4 setosa #> 7 5 3 1.4 0.3 setosa #> 8 5 3 1.5 0.2 setosa #> 9 4 3 1.4 0.2 setosa #> 10 5 3 1.5 0.1 setosa #> # ℹ 140 more rows iris %>% mutate(across(where(is.double) & !c(Petal.Length, Petal.Width), round)) #> # A tibble: 150 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5 4 1.4 0.2 setosa #> 2 5 3 1.4 0.2 setosa #> 3 5 3 1.3 0.2 setosa #> 4 5 3 1.5 0.2 setosa #> 5 5 4 1.4 0.2 setosa #> 6 5 4 1.7 0.4 setosa #> 7 5 3 1.4 0.3 setosa #> 8 5 3 1.5 0.2 setosa #> 9 4 3 1.4 0.2 setosa #> 10 5 3 1.5 0.1 setosa #> # ℹ 140 more rows # Using an external vector of names cols <- c(\"Sepal.Length\", \"Petal.Width\") iris %>% mutate(across(all_of(cols), round)) #> # A tibble: 150 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5 3.5 1.4 0 setosa #> 2 5 3 1.4 0 setosa #> 3 5 3.2 1.3 0 setosa #> 4 5 3.1 1.5 0 setosa #> 5 5 3.6 1.4 0 setosa #> 6 5 3.9 1.7 0 setosa #> 7 5 3.4 1.4 0 setosa #> 8 5 3.4 1.5 0 setosa #> 9 4 2.9 1.4 0 setosa #> 10 5 3.1 1.5 0 setosa #> # ℹ 140 more rows # If the external vector is named, the output columns will be named according # to those names names(cols) <- tolower(cols) iris %>% mutate(across(all_of(cols), round)) #> # A tibble: 150 × 7 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species sepal.length #> #> 1 5.1 3.5 1.4 0.2 setosa 5 #> 2 4.9 3 1.4 0.2 setosa 5 #> 3 4.7 3.2 1.3 0.2 setosa 5 #> 4 4.6 3.1 1.5 0.2 setosa 5 #> 5 5 3.6 1.4 0.2 setosa 5 #> 6 5.4 3.9 1.7 0.4 setosa 5 #> 7 4.6 3.4 1.4 0.3 setosa 5 #> 8 5 3.4 1.5 0.2 setosa 5 #> 9 4.4 2.9 1.4 0.2 setosa 4 #> 10 4.9 3.1 1.5 0.1 setosa 5 #> # ℹ 140 more rows #> 
# ℹ 1 more variable: petal.width # A purrr-style formula iris %>% group_by(Species) %>% summarise(across(starts_with(\"Sepal\"), ~ mean(.x, na.rm = TRUE))) #> # A tibble: 3 × 3 #> Species Sepal.Length Sepal.Width #> #> 1 setosa 5.01 3.43 #> 2 versicolor 5.94 2.77 #> 3 virginica 6.59 2.97 # A named list of functions iris %>% group_by(Species) %>% summarise(across(starts_with(\"Sepal\"), list(mean = mean, sd = sd))) #> # A tibble: 3 × 5 #> Species Sepal.Length_mean Sepal.Length_sd Sepal.Width_mean #> #> 1 setosa 5.01 0.352 3.43 #> 2 versicolor 5.94 0.516 2.77 #> 3 virginica 6.59 0.636 2.97 #> # ℹ 1 more variable: Sepal.Width_sd # Use the .names argument to control the output names iris %>% group_by(Species) %>% summarise(across(starts_with(\"Sepal\"), mean, .names = \"mean_{.col}\")) #> # A tibble: 3 × 3 #> Species mean_Sepal.Length mean_Sepal.Width #> #> 1 setosa 5.01 3.43 #> 2 versicolor 5.94 2.77 #> 3 virginica 6.59 2.97 iris %>% group_by(Species) %>% summarise(across(starts_with(\"Sepal\"), list(mean = mean, sd = sd), .names = \"{.col}.{.fn}\")) #> # A tibble: 3 × 5 #> Species Sepal.Length.mean Sepal.Length.sd Sepal.Width.mean #> #> 1 setosa 5.01 0.352 3.43 #> 2 versicolor 5.94 0.516 2.77 #> 3 virginica 6.59 0.636 2.97 #> # ℹ 1 more variable: Sepal.Width.sd # If a named external vector is used for column selection, .names will use # those names when constructing the output names iris %>% group_by(Species) %>% summarise(across(all_of(cols), mean, .names = \"mean_{.col}\")) #> # A tibble: 3 × 3 #> Species mean_sepal.length mean_petal.width #> #> 1 setosa 5.01 0.246 #> 2 versicolor 5.94 1.33 #> 3 virginica 6.59 2.03 # When the list is not named, .fn is replaced by the function's position iris %>% group_by(Species) %>% summarise(across(starts_with(\"Sepal\"), list(mean, sd), .names = \"{.col}.fn{.fn}\")) #> # A tibble: 3 × 5 #> Species Sepal.Length.fn1 Sepal.Length.fn2 Sepal.Width.fn1 #> #> 1 setosa 5.01 0.352 3.43 #> 2 versicolor 5.94 0.516 2.77 #> 3 virginica 6.59 
0.636 2.97 #> # ℹ 1 more variable: Sepal.Width.fn2 # When the functions in .fns return a data frame, you typically get a # \"packed\" data frame back quantile_df <- function(x, probs = c(0.25, 0.5, 0.75)) { tibble(quantile = probs, value = quantile(x, probs)) } iris %>% reframe(across(starts_with(\"Sepal\"), quantile_df)) #> # A tibble: 3 × 2 #> Sepal.Length$quantile $value Sepal.Width$quantile $value #> #> 1 0.25 5.1 0.25 2.8 #> 2 0.5 5.8 0.5 3 #> 3 0.75 6.4 0.75 3.3 # Use .unpack to automatically expand these packed data frames into their # individual columns iris %>% reframe(across(starts_with(\"Sepal\"), quantile_df, .unpack = TRUE)) #> # A tibble: 3 × 4 #> Sepal.Length_quantile Sepal.Length_value Sepal.Width_quantile #> #> 1 0.25 5.1 0.25 #> 2 0.5 5.8 0.5 #> 3 0.75 6.4 0.75 #> # ℹ 1 more variable: Sepal.Width_value # .unpack can utilize a glue specification if you don't like the defaults iris %>% reframe(across(starts_with(\"Sepal\"), quantile_df, .unpack = \"{outer}.{inner}\")) #> # A tibble: 3 × 4 #> Sepal.Length.quantile Sepal.Length.value Sepal.Width.quantile #> #> 1 0.25 5.1 0.25 #> 2 0.5 5.8 0.5 #> 3 0.75 6.4 0.75 #> # ℹ 1 more variable: Sepal.Width.value # This is also useful inside mutate(), for example, with a multi-lag helper multilag <- function(x, lags = 1:3) { names(lags) <- as.character(lags) purrr::map_dfr(lags, lag, x = x) } iris %>% group_by(Species) %>% mutate(across(starts_with(\"Sepal\"), multilag, .unpack = TRUE)) %>% select(Species, starts_with(\"Sepal\")) #> # A tibble: 150 × 9 #> # Groups: Species [3] #> Species Sepal.Length Sepal.Width Sepal.Length_1 Sepal.Length_2 #> #> 1 setosa 5.1 3.5 NA NA #> 2 setosa 4.9 3 5.1 NA #> 3 setosa 4.7 3.2 4.9 5.1 #> 4 setosa 4.6 3.1 4.7 4.9 #> 5 setosa 5 3.6 4.6 4.7 #> 6 setosa 5.4 3.9 5 4.6 #> 7 setosa 4.6 3.4 5.4 5 #> 8 setosa 5 3.4 4.6 5.4 #> 9 setosa 4.4 2.9 5 4.6 #> 10 setosa 4.9 3.1 4.4 5 #> # ℹ 140 more rows #> # ℹ 4 more variables: Sepal.Length_3 , Sepal.Width_1 , #> # Sepal.Width_2 , 
Sepal.Width_3 # if_any() and if_all() ---------------------------------------------------- iris %>% filter(if_any(ends_with(\"Width\"), ~ . > 4)) #> # A tibble: 3 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5.7 4.4 1.5 0.4 setosa #> 2 5.2 4.1 1.5 0.1 setosa #> 3 5.5 4.2 1.4 0.2 setosa iris %>% filter(if_all(ends_with(\"Width\"), ~ . > 2)) #> # A tibble: 23 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 6.3 3.3 6 2.5 virginica #> 2 7.1 3 5.9 2.1 virginica #> 3 6.5 3 5.8 2.2 virginica #> 4 7.6 3 6.6 2.1 virginica #> 5 7.2 3.6 6.1 2.5 virginica #> 6 6.8 3 5.5 2.1 virginica #> 7 5.8 2.8 5.1 2.4 virginica #> 8 6.4 3.2 5.3 2.3 virginica #> 9 7.7 3.8 6.7 2.2 virginica #> 10 7.7 2.6 6.9 2.3 virginica #> # ℹ 13 more rows"},{"path":"https://dplyr.tidyverse.org/dev/reference/add_rownames.html","id":null,"dir":"Reference","previous_headings":"","what":"Convert row names to an explicit variable. — add_rownames","title":"Convert row names to an explicit variable. — add_rownames","text":"Please use tibble::rownames_to_column() instead.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/add_rownames.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Convert row names to an explicit variable. — add_rownames","text":"","code":"add_rownames(df, var = \"rowname\")"},{"path":"https://dplyr.tidyverse.org/dev/reference/add_rownames.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Convert row names to an explicit variable. — add_rownames","text":"df Input data frame rownames. var Name variable use","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/all_equal.html","id":null,"dir":"Reference","previous_headings":"","what":"Flexible equality comparison for data frames — all_equal","title":"Flexible equality comparison for data frames — all_equal","text":"all_equal() allows compare data frames, optionally ignoring row column names. 
deprecated dplyr 1.1.0, makes easy ignore important differences.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/all_equal.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Flexible equality comparison for data frames — all_equal","text":"","code":"all_equal( target, current, ignore_col_order = TRUE, ignore_row_order = TRUE, convert = FALSE, ... )"},{"path":"https://dplyr.tidyverse.org/dev/reference/all_equal.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Flexible equality comparison for data frames — all_equal","text":"target, current Two data frames compare. ignore_col_order order columns ignored? ignore_row_order order rows ignored? convert similar classes converted? Currently convert factor character integer double. ... Ignored. Needed compatibility .equal().","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/all_equal.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Flexible equality comparison for data frames — all_equal","text":"TRUE equal, otherwise character vector describing reasons equal. Use isTRUE() using result expression.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/all_equal.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Flexible equality comparison for data frames — all_equal","text":"","code":"scramble <- function(x) x[sample(nrow(x)), sample(ncol(x))] # `all_equal()` ignored row and column ordering by default, # but we now feel that that makes it too easy to make mistakes mtcars2 <- scramble(mtcars) all_equal(mtcars, mtcars2) #> Warning: `all_equal()` was deprecated in dplyr 1.1.0. #> ℹ Please use `all.equal()` instead. 
#> ℹ And manually order the rows/cols as needed #> [1] TRUE # Instead, be explicit about the row and column ordering all.equal( mtcars, mtcars2[rownames(mtcars), names(mtcars)] ) #> [1] TRUE"},{"path":"https://dplyr.tidyverse.org/dev/reference/all_vars.html","id":null,"dir":"Reference","previous_headings":"","what":"Apply predicate to all variables — all_vars","title":"Apply predicate to all variables — all_vars","text":"all_vars() any_vars() needed scoped verbs, superseded use across() existing verb. See vignette(\"colwise\") details. quoting functions signal scoped filtering verbs (e.g. filter_if() filter_all()) predicate expression applied relevant variables. all_vars() variant takes intersection predicate expressions & any_vars() variant takes union |.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/all_vars.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Apply predicate to all variables — all_vars","text":"","code":"all_vars(expr) any_vars(expr)"},{"path":"https://dplyr.tidyverse.org/dev/reference/all_vars.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Apply predicate to all variables — all_vars","text":"expr expression returns logical vector, using . refer \"current\" variable.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/args_by.html","id":null,"dir":"Reference","previous_headings":"","what":"Helper for consistent documentation of .by — args_by","title":"Helper for consistent documentation of .by — args_by","text":"Use @inheritParams args_by consistently document ..","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/args_by.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Helper for consistent documentation of .by — args_by","text":". Optionally, selection columns group just operation, functioning alternative group_by(). 
details examples, see ?dplyr_by.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/arrange.html","id":null,"dir":"Reference","previous_headings":"","what":"Order rows using column values — arrange","title":"Order rows using column values — arrange","text":"arrange() orders rows data frame values selected columns. Unlike dplyr verbs, arrange() largely ignores grouping; need explicitly mention grouping variables (use .by_group = TRUE) order group , functions variables evaluated per data frame, per group.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/arrange.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Order rows using column values — arrange","text":"","code":"arrange(.data, ..., .by_group = FALSE) # S3 method for data.frame arrange(.data, ..., .by_group = FALSE, .locale = NULL)"},{"path":"https://dplyr.tidyverse.org/dev/reference/arrange.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Order rows using column values — arrange","text":".data data frame, data frame extension (e.g. tibble), lazy data frame (e.g. dbplyr dtplyr). See Methods, , details. ... Variables, functions variables. Use desc() sort variable descending order. .by_group TRUE, sort first grouping variable. Applies grouped data frames . .locale locale sort character vectors . NULL, default, uses \"C\" locale unless dplyr.legacy_locale global option escape hatch active. See dplyr-locale help page details. single string stringi::stri_locale_list() supplied, used locale sort . example, \"en\" sort American English locale. requires stringi package. \"C\" supplied, character vectors always sorted C locale. require stringi often much faster supplying locale identifier. C locale English locales, \"en\", particularly comes data containing mix upper lower case letters. 
explained detail locale help page Default locale section.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/arrange.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Order rows using column values — arrange","text":"object type .data. output following properties: rows appear output, (usually) different place. Columns modified. Groups modified. Data frame attributes preserved.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/arrange.html","id":"missing-values","dir":"Reference","previous_headings":"","what":"Missing values","title":"Order rows using column values — arrange","text":"Unlike base sorting sort(), NA : always sorted end local data, even wrapped desc(). treated differently remote data, depending backend.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/arrange.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Order rows using column values — arrange","text":"function generic, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. 
following methods currently available loaded packages: dplyr (data.frame) .","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/arrange.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Order rows using column values — arrange","text":"","code":"arrange(mtcars, cyl, disp) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 #> Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 #> Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 #> Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 #> Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 #> Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 #> Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 #> Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2 #> Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 #> Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 #> Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 #> Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 #> Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 #> Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 #> Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 #> Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 #> Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 #> Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 #> Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 #> Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 #> Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8 #> AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 #> Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 #> Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 #> Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 #> Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 #> Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 #> Pontiac Firebird 19.2 8 
400.0 175 3.08 3.845 17.05 0 0 3 2 #> Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 #> Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 #> Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 arrange(mtcars, desc(disp)) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 #> Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 #> Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 #> Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 #> Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 #> Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 #> Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 #> Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 #> Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 #> AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 #> Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8 #> Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 #> Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 #> Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 #> Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 #> Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 #> Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 #> Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 #> Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 #> Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 #> Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 #> Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 #> Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 #> Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2 #> Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 #> Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 #> Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 #> Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 #> Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 #> Fiat 128 32.4 4 
78.7 66 4.08 2.200 19.47 1 1 4 1 #> Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 # grouped arrange ignores groups by_cyl <- mtcars %>% group_by(cyl) by_cyl %>% arrange(desc(wt)) #> # A tibble: 32 × 11 #> # Groups: cyl [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 10.4 8 460 215 3 5.42 17.8 0 0 3 4 #> 2 14.7 8 440 230 3.23 5.34 17.4 0 0 3 4 #> 3 10.4 8 472 205 2.93 5.25 18.0 0 0 3 4 #> 4 16.4 8 276. 180 3.07 4.07 17.4 0 0 3 3 #> 5 19.2 8 400 175 3.08 3.84 17.0 0 0 3 2 #> 6 13.3 8 350 245 3.73 3.84 15.4 0 0 3 4 #> 7 15.2 8 276. 180 3.07 3.78 18 0 0 3 3 #> 8 17.3 8 276. 180 3.07 3.73 17.6 0 0 3 3 #> 9 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 10 15 8 301 335 3.54 3.57 14.6 0 1 5 8 #> # ℹ 22 more rows # Unless you specifically ask: by_cyl %>% arrange(desc(wt), .by_group = TRUE) #> # A tibble: 32 × 11 #> # Groups: cyl [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 2 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 3 21.4 4 121 109 4.11 2.78 18.6 1 1 4 2 #> 4 21.5 4 120. 97 3.7 2.46 20.0 1 0 3 1 #> 5 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 6 32.4 4 78.7 66 4.08 2.2 19.5 1 1 4 1 #> 7 26 4 120. 
91 4.43 2.14 16.7 0 1 5 2 #> 8 27.3 4 79 66 4.08 1.94 18.9 1 1 4 1 #> 9 33.9 4 71.1 65 4.22 1.84 19.9 1 1 4 1 #> 10 30.4 4 75.7 52 4.93 1.62 18.5 1 1 4 2 #> # ℹ 22 more rows # use embracing when wrapping in a function; # see ?rlang::args_data_masking for more details tidy_eval_arrange <- function(.data, var) { .data %>% arrange({{ var }}) } tidy_eval_arrange(mtcars, mpg) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 #> Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 #> Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 #> Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 #> Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 #> Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8 #> Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 #> AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 #> Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 #> Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 #> Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 #> Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 #> Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 #> Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 #> Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 #> Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 #> Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 #> Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 #> Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 #> Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 #> Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 #> Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2 #> Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 #> Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 #> Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 #> Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 #> Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 #> Fiat X1-9 
27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 #> Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 #> Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 #> Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 # Use `across()` or `pick()` to select columns with tidy-select iris %>% arrange(pick(starts_with(\"Sepal\"))) #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> 1 4.3 3.0 1.1 0.1 setosa #> 2 4.4 2.9 1.4 0.2 setosa #> 3 4.4 3.0 1.3 0.2 setosa #> 4 4.4 3.2 1.3 0.2 setosa #> 5 4.5 2.3 1.3 0.3 setosa #> 6 4.6 3.1 1.5 0.2 setosa #> 7 4.6 3.2 1.4 0.2 setosa #> 8 4.6 3.4 1.4 0.3 setosa #> 9 4.6 3.6 1.0 0.2 setosa #> 10 4.7 3.2 1.3 0.2 setosa #> 11 4.7 3.2 1.6 0.2 setosa #> 12 4.8 3.0 1.4 0.1 setosa #> 13 4.8 3.0 1.4 0.3 setosa #> 14 4.8 3.1 1.6 0.2 setosa #> 15 4.8 3.4 1.6 0.2 setosa #> 16 4.8 3.4 1.9 0.2 setosa #> 17 4.9 2.4 3.3 1.0 versicolor #> 18 4.9 2.5 4.5 1.7 virginica #> 19 4.9 3.0 1.4 0.2 setosa #> 20 4.9 3.1 1.5 0.1 setosa #> 21 4.9 3.1 1.5 0.2 setosa #> 22 4.9 3.6 1.4 0.1 setosa #> 23 5.0 2.0 3.5 1.0 versicolor #> 24 5.0 2.3 3.3 1.0 versicolor #> 25 5.0 3.0 1.6 0.2 setosa #> 26 5.0 3.2 1.2 0.2 setosa #> 27 5.0 3.3 1.4 0.2 setosa #> 28 5.0 3.4 1.5 0.2 setosa #> 29 5.0 3.4 1.6 0.4 setosa #> 30 5.0 3.5 1.3 0.3 setosa #> 31 5.0 3.5 1.6 0.6 setosa #> 32 5.0 3.6 1.4 0.2 setosa #> 33 5.1 2.5 3.0 1.1 versicolor #> 34 5.1 3.3 1.7 0.5 setosa #> 35 5.1 3.4 1.5 0.2 setosa #> 36 5.1 3.5 1.4 0.2 setosa #> 37 5.1 3.5 1.4 0.3 setosa #> 38 5.1 3.7 1.5 0.4 setosa #> 39 5.1 3.8 1.5 0.3 setosa #> 40 5.1 3.8 1.9 0.4 setosa #> 41 5.1 3.8 1.6 0.2 setosa #> 42 5.2 2.7 3.9 1.4 versicolor #> 43 5.2 3.4 1.4 0.2 setosa #> 44 5.2 3.5 1.5 0.2 setosa #> 45 5.2 4.1 1.5 0.1 setosa #> 46 5.3 3.7 1.5 0.2 setosa #> 47 5.4 3.0 4.5 1.5 versicolor #> 48 5.4 3.4 1.7 0.2 setosa #> 49 5.4 3.4 1.5 0.4 setosa #> 50 5.4 3.7 1.5 0.2 setosa #> 51 5.4 3.9 1.7 0.4 setosa #> 52 5.4 3.9 1.3 0.4 setosa #> 53 5.5 2.3 4.0 1.3 versicolor #> 54 
5.5 2.4 3.8 1.1 versicolor #> 55 5.5 2.4 3.7 1.0 versicolor #> 56 5.5 2.5 4.0 1.3 versicolor #> 57 5.5 2.6 4.4 1.2 versicolor #> 58 5.5 3.5 1.3 0.2 setosa #> 59 5.5 4.2 1.4 0.2 setosa #> 60 5.6 2.5 3.9 1.1 versicolor #> 61 5.6 2.7 4.2 1.3 versicolor #> 62 5.6 2.8 4.9 2.0 virginica #> 63 5.6 2.9 3.6 1.3 versicolor #> 64 5.6 3.0 4.5 1.5 versicolor #> 65 5.6 3.0 4.1 1.3 versicolor #> 66 5.7 2.5 5.0 2.0 virginica #> 67 5.7 2.6 3.5 1.0 versicolor #> 68 5.7 2.8 4.5 1.3 versicolor #> 69 5.7 2.8 4.1 1.3 versicolor #> 70 5.7 2.9 4.2 1.3 versicolor #> 71 5.7 3.0 4.2 1.2 versicolor #> 72 5.7 3.8 1.7 0.3 setosa #> 73 5.7 4.4 1.5 0.4 setosa #> 74 5.8 2.6 4.0 1.2 versicolor #> 75 5.8 2.7 4.1 1.0 versicolor #> 76 5.8 2.7 3.9 1.2 versicolor #> 77 5.8 2.7 5.1 1.9 virginica #> 78 5.8 2.7 5.1 1.9 virginica #> 79 5.8 2.8 5.1 2.4 virginica #> 80 5.8 4.0 1.2 0.2 setosa #> 81 5.9 3.0 4.2 1.5 versicolor #> 82 5.9 3.0 5.1 1.8 virginica #> 83 5.9 3.2 4.8 1.8 versicolor #> 84 6.0 2.2 4.0 1.0 versicolor #> 85 6.0 2.2 5.0 1.5 virginica #> 86 6.0 2.7 5.1 1.6 versicolor #> 87 6.0 2.9 4.5 1.5 versicolor #> 88 6.0 3.0 4.8 1.8 virginica #> 89 6.0 3.4 4.5 1.6 versicolor #> 90 6.1 2.6 5.6 1.4 virginica #> 91 6.1 2.8 4.0 1.3 versicolor #> 92 6.1 2.8 4.7 1.2 versicolor #> 93 6.1 2.9 4.7 1.4 versicolor #> 94 6.1 3.0 4.6 1.4 versicolor #> 95 6.1 3.0 4.9 1.8 virginica #> 96 6.2 2.2 4.5 1.5 versicolor #> 97 6.2 2.8 4.8 1.8 virginica #> 98 6.2 2.9 4.3 1.3 versicolor #> 99 6.2 3.4 5.4 2.3 virginica #> 100 6.3 2.3 4.4 1.3 versicolor #> 101 6.3 2.5 4.9 1.5 versicolor #> 102 6.3 2.5 5.0 1.9 virginica #> 103 6.3 2.7 4.9 1.8 virginica #> 104 6.3 2.8 5.1 1.5 virginica #> 105 6.3 2.9 5.6 1.8 virginica #> 106 6.3 3.3 4.7 1.6 versicolor #> 107 6.3 3.3 6.0 2.5 virginica #> 108 6.3 3.4 5.6 2.4 virginica #> 109 6.4 2.7 5.3 1.9 virginica #> 110 6.4 2.8 5.6 2.1 virginica #> 111 6.4 2.8 5.6 2.2 virginica #> 112 6.4 2.9 4.3 1.3 versicolor #> 113 6.4 3.1 5.5 1.8 virginica #> 114 6.4 3.2 4.5 1.5 versicolor #> 115 6.4 3.2 5.3 
2.3 virginica #> 116 6.5 2.8 4.6 1.5 versicolor #> 117 6.5 3.0 5.8 2.2 virginica #> 118 6.5 3.0 5.5 1.8 virginica #> 119 6.5 3.0 5.2 2.0 virginica #> 120 6.5 3.2 5.1 2.0 virginica #> 121 6.6 2.9 4.6 1.3 versicolor #> 122 6.6 3.0 4.4 1.4 versicolor #> 123 6.7 2.5 5.8 1.8 virginica #> 124 6.7 3.0 5.0 1.7 versicolor #> 125 6.7 3.0 5.2 2.3 virginica #> 126 6.7 3.1 4.4 1.4 versicolor #> 127 6.7 3.1 4.7 1.5 versicolor #> 128 6.7 3.1 5.6 2.4 virginica #> 129 6.7 3.3 5.7 2.1 virginica #> 130 6.7 3.3 5.7 2.5 virginica #> 131 6.8 2.8 4.8 1.4 versicolor #> 132 6.8 3.0 5.5 2.1 virginica #> 133 6.8 3.2 5.9 2.3 virginica #> 134 6.9 3.1 4.9 1.5 versicolor #> 135 6.9 3.1 5.4 2.1 virginica #> 136 6.9 3.1 5.1 2.3 virginica #> 137 6.9 3.2 5.7 2.3 virginica #> 138 7.0 3.2 4.7 1.4 versicolor #> 139 7.1 3.0 5.9 2.1 virginica #> 140 7.2 3.0 5.8 1.6 virginica #> 141 7.2 3.2 6.0 1.8 virginica #> 142 7.2 3.6 6.1 2.5 virginica #> 143 7.3 2.9 6.3 1.8 virginica #> 144 7.4 2.8 6.1 1.9 virginica #> 145 7.6 3.0 6.6 2.1 virginica #> 146 7.7 2.6 6.9 2.3 virginica #> 147 7.7 2.8 6.7 2.0 virginica #> 148 7.7 3.0 6.1 2.3 virginica #> 149 7.7 3.8 6.7 2.2 virginica #> 150 7.9 3.8 6.4 2.0 virginica iris %>% arrange(across(starts_with(\"Sepal\"), desc)) #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> 1 7.9 3.8 6.4 2.0 virginica #> 2 7.7 3.8 6.7 2.2 virginica #> 3 7.7 3.0 6.1 2.3 virginica #> 4 7.7 2.8 6.7 2.0 virginica #> 5 7.7 2.6 6.9 2.3 virginica #> 6 7.6 3.0 6.6 2.1 virginica #> 7 7.4 2.8 6.1 1.9 virginica #> 8 7.3 2.9 6.3 1.8 virginica #> 9 7.2 3.6 6.1 2.5 virginica #> 10 7.2 3.2 6.0 1.8 virginica #> 11 7.2 3.0 5.8 1.6 virginica #> 12 7.1 3.0 5.9 2.1 virginica #> 13 7.0 3.2 4.7 1.4 versicolor #> 14 6.9 3.2 5.7 2.3 virginica #> 15 6.9 3.1 4.9 1.5 versicolor #> 16 6.9 3.1 5.4 2.1 virginica #> 17 6.9 3.1 5.1 2.3 virginica #> 18 6.8 3.2 5.9 2.3 virginica #> 19 6.8 3.0 5.5 2.1 virginica #> 20 6.8 2.8 4.8 1.4 versicolor #> 21 6.7 3.3 5.7 2.1 virginica #> 22 6.7 3.3 5.7 2.5 virginica #> 23 
6.7 3.1 4.4 1.4 versicolor #> 24 6.7 3.1 4.7 1.5 versicolor #> 25 6.7 3.1 5.6 2.4 virginica #> 26 6.7 3.0 5.0 1.7 versicolor #> 27 6.7 3.0 5.2 2.3 virginica #> 28 6.7 2.5 5.8 1.8 virginica #> 29 6.6 3.0 4.4 1.4 versicolor #> 30 6.6 2.9 4.6 1.3 versicolor #> 31 6.5 3.2 5.1 2.0 virginica #> 32 6.5 3.0 5.8 2.2 virginica #> 33 6.5 3.0 5.5 1.8 virginica #> 34 6.5 3.0 5.2 2.0 virginica #> 35 6.5 2.8 4.6 1.5 versicolor #> 36 6.4 3.2 4.5 1.5 versicolor #> 37 6.4 3.2 5.3 2.3 virginica #> 38 6.4 3.1 5.5 1.8 virginica #> 39 6.4 2.9 4.3 1.3 versicolor #> 40 6.4 2.8 5.6 2.1 virginica #> 41 6.4 2.8 5.6 2.2 virginica #> 42 6.4 2.7 5.3 1.9 virginica #> 43 6.3 3.4 5.6 2.4 virginica #> 44 6.3 3.3 4.7 1.6 versicolor #> 45 6.3 3.3 6.0 2.5 virginica #> 46 6.3 2.9 5.6 1.8 virginica #> 47 6.3 2.8 5.1 1.5 virginica #> 48 6.3 2.7 4.9 1.8 virginica #> 49 6.3 2.5 4.9 1.5 versicolor #> 50 6.3 2.5 5.0 1.9 virginica #> 51 6.3 2.3 4.4 1.3 versicolor #> 52 6.2 3.4 5.4 2.3 virginica #> 53 6.2 2.9 4.3 1.3 versicolor #> 54 6.2 2.8 4.8 1.8 virginica #> 55 6.2 2.2 4.5 1.5 versicolor #> 56 6.1 3.0 4.6 1.4 versicolor #> 57 6.1 3.0 4.9 1.8 virginica #> 58 6.1 2.9 4.7 1.4 versicolor #> 59 6.1 2.8 4.0 1.3 versicolor #> 60 6.1 2.8 4.7 1.2 versicolor #> 61 6.1 2.6 5.6 1.4 virginica #> 62 6.0 3.4 4.5 1.6 versicolor #> 63 6.0 3.0 4.8 1.8 virginica #> 64 6.0 2.9 4.5 1.5 versicolor #> 65 6.0 2.7 5.1 1.6 versicolor #> 66 6.0 2.2 4.0 1.0 versicolor #> 67 6.0 2.2 5.0 1.5 virginica #> 68 5.9 3.2 4.8 1.8 versicolor #> 69 5.9 3.0 4.2 1.5 versicolor #> 70 5.9 3.0 5.1 1.8 virginica #> 71 5.8 4.0 1.2 0.2 setosa #> 72 5.8 2.8 5.1 2.4 virginica #> 73 5.8 2.7 4.1 1.0 versicolor #> 74 5.8 2.7 3.9 1.2 versicolor #> 75 5.8 2.7 5.1 1.9 virginica #> 76 5.8 2.7 5.1 1.9 virginica #> 77 5.8 2.6 4.0 1.2 versicolor #> 78 5.7 4.4 1.5 0.4 setosa #> 79 5.7 3.8 1.7 0.3 setosa #> 80 5.7 3.0 4.2 1.2 versicolor #> 81 5.7 2.9 4.2 1.3 versicolor #> 82 5.7 2.8 4.5 1.3 versicolor #> 83 5.7 2.8 4.1 1.3 versicolor #> 84 5.7 2.6 3.5 1.0 versicolor 
#> 85 5.7 2.5 5.0 2.0 virginica #> 86 5.6 3.0 4.5 1.5 versicolor #> 87 5.6 3.0 4.1 1.3 versicolor #> 88 5.6 2.9 3.6 1.3 versicolor #> 89 5.6 2.8 4.9 2.0 virginica #> 90 5.6 2.7 4.2 1.3 versicolor #> 91 5.6 2.5 3.9 1.1 versicolor #> 92 5.5 4.2 1.4 0.2 setosa #> 93 5.5 3.5 1.3 0.2 setosa #> 94 5.5 2.6 4.4 1.2 versicolor #> 95 5.5 2.5 4.0 1.3 versicolor #> 96 5.5 2.4 3.8 1.1 versicolor #> 97 5.5 2.4 3.7 1.0 versicolor #> 98 5.5 2.3 4.0 1.3 versicolor #> 99 5.4 3.9 1.7 0.4 setosa #> 100 5.4 3.9 1.3 0.4 setosa #> 101 5.4 3.7 1.5 0.2 setosa #> 102 5.4 3.4 1.7 0.2 setosa #> 103 5.4 3.4 1.5 0.4 setosa #> 104 5.4 3.0 4.5 1.5 versicolor #> 105 5.3 3.7 1.5 0.2 setosa #> 106 5.2 4.1 1.5 0.1 setosa #> 107 5.2 3.5 1.5 0.2 setosa #> 108 5.2 3.4 1.4 0.2 setosa #> 109 5.2 2.7 3.9 1.4 versicolor #> 110 5.1 3.8 1.5 0.3 setosa #> 111 5.1 3.8 1.9 0.4 setosa #> 112 5.1 3.8 1.6 0.2 setosa #> 113 5.1 3.7 1.5 0.4 setosa #> 114 5.1 3.5 1.4 0.2 setosa #> 115 5.1 3.5 1.4 0.3 setosa #> 116 5.1 3.4 1.5 0.2 setosa #> 117 5.1 3.3 1.7 0.5 setosa #> 118 5.1 2.5 3.0 1.1 versicolor #> 119 5.0 3.6 1.4 0.2 setosa #> 120 5.0 3.5 1.3 0.3 setosa #> 121 5.0 3.5 1.6 0.6 setosa #> 122 5.0 3.4 1.5 0.2 setosa #> 123 5.0 3.4 1.6 0.4 setosa #> 124 5.0 3.3 1.4 0.2 setosa #> 125 5.0 3.2 1.2 0.2 setosa #> 126 5.0 3.0 1.6 0.2 setosa #> 127 5.0 2.3 3.3 1.0 versicolor #> 128 5.0 2.0 3.5 1.0 versicolor #> 129 4.9 3.6 1.4 0.1 setosa #> 130 4.9 3.1 1.5 0.1 setosa #> 131 4.9 3.1 1.5 0.2 setosa #> 132 4.9 3.0 1.4 0.2 setosa #> 133 4.9 2.5 4.5 1.7 virginica #> 134 4.9 2.4 3.3 1.0 versicolor #> 135 4.8 3.4 1.6 0.2 setosa #> 136 4.8 3.4 1.9 0.2 setosa #> 137 4.8 3.1 1.6 0.2 setosa #> 138 4.8 3.0 1.4 0.1 setosa #> 139 4.8 3.0 1.4 0.3 setosa #> 140 4.7 3.2 1.3 0.2 setosa #> 141 4.7 3.2 1.6 0.2 setosa #> 142 4.6 3.6 1.0 0.2 setosa #> 143 4.6 3.4 1.4 0.3 setosa #> 144 4.6 3.2 1.4 0.2 setosa #> 145 4.6 3.1 1.5 0.2 setosa #> 146 4.5 2.3 1.3 0.3 setosa #> 147 4.4 3.2 1.3 0.2 setosa #> 148 4.4 3.0 1.3 0.2 setosa #> 149 4.4 2.9 1.4 
0.2 setosa #> 150 4.3 3.0 1.1 0.1 setosa"},{"path":"https://dplyr.tidyverse.org/dev/reference/arrange_all.html","id":null,"dir":"Reference","previous_headings":"","what":"Arrange rows by a selection of variables — arrange_all","title":"Arrange rows by a selection of variables — arrange_all","text":"Scoped verbs (_if, _at, _all) superseded use pick() across() existing verb. See vignette(\"colwise\") details. scoped variants arrange() sort data frame selection variables. Like arrange(), can modify variables ordering .funs argument.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/arrange_all.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Arrange rows by a selection of variables — arrange_all","text":"","code":"arrange_all(.tbl, .funs = list(), ..., .by_group = FALSE, .locale = NULL) arrange_at(.tbl, .vars, .funs = list(), ..., .by_group = FALSE, .locale = NULL) arrange_if( .tbl, .predicate, .funs = list(), ..., .by_group = FALSE, .locale = NULL )"},{"path":"https://dplyr.tidyverse.org/dev/reference/arrange_all.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Arrange rows by a selection of variables — arrange_all","text":".tbl tbl object. .funs function fun, quosure style lambda ~ fun(.) list either form. ... Additional arguments function calls .funs. evaluated , tidy dots support. .by_group TRUE, sort first grouping variable. Applies grouped data frames . .locale locale sort character vectors . NULL, default, uses \"C\" locale unless dplyr.legacy_locale global option escape hatch active. See dplyr-locale help page details. single string stringi::stri_locale_list() supplied, used locale sort . example, \"en\" sort American English locale. requires stringi package. \"C\" supplied, character vectors always sorted C locale. require stringi often much faster supplying locale identifier. 
C locale English locales, \"en\", particularly comes data containing mix upper lower case letters. explained detail locale help page Default locale section. .vars list columns generated vars(), character vector column names, numeric vector column positions, NULL. .predicate predicate function applied columns logical vector. variables .predicate returns TRUE selected. argument passed rlang::as_function() thus supports quosure-style lambda functions strings representing function names.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/arrange_all.html","id":"grouping-variables","dir":"Reference","previous_headings":"","what":"Grouping variables","title":"Arrange rows by a selection of variables — arrange_all","text":"grouping variables part selection participate sorting data frame.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/arrange_all.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Arrange rows by a selection of variables — arrange_all","text":"","code":"df <- as_tibble(mtcars) arrange_all(df) #> # A tibble: 32 × 11 #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 10.4 8 460 215 3 5.42 17.8 0 0 3 4 #> 2 10.4 8 472 205 2.93 5.25 18.0 0 0 3 4 #> 3 13.3 8 350 245 3.73 3.84 15.4 0 0 3 4 #> 4 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 5 14.7 8 440 230 3.23 5.34 17.4 0 0 3 4 #> 6 15 8 301 335 3.54 3.57 14.6 0 1 5 8 #> 7 15.2 8 276. 180 3.07 3.78 18 0 0 3 3 #> 8 15.2 8 304 150 3.15 3.44 17.3 0 0 3 2 #> 9 15.5 8 318 150 2.76 3.52 16.9 0 0 3 2 #> 10 15.8 8 351 264 4.22 3.17 14.5 0 1 5 4 #> # ℹ 22 more rows # -> arrange(df, pick(everything())) #> # A tibble: 32 × 11 #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 10.4 8 460 215 3 5.42 17.8 0 0 3 4 #> 2 10.4 8 472 205 2.93 5.25 18.0 0 0 3 4 #> 3 13.3 8 350 245 3.73 3.84 15.4 0 0 3 4 #> 4 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 5 14.7 8 440 230 3.23 5.34 17.4 0 0 3 4 #> 6 15 8 301 335 3.54 3.57 14.6 0 1 5 8 #> 7 15.2 8 276. 
180 3.07 3.78 18 0 0 3 3 #> 8 15.2 8 304 150 3.15 3.44 17.3 0 0 3 2 #> 9 15.5 8 318 150 2.76 3.52 16.9 0 0 3 2 #> 10 15.8 8 351 264 4.22 3.17 14.5 0 1 5 4 #> # ℹ 22 more rows arrange_all(df, desc) #> # A tibble: 32 × 11 #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 33.9 4 71.1 65 4.22 1.84 19.9 1 1 4 1 #> 2 32.4 4 78.7 66 4.08 2.2 19.5 1 1 4 1 #> 3 30.4 4 95.1 113 3.77 1.51 16.9 1 1 5 2 #> 4 30.4 4 75.7 52 4.93 1.62 18.5 1 1 4 2 #> 5 27.3 4 79 66 4.08 1.94 18.9 1 1 4 1 #> 6 26 4 120. 91 4.43 2.14 16.7 0 1 5 2 #> 7 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 8 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 9 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 10 21.5 4 120. 97 3.7 2.46 20.0 1 0 3 1 #> # ℹ 22 more rows # -> arrange(df, across(everything(), desc)) #> # A tibble: 32 × 11 #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 33.9 4 71.1 65 4.22 1.84 19.9 1 1 4 1 #> 2 32.4 4 78.7 66 4.08 2.2 19.5 1 1 4 1 #> 3 30.4 4 95.1 113 3.77 1.51 16.9 1 1 5 2 #> 4 30.4 4 75.7 52 4.93 1.62 18.5 1 1 4 2 #> 5 27.3 4 79 66 4.08 1.94 18.9 1 1 4 1 #> 6 26 4 120. 91 4.43 2.14 16.7 0 1 5 2 #> 7 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 8 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 9 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 10 21.5 4 120. 
97 3.7 2.46 20.0 1 0 3 1 #> # ℹ 22 more rows"},{"path":"https://dplyr.tidyverse.org/dev/reference/auto_copy.html","id":null,"dir":"Reference","previous_headings":"","what":"Copy tables to same source, if necessary — auto_copy","title":"Copy tables to same source, if necessary — auto_copy","text":"Copy tables source, necessary","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/auto_copy.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Copy tables to same source, if necessary — auto_copy","text":"","code":"auto_copy(x, y, copy = FALSE, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/auto_copy.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Copy tables to same source, if necessary — auto_copy","text":"x, y y copied x, necessary. copy x y data source, copy TRUE, y copied src x. allows join tables across srcs, potentially expensive operation must opt . ... arguments passed methods.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/backend_dbplyr.html","id":null,"dir":"Reference","previous_headings":"","what":"Database and SQL generics. — backend_dbplyr","title":"Database and SQL generics. — backend_dbplyr","text":"sql_ generics used build different types SQL queries. default implementations dbplyr generates ANSI 92 compliant SQL. db_ generics execute actions database. default implementations dbplyr typically just call standard DBI S4 method.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/backend_dbplyr.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Database and SQL generics. — backend_dbplyr","text":"","code":"db_desc(x) sql_translate_env(con) db_list_tables(con) db_has_table(con, table) db_data_type(con, fields) db_save_query(con, sql, name, temporary = TRUE, ...) db_begin(con, ...) db_commit(con, ...) db_rollback(con, ...) db_write_table(con, table, types, values, temporary = FALSE, ...) 
db_create_table(con, table, types, temporary = FALSE, ...) db_insert_into(con, table, values, ...) db_create_indexes(con, table, indexes = NULL, unique = FALSE, ...) db_create_index(con, table, columns, name = NULL, unique = FALSE, ...) db_drop_table(con, table, force = FALSE, ...) db_analyze(con, table, ...) db_explain(con, sql, ...) db_query_fields(con, sql, ...) db_query_rows(con, sql, ...) sql_select( con, select, from, where = NULL, group_by = NULL, having = NULL, order_by = NULL, limit = NULL, distinct = FALSE, ... ) sql_subquery(con, from, name = random_table_name(), ...) sql_join(con, x, y, vars, type = \"inner\", by = NULL, ...) sql_semi_join(con, x, y, anti = FALSE, by = NULL, ...) sql_set_op(con, x, y, method) sql_escape_string(con, x) sql_escape_ident(con, x)"},{"path":"https://dplyr.tidyverse.org/dev/reference/backend_dbplyr.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Database and SQL generics. — backend_dbplyr","text":"con database connection. table string, table name. fields list fields, data frame.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/backend_dbplyr.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Database and SQL generics. — backend_dbplyr","text":"Usually logical value indicating success. failures generate error. However, db_has_table() return NA temporary tables listed DBI::dbListTables() (due backend API limitations example). result, methods rely backend throw error table exists .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/backend_dbplyr.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Database and SQL generics. — backend_dbplyr","text":"backend methods call standard DBI S4 methods including db_data_type(): Calls DBI::dbDataType() every field (e.g. data frame column) returns vector corresponding SQL data types db_save_query(): Builds executes CREATE [TEMPORARY] TABLE
      ... SQL command. db_create_index(): Builds executes CREATE INDEX
      SQL command. db_drop_table(): Builds executes DROP TABLE [EXISTS]
      SQL command. db_analyze(): Builds executes ANALYZE
      SQL command. Currently, copy_to() user db_begin(), db_commit(), db_rollback(), db_write_table(), db_create_indexes(), db_drop_table() db_analyze(). find overriding many functions may suggest just override copy_to() instead. db_create_table() db_insert_into() deprecated favour db_write_table().","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/band_members.html","id":null,"dir":"Reference","previous_headings":"","what":"Band membership — band_members","title":"Band membership — band_members","text":"data sets describe band members Beatles Rolling Stones. toy data sets can displayed entirety slide (e.g. demonstrate join).","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/band_members.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Band membership — band_members","text":"","code":"band_members band_instruments band_instruments2"},{"path":"https://dplyr.tidyverse.org/dev/reference/band_members.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Band membership — band_members","text":"tibble two variables three observations","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/band_members.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Band membership — band_members","text":"band_instruments band_instruments2 contain data use different column names first column data set. 
band_instruments uses name, matches name key column band_members; band_instruments2 uses artist, .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/band_members.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Band membership — band_members","text":"","code":"band_members #> # A tibble: 3 × 2 #> name band #> #> 1 Mick Stones #> 2 John Beatles #> 3 Paul Beatles band_instruments #> # A tibble: 3 × 2 #> name plays #> #> 1 John guitar #> 2 Paul bass #> 3 Keith guitar band_instruments2 #> # A tibble: 3 × 2 #> artist plays #> #> 1 John guitar #> 2 Paul bass #> 3 Keith guitar"},{"path":"https://dplyr.tidyverse.org/dev/reference/between.html","id":null,"dir":"Reference","previous_headings":"","what":"Detect where values fall in a specified range — between","title":"Detect where values fall in a specified range — between","text":"shortcut x >= left & x <= right, implemented local vectors translated appropriate SQL remote tables.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/between.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Detect where values fall in a specified range — between","text":"","code":"between(x, left, right)"},{"path":"https://dplyr.tidyverse.org/dev/reference/between.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Detect where values fall in a specified range — between","text":"x vector left, right Boundary values. 
left right recycled size x.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/between.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Detect where values fall in a specified range — between","text":"logical vector size x.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/between.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Detect where values fall in a specified range — between","text":"x, left, right cast common type comparison made.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/between.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Detect where values fall in a specified range — between","text":"","code":"between(1:12, 7, 9) #> [1] FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE FALSE FALSE #> [12] FALSE x <- rnorm(1e2) x[between(x, -1, 1)] #> [1] 0.93536319 0.17648861 0.24368546 0.11203808 -0.13399701 #> [6] -0.27923724 -0.31344598 0.07003485 -0.63912332 -0.04996490 #> [11] -0.25148344 0.44479712 0.04653138 0.57770907 0.11819487 #> [16] 0.86208648 -0.24323674 -0.20608719 0.01917759 0.02956075 #> [21] 0.54982754 -0.36122126 0.21335575 -0.66508825 -0.24589641 #> [26] -0.97585062 0.13167063 0.48862881 0.28415034 0.23669628 #> [31] 0.52390979 0.60674805 -0.10993567 0.17218172 -0.09032729 #> [36] 0.74879127 0.55622433 -0.54825726 -0.15569378 0.43388979 #> [41] -0.38195111 0.42418757 -0.03810289 0.48614892 -0.35436116 #> [46] 0.94634789 -0.29664002 -0.38721358 -0.78543266 -0.79554143 #> [51] -0.69053790 -0.55854199 -0.53666333 0.22712713 0.97845492 #> [56] -0.20888265 0.25853729 -0.44179945 0.56859986 0.42485844 #> [61] 0.24940178 0.44945378 0.42656655 0.10758399 0.02229473 #> [66] 0.60361101 -0.26265057 -0.52826408 0.19214942 # On a tibble using `filter()` filter(starwars, between(height, 100, 150)) #> # A tibble: 5 × 14 #> name height mass hair_color skin_color eye_color birth_year sex 
#> #> 1 Leia Orga… 150 49 brown light brown 19 fema… #> 2 Mon Mothma 150 NA auburn fair blue 48 fema… #> 3 Watto 137 NA black blue, grey yellow NA male #> 4 Sebulba 112 40 none grey, red orange NA male #> 5 Gasgano 122 NA none white, bl… black NA male #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships "},{"path":"https://dplyr.tidyverse.org/dev/reference/bind_cols.html","id":null,"dir":"Reference","previous_headings":"","what":"Bind multiple data frames by column — bind_cols","title":"Bind multiple data frames by column — bind_cols","text":"Bind number data frames column, making wider result. similar .call(cbind, dfs). possible prefer using join combine multiple data frames. bind_cols() binds rows order appear easy create meaningless results without realising .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/bind_cols.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Bind multiple data frames by column — bind_cols","text":"","code":"bind_cols( ..., .name_repair = c(\"unique\", \"universal\", \"check_unique\", \"minimal\") )"},{"path":"https://dplyr.tidyverse.org/dev/reference/bind_cols.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Bind multiple data frames by column — bind_cols","text":"... Data frames combine. argument can either data frame, list data frame, list data frames. Inputs recycled length, matched position. .name_repair One \"unique\", \"universal\", \"check_unique\". 
See vctrs::vec_as_names() meaning options.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/bind_cols.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Bind multiple data frames by column — bind_cols","text":"data frame type first element ....","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/bind_cols.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Bind multiple data frames by column — bind_cols","text":"","code":"df1 <- tibble(x = 1:3) df2 <- tibble(y = 3:1) bind_cols(df1, df2) #> # A tibble: 3 × 2 #> x y #> #> 1 1 3 #> 2 2 2 #> 3 3 1 # Row sizes must be compatible when column-binding try(bind_cols(tibble(x = 1:3), tibble(y = 1:2))) #> Error in bind_cols(tibble(x = 1:3), tibble(y = 1:2)) : #> Can't recycle `..1` (size 3) to match `..2` (size 2)."},{"path":"https://dplyr.tidyverse.org/dev/reference/bind_rows.html","id":null,"dir":"Reference","previous_headings":"","what":"Bind multiple data frames by row — bind_rows","title":"Bind multiple data frames by row — bind_rows","text":"Bind number data frames row, making longer result. similar .call(rbind, dfs), output contain columns appear inputs.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/bind_rows.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Bind multiple data frames by row — bind_rows","text":"","code":"bind_rows(..., .id = NULL)"},{"path":"https://dplyr.tidyverse.org/dev/reference/bind_rows.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Bind multiple data frames by row — bind_rows","text":"... Data frames combine. argument can either data frame, list data frame, list data frames. Columns matched name, missing columns filled NA. .id name optional identifier column. Provide string create output column identifies input. 
column use names available, otherwise use positions.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/bind_rows.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Bind multiple data frames by row — bind_rows","text":"data frame type first element ....","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/bind_rows.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Bind multiple data frames by row — bind_rows","text":"","code":"df1 <- tibble(x = 1:2, y = letters[1:2]) df2 <- tibble(x = 4:5, z = 1:2) # You can supply individual data frames as arguments: bind_rows(df1, df2) #> # A tibble: 4 × 3 #> x y z #> #> 1 1 a NA #> 2 2 b NA #> 3 4 NA 1 #> 4 5 NA 2 # Or a list of data frames: bind_rows(list(df1, df2)) #> # A tibble: 4 × 3 #> x y z #> #> 1 1 a NA #> 2 2 b NA #> 3 4 NA 1 #> 4 5 NA 2 # When you supply a column name with the `.id` argument, a new # column is created to link each row to its original data frame bind_rows(list(df1, df2), .id = \"id\") #> # A tibble: 4 × 4 #> id x y z #> #> 1 1 1 a NA #> 2 1 2 b NA #> 3 2 4 NA 1 #> 4 2 5 NA 2 bind_rows(list(a = df1, b = df2), .id = \"id\") #> # A tibble: 4 × 4 #> id x y z #> #> 1 a 1 a NA #> 2 a 2 b NA #> 3 b 4 NA 1 #> 4 b 5 NA 2"},{"path":"https://dplyr.tidyverse.org/dev/reference/c_across.html","id":null,"dir":"Reference","previous_headings":"","what":"Combine values from multiple columns — c_across","title":"Combine values from multiple columns — c_across","text":"c_across() designed work rowwise() make easy perform row-wise aggregations. two differences c(): uses tidy select semantics can easily select multiple variables. See vignette(\"rowwise\") details. 
uses vctrs::vec_c() order give safer outputs.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/c_across.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Combine values from multiple columns — c_across","text":"","code":"c_across(cols)"},{"path":"https://dplyr.tidyverse.org/dev/reference/c_across.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Combine values from multiple columns — c_across","text":"cols Columns transform. select grouping columns already automatically handled verb (.e. summarise() mutate()).","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/c_across.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Combine values from multiple columns — c_across","text":"","code":"df <- tibble(id = 1:4, w = runif(4), x = runif(4), y = runif(4), z = runif(4)) df %>% rowwise() %>% mutate( sum = sum(c_across(w:z)), sd = sd(c_across(w:z)) ) #> # A tibble: 4 × 7 #> # Rowwise: #> id w x y z sum sd #> #> 1 1 0.126 0.533 0.172 0.196 1.03 0.186 #> 2 2 0.938 0.547 0.691 0.969 3.14 0.202 #> 3 3 0.801 0.0959 0.675 0.387 1.96 0.315 #> 4 4 0.758 0.388 0.946 0.650 2.74 0.233"},{"path":"https://dplyr.tidyverse.org/dev/reference/case_match.html","id":null,"dir":"Reference","previous_headings":"","what":"A general vectorised switch() — case_match","title":"A general vectorised switch() — case_match","text":"function allows vectorise multiple switch() statements. case evaluated sequentially first match element determines corresponding value output vector. cases match, .default used. 
case_match() R equivalent SQL \"simple\" CASE statement.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/case_match.html","id":"connection-to-case-when-","dir":"Reference","previous_headings":"","what":"Connection to case_when()","title":"A general vectorised switch() — case_match","text":"case_when() uses logical expressions left-hand side formula, case_match() uses values match .x . following two statements roughly equivalent:","code":"case_when( x %in% c(\"a\", \"b\") ~ 1, x %in% \"c\" ~ 2, x %in% c(\"d\", \"e\") ~ 3 ) case_match( x, c(\"a\", \"b\") ~ 1, \"c\" ~ 2, c(\"d\", \"e\") ~ 3 )"},{"path":"https://dplyr.tidyverse.org/dev/reference/case_match.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"A general vectorised switch() — case_match","text":"","code":"case_match(.x, ..., .default = NULL, .ptype = NULL)"},{"path":"https://dplyr.tidyverse.org/dev/reference/case_match.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"A general vectorised switch() — case_match","text":".x vector match . ... sequence two-sided formulas: old_values ~ new_value. right hand side (RHS) determines output value values .x match left hand side (LHS). LHS must evaluate type vector .x. can length, allowing map multiple .x values RHS value. value repeated LHS, .e. value .x matches multiple cases, first match used. RHS inputs coerced common type. RHS input recycled size .x. .default value used values .x matched LHS inputs. NULL, default, missing value used. .default recycled size .x. .ptype optional prototype declaring desired output type. 
supplied, output type taken common type RHS inputs .default.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/case_match.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"A general vectorised switch() — case_match","text":"vector size .x type common type RHS inputs .default (overridden .ptype).","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/case_match.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"A general vectorised switch() — case_match","text":"","code":"x <- c(\"a\", \"b\", \"a\", \"d\", \"b\", NA, \"c\", \"e\") # `case_match()` acts like a vectorized `switch()`. # Unmatched values \"fall through\" as a missing value. case_match( x, \"a\" ~ 1, \"b\" ~ 2, \"c\" ~ 3, \"d\" ~ 4 ) #> [1] 1 2 1 4 2 NA 3 NA # Missing values can be matched exactly, and `.default` can be used to # control the value used for unmatched values of `.x` case_match( x, \"a\" ~ 1, \"b\" ~ 2, \"c\" ~ 3, \"d\" ~ 4, NA ~ 0, .default = 100 ) #> [1] 1 2 1 4 2 0 3 100 # Input values can be grouped into the same expression to map them to the # same output value case_match( x, c(\"a\", \"b\") ~ \"low\", c(\"c\", \"d\", \"e\") ~ \"high\" ) #> [1] \"low\" \"low\" \"low\" \"high\" \"low\" NA \"high\" \"high\" # `case_match()` isn't limited to character input: y <- c(1, 2, 1, 3, 1, NA, 2, 4) case_match( y, c(1, 3) ~ \"odd\", c(2, 4) ~ \"even\", .default = \"missing\" ) #> [1] \"odd\" \"even\" \"odd\" \"odd\" \"odd\" \"missing\" \"even\" #> [8] \"even\" # Setting `.default` to the original vector is a useful way to replace # selected values, leaving everything else as is case_match(y, NA ~ 0, .default = y) #> [1] 1 2 1 3 1 0 2 4 starwars %>% mutate( # Replace missings, but leave everything else alone hair_color = case_match(hair_color, NA ~ \"unknown\", .default = hair_color), # Replace some, but not all, of the species species = case_match( species, \"Human\" ~ \"Humanoid\", 
\"Droid\" ~ \"Robot\", c(\"Wookiee\", \"Ewok\") ~ \"Hairy\", .default = species ), .keep = \"used\" ) #> # A tibble: 87 × 2 #> hair_color species #> #> 1 blond Humanoid #> 2 unknown Robot #> 3 unknown Robot #> 4 none Humanoid #> 5 brown Humanoid #> 6 brown, grey Humanoid #> 7 brown Humanoid #> 8 unknown Robot #> 9 black Humanoid #> 10 auburn, white Humanoid #> # ℹ 77 more rows"},{"path":"https://dplyr.tidyverse.org/dev/reference/case_when.html","id":null,"dir":"Reference","previous_headings":"","what":"A general vectorised if-else — case_when","title":"A general vectorised if-else — case_when","text":"function allows vectorise multiple if_else() statements. case evaluated sequentially first match element determines corresponding value output vector. cases match, .default used final \"else\" statment. case_when() R equivalent SQL \"searched\" CASE statement.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/case_when.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"A general vectorised if-else — case_when","text":"","code":"case_when(..., .default = NULL, .ptype = NULL, .size = NULL)"},{"path":"https://dplyr.tidyverse.org/dev/reference/case_when.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"A general vectorised if-else — case_when","text":"... sequence two-sided formulas. left hand side (LHS) determines values match case. right hand side (RHS) provides replacement value. LHS inputs must evaluate logical vectors. RHS inputs coerced common type. inputs recycled common size. said, encourage LHS inputs size. Recycling mainly useful RHS inputs, might supply size 1 input recycled size LHS inputs. NULL inputs ignored. .default value used LHS inputs return either FALSE NA. .default must size 1 size common size computed .... .default participates computation common type RHS inputs. NA values LHS conditions treated like FALSE, meaning result locations assigned .default value. 
handle missing values conditions differently, must explicitly catch another condition fall .default. typically involves variation .na(x) ~ value tailored usage case_when(). NULL, default, missing value used. .ptype optional prototype declaring desired output type. supplied, overrides common type RHS inputs. .size optional size declaring desired output size. supplied, overrides common size computed ....","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/case_when.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"A general vectorised if-else — case_when","text":"vector size common size computed inputs ... type common type RHS inputs ....","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/case_when.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"A general vectorised if-else — case_when","text":"","code":"x <- 1:70 case_when( x %% 35 == 0 ~ \"fizz buzz\", x %% 5 == 0 ~ \"fizz\", x %% 7 == 0 ~ \"buzz\", .default = as.character(x) ) #> [1] \"1\" \"2\" \"3\" \"4\" \"fizz\" #> [6] \"6\" \"buzz\" \"8\" \"9\" \"fizz\" #> [11] \"11\" \"12\" \"13\" \"buzz\" \"fizz\" #> [16] \"16\" \"17\" \"18\" \"19\" \"fizz\" #> [21] \"buzz\" \"22\" \"23\" \"24\" \"fizz\" #> [26] \"26\" \"27\" \"buzz\" \"29\" \"fizz\" #> [31] \"31\" \"32\" \"33\" \"34\" \"fizz buzz\" #> [36] \"36\" \"37\" \"38\" \"39\" \"fizz\" #> [41] \"41\" \"buzz\" \"43\" \"44\" \"fizz\" #> [46] \"46\" \"47\" \"48\" \"buzz\" \"fizz\" #> [51] \"51\" \"52\" \"53\" \"54\" \"fizz\" #> [56] \"buzz\" \"57\" \"58\" \"59\" \"fizz\" #> [61] \"61\" \"62\" \"buzz\" \"64\" \"fizz\" #> [66] \"66\" \"67\" \"68\" \"69\" \"fizz buzz\" # Like an if statement, the arguments are evaluated in order, so you must # proceed from the most specific to the most general. 
This won't work: case_when( x %% 5 == 0 ~ \"fizz\", x %% 7 == 0 ~ \"buzz\", x %% 35 == 0 ~ \"fizz buzz\", .default = as.character(x) ) #> [1] \"1\" \"2\" \"3\" \"4\" \"fizz\" \"6\" \"buzz\" \"8\" \"9\" \"fizz\" #> [11] \"11\" \"12\" \"13\" \"buzz\" \"fizz\" \"16\" \"17\" \"18\" \"19\" \"fizz\" #> [21] \"buzz\" \"22\" \"23\" \"24\" \"fizz\" \"26\" \"27\" \"buzz\" \"29\" \"fizz\" #> [31] \"31\" \"32\" \"33\" \"34\" \"fizz\" \"36\" \"37\" \"38\" \"39\" \"fizz\" #> [41] \"41\" \"buzz\" \"43\" \"44\" \"fizz\" \"46\" \"47\" \"48\" \"buzz\" \"fizz\" #> [51] \"51\" \"52\" \"53\" \"54\" \"fizz\" \"buzz\" \"57\" \"58\" \"59\" \"fizz\" #> [61] \"61\" \"62\" \"buzz\" \"64\" \"fizz\" \"66\" \"67\" \"68\" \"69\" \"fizz\" # If none of the cases match and no `.default` is supplied, NA is used: case_when( x %% 35 == 0 ~ \"fizz buzz\", x %% 5 == 0 ~ \"fizz\", x %% 7 == 0 ~ \"buzz\", ) #> [1] NA NA NA NA \"fizz\" #> [6] NA \"buzz\" NA NA \"fizz\" #> [11] NA NA NA \"buzz\" \"fizz\" #> [16] NA NA NA NA \"fizz\" #> [21] \"buzz\" NA NA NA \"fizz\" #> [26] NA NA \"buzz\" NA \"fizz\" #> [31] NA NA NA NA \"fizz buzz\" #> [36] NA NA NA NA \"fizz\" #> [41] NA \"buzz\" NA NA \"fizz\" #> [46] NA NA NA \"buzz\" \"fizz\" #> [51] NA NA NA NA \"fizz\" #> [56] \"buzz\" NA NA NA \"fizz\" #> [61] NA NA \"buzz\" NA \"fizz\" #> [66] NA NA NA NA \"fizz buzz\" # Note that `NA` values on the LHS are treated like `FALSE` and will be # assigned the `.default` value. You must handle them explicitly if you # want to use a different value. The exact way to handle missing values is # dependent on the set of LHS conditions you use. 
x[2:4] <- NA_real_ case_when( x %% 35 == 0 ~ \"fizz buzz\", x %% 5 == 0 ~ \"fizz\", x %% 7 == 0 ~ \"buzz\", is.na(x) ~ \"nope\", .default = as.character(x) ) #> [1] \"1\" \"nope\" \"nope\" \"nope\" \"fizz\" #> [6] \"6\" \"buzz\" \"8\" \"9\" \"fizz\" #> [11] \"11\" \"12\" \"13\" \"buzz\" \"fizz\" #> [16] \"16\" \"17\" \"18\" \"19\" \"fizz\" #> [21] \"buzz\" \"22\" \"23\" \"24\" \"fizz\" #> [26] \"26\" \"27\" \"buzz\" \"29\" \"fizz\" #> [31] \"31\" \"32\" \"33\" \"34\" \"fizz buzz\" #> [36] \"36\" \"37\" \"38\" \"39\" \"fizz\" #> [41] \"41\" \"buzz\" \"43\" \"44\" \"fizz\" #> [46] \"46\" \"47\" \"48\" \"buzz\" \"fizz\" #> [51] \"51\" \"52\" \"53\" \"54\" \"fizz\" #> [56] \"buzz\" \"57\" \"58\" \"59\" \"fizz\" #> [61] \"61\" \"62\" \"buzz\" \"64\" \"fizz\" #> [66] \"66\" \"67\" \"68\" \"69\" \"fizz buzz\" # `case_when()` evaluates all RHS expressions, and then constructs its # result by extracting the selected (via the LHS expressions) parts. # In particular `NaN`s are produced in this case: y <- seq(-2, 2, by = .5) case_when( y >= 0 ~ sqrt(y), .default = y ) #> Warning: NaNs produced #> [1] -2.0000000 -1.5000000 -1.0000000 -0.5000000 0.0000000 0.7071068 #> [7] 1.0000000 1.2247449 1.4142136 # `case_when()` is particularly useful inside `mutate()` when you want to # create a new variable that relies on a complex combination of existing # variables starwars %>% select(name:mass, gender, species) %>% mutate( type = case_when( height > 200 | mass > 200 ~ \"large\", species == \"Droid\" ~ \"robot\", .default = \"other\" ) ) #> # A tibble: 87 × 6 #> name height mass gender species type #> #> 1 Luke Skywalker 172 77 masculine Human other #> 2 C-3PO 167 75 masculine Droid robot #> 3 R2-D2 96 32 masculine Droid robot #> 4 Darth Vader 202 136 masculine Human large #> 5 Leia Organa 150 49 feminine Human other #> 6 Owen Lars 178 120 masculine Human other #> 7 Beru Whitesun Lars 165 75 feminine Human other #> 8 R5-D4 97 32 masculine Droid robot #> 9 Biggs Darklighter 183 84 
masculine Human other #> 10 Obi-Wan Kenobi 182 77 masculine Human other #> # ℹ 77 more rows # `case_when()` is not a tidy eval function. If you'd like to reuse # the same patterns, extract the `case_when()` call in a normal # function: case_character_type <- function(height, mass, species) { case_when( height > 200 | mass > 200 ~ \"large\", species == \"Droid\" ~ \"robot\", .default = \"other\" ) } case_character_type(150, 250, \"Droid\") #> [1] \"large\" case_character_type(150, 150, \"Droid\") #> [1] \"robot\" # Such functions can be used inside `mutate()` as well: starwars %>% mutate(type = case_character_type(height, mass, species)) %>% pull(type) #> [1] \"other\" \"robot\" \"robot\" \"large\" \"other\" \"other\" \"other\" \"robot\" #> [9] \"other\" \"other\" \"other\" \"other\" \"large\" \"other\" \"other\" \"large\" #> [17] \"other\" \"other\" \"other\" \"other\" \"other\" \"robot\" \"other\" \"other\" #> [25] \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" #> [33] \"other\" \"other\" \"other\" \"large\" \"large\" \"other\" \"other\" \"other\" #> [41] \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" #> [49] \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"large\" #> [57] \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" #> [65] \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"large\" \"large\" #> [73] \"other\" \"robot\" \"other\" \"other\" \"other\" \"large\" \"large\" \"other\" #> [81] \"other\" \"large\" \"other\" \"other\" \"other\" \"robot\" \"other\" # `case_when()` ignores `NULL` inputs. This is useful when you'd # like to use a pattern only under certain conditions. 
Here we'll # take advantage of the fact that `if` returns `NULL` when there is # no `else` clause: case_character_type <- function(height, mass, species, robots = TRUE) { case_when( height > 200 | mass > 200 ~ \"large\", if (robots) species == \"Droid\" ~ \"robot\", .default = \"other\" ) } starwars %>% mutate(type = case_character_type(height, mass, species, robots = FALSE)) %>% pull(type) #> [1] \"other\" \"other\" \"other\" \"large\" \"other\" \"other\" \"other\" \"other\" #> [9] \"other\" \"other\" \"other\" \"other\" \"large\" \"other\" \"other\" \"large\" #> [17] \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" #> [25] \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" #> [33] \"other\" \"other\" \"other\" \"large\" \"large\" \"other\" \"other\" \"other\" #> [41] \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" #> [49] \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"large\" #> [57] \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" #> [65] \"other\" \"other\" \"other\" \"other\" \"other\" \"other\" \"large\" \"large\" #> [73] \"other\" \"other\" \"other\" \"other\" \"other\" \"large\" \"large\" \"other\" #> [81] \"other\" \"large\" \"other\" \"other\" \"other\" \"other\" \"other\""},{"path":"https://dplyr.tidyverse.org/dev/reference/check_dbplyr.html","id":null,"dir":"Reference","previous_headings":"","what":"dbplyr compatibility functions — check_dbplyr","title":"dbplyr compatibility functions — check_dbplyr","text":"dplyr 0.7.0, number database SQL functions moved dplyr dbplyr. generic functions stayed dplyr (since easy way conditionally import generic different packages), many SQL database helper functions moved. 
written backend, functions generate code need work dplyr 0.5.0 dplyr 0.7.0.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/check_dbplyr.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"dbplyr compatibility functions — check_dbplyr","text":"","code":"check_dbplyr() wrap_dbplyr_obj(obj_name)"},{"path":"https://dplyr.tidyverse.org/dev/reference/check_dbplyr.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"dbplyr compatibility functions — check_dbplyr","text":"","code":"wrap_dbplyr_obj(\"build_sql\") #> build_sql <- function (obj_name) #> { #> if (utils::packageVersion(\"dplyr\") > \"0.5.0\") { #> dplyr::check_dbplyr() #> dbplyr::build_sql(obj_name = obj_name) #> } #> else { #> dplyr::build_sql(obj_name = obj_name) #> } #> } wrap_dbplyr_obj(\"base_agg\") #> base_agg <- function () #> { #> if (utils::packageVersion(\"dplyr\") > \"0.5.0\") { #> dplyr::check_dbplyr() #> dbplyr::base_agg #> } #> else { #> dplyr::base_agg #> } #> }"},{"path":"https://dplyr.tidyverse.org/dev/reference/coalesce.html","id":null,"dir":"Reference","previous_headings":"","what":"Find the first non-missing element — coalesce","title":"Find the first non-missing element — coalesce","text":"Given set vectors, coalesce() finds first non-missing value position. inspired SQL COALESCE function thing SQL NULLs.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/coalesce.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Find the first non-missing element — coalesce","text":"","code":"coalesce(..., .ptype = NULL, .size = NULL)"},{"path":"https://dplyr.tidyverse.org/dev/reference/coalesce.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Find the first non-missing element — coalesce","text":"... One vectors. recycled , cast common type. .ptype optional prototype declaring desired output type. 
supplied, overrides common type vectors .... .size optional size declaring desired output size. supplied, overrides common size vectors ....","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/coalesce.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Find the first non-missing element — coalesce","text":"vector type size common type common size vectors ....","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/coalesce.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Find the first non-missing element — coalesce","text":"","code":"# Use a single value to replace all missing values x <- sample(c(1:5, NA, NA, NA)) coalesce(x, 0L) #> [1] 2 3 4 0 0 0 5 1 # The equivalent to a missing value in a list is `NULL` coalesce(list(1, 2, NULL), list(NA)) #> [[1]] #> [1] 1 #> #> [[2]] #> [1] 2 #> #> [[3]] #> [1] NA #> # Or generate a complete vector from partially missing pieces y <- c(1, 2, NA, NA, 5) z <- c(NA, NA, 3, 4, 5) coalesce(y, z) #> [1] 1 2 3 4 5 # Supply lists by splicing them into dots: vecs <- list( c(1, 2, NA, NA, 5), c(NA, NA, 3, 4, 5) ) coalesce(!!!vecs) #> [1] 1 2 3 4 5"},{"path":"https://dplyr.tidyverse.org/dev/reference/combine.html","id":null,"dir":"Reference","previous_headings":"","what":"Combine vectors — combine","title":"Combine vectors — combine","text":"combine() deprecated favour vctrs::vec_c(). combine() attempted automatically guess whether wanted c() unlist(), fail surprising ways. now believe better explicit.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/combine.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Combine vectors — combine","text":"","code":"combine(...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/combine.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Combine vectors — combine","text":"... 
Vectors combine.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/combine.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Combine vectors — combine","text":"","code":"f1 <- factor(\"a\") f2 <- factor(\"b\") combine(f1, f2) #> Warning: `combine()` was deprecated in dplyr 1.0.0. #> ℹ Please use `vctrs::vec_c()` instead. #> [1] a b #> Levels: a b # -> vctrs::vec_c(f1, f1) #> [1] a a #> Levels: a combine(list(f1, f2)) #> Warning: `combine()` was deprecated in dplyr 1.0.0. #> ℹ Please use `vctrs::vec_c()` instead. #> [1] a b #> Levels: a b # -> vctrs::vec_c(!!!list(f1, f2)) #> [1] a b #> Levels: a b"},{"path":"https://dplyr.tidyverse.org/dev/reference/common_by.html","id":null,"dir":"Reference","previous_headings":"","what":"Extract out common by variables — common_by","title":"Extract out common by variables — common_by","text":"Extract common variables","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/common_by.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Extract out common by variables — common_by","text":"","code":"common_by(by = NULL, x, y)"},{"path":"https://dplyr.tidyverse.org/dev/reference/compute.html","id":null,"dir":"Reference","previous_headings":"","what":"Force computation of a database query — compute","title":"Force computation of a database query — compute","text":"compute() stores results remote temporary table. collect() retrieves data local tibble. collapse() slightly different: force computation, instead forces generation SQL query. sometimes needed work around bugs dplyr's SQL generation. functions preserve grouping ordering.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/compute.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Force computation of a database query — compute","text":"","code":"compute(x, ...) collect(x, ...) 
collapse(x, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/compute.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Force computation of a database query — compute","text":"x data frame, data frame extension (e.g. tibble), lazy data frame (e.g. dbplyr dtplyr). See Methods, , details. ... Arguments passed methods","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/compute.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Force computation of a database query — compute","text":"functions generics, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. Methods available currently loaded packages: compute(): dbplyr (tbl_sql), dplyr (data.frame) collect(): dbplyr (tbl_sql), dplyr (data.frame) collapse(): dbplyr (tbl_sql), dplyr (data.frame)","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/compute.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Force computation of a database query — compute","text":"","code":"mtcars2 <- dbplyr::src_memdb() %>% copy_to(mtcars, name = \"mtcars2-cc\", overwrite = TRUE) remote <- mtcars2 %>% filter(cyl == 8) %>% select(mpg:drat) # Compute query and save in remote table compute(remote) #> # Source: table<`dbplyr_rUoBF7QxMz`> [?? x 5] #> # Database: sqlite 3.46.0 [:memory:] #> mpg cyl disp hp drat #> #> 1 18.7 8 360 175 3.15 #> 2 14.3 8 360 245 3.21 #> 3 16.4 8 276. 180 3.07 #> 4 17.3 8 276. 180 3.07 #> 5 15.2 8 276. 180 3.07 #> 6 10.4 8 472 205 2.93 #> 7 10.4 8 460 215 3 #> 8 14.7 8 440 230 3.23 #> 9 15.5 8 318 150 2.76 #> 10 15.2 8 304 150 3.15 #> # ℹ more rows # Compute query bring back to this session collect(remote) #> # A tibble: 14 × 5 #> mpg cyl disp hp drat #> #> 1 18.7 8 360 175 3.15 #> 2 14.3 8 360 245 3.21 #> 3 16.4 8 276. 180 3.07 #> 4 17.3 8 276. 180 3.07 #> 5 15.2 8 276. 
180 3.07 #> 6 10.4 8 472 205 2.93 #> 7 10.4 8 460 215 3 #> 8 14.7 8 440 230 3.23 #> 9 15.5 8 318 150 2.76 #> 10 15.2 8 304 150 3.15 #> 11 13.3 8 350 245 3.73 #> 12 19.2 8 400 175 3.08 #> 13 15.8 8 351 264 4.22 #> 14 15 8 301 335 3.54 # Creates a fresh query based on the generated SQL collapse(remote) #> # Source: SQL [?? x 5] #> # Database: sqlite 3.46.0 [:memory:] #> mpg cyl disp hp drat #> #> 1 18.7 8 360 175 3.15 #> 2 14.3 8 360 245 3.21 #> 3 16.4 8 276. 180 3.07 #> 4 17.3 8 276. 180 3.07 #> 5 15.2 8 276. 180 3.07 #> 6 10.4 8 472 205 2.93 #> 7 10.4 8 460 215 3 #> 8 14.7 8 440 230 3.23 #> 9 15.5 8 318 150 2.76 #> 10 15.2 8 304 150 3.15 #> # ℹ more rows"},{"path":"https://dplyr.tidyverse.org/dev/reference/consecutive_id.html","id":null,"dir":"Reference","previous_headings":"","what":"Generate a unique identifier for consecutive combinations — consecutive_id","title":"Generate a unique identifier for consecutive combinations — consecutive_id","text":"consecutive_id() generates unique identifier increments every time variable (combination variables) changes. Inspired data.table::rleid().","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/consecutive_id.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Generate a unique identifier for consecutive combinations — consecutive_id","text":"","code":"consecutive_id(...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/consecutive_id.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Generate a unique identifier for consecutive combinations — consecutive_id","text":"... Unnamed vectors. 
multiple vectors supplied, length.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/consecutive_id.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Generate a unique identifier for consecutive combinations — consecutive_id","text":"numeric vector length longest element ....","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/consecutive_id.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Generate a unique identifier for consecutive combinations — consecutive_id","text":"","code":"consecutive_id(c(TRUE, TRUE, FALSE, FALSE, TRUE, FALSE, NA, NA)) #> [1] 1 1 2 2 3 4 5 5 consecutive_id(c(1, 1, 1, 2, 1, 1, 2, 2)) #> [1] 1 1 1 2 3 3 4 4 df <- data.frame(x = c(0, 0, 1, 0), y = c(2, 2, 2, 2)) df %>% group_by(x, y) %>% summarise(n = n()) #> `summarise()` has grouped output by 'x'. You can override using the #> `.groups` argument. #> # A tibble: 2 × 3 #> # Groups: x [2] #> x y n #> #> 1 0 2 3 #> 2 1 2 1 df %>% group_by(id = consecutive_id(x, y), x, y) %>% summarise(n = n()) #> `summarise()` has grouped output by 'id', 'x'. You can override using the #> `.groups` argument. #> # A tibble: 3 × 4 #> # Groups: id, x [3] #> id x y n #> #> 1 1 0 2 2 #> 2 2 1 2 1 #> 3 3 0 2 1"},{"path":"https://dplyr.tidyverse.org/dev/reference/context.html","id":null,"dir":"Reference","previous_headings":"","what":"Information about the ","title":"Information about the ","text":"functions return information \"current\" group \"current\" variable, work inside specific contexts like summarise() mutate(). n() gives current group size. cur_group() gives group keys, tibble one row one column grouping variable. cur_group_id() gives unique numeric identifier current group. cur_group_rows() gives row indices current group. cur_column() gives name current column (across() ). See group_data() equivalent functions return values groups. 
See pick() way select subset columns using tidyselect syntax inside summarise() mutate().","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/context.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Information about the ","text":"","code":"n() cur_group() cur_group_id() cur_group_rows() cur_column()"},{"path":"https://dplyr.tidyverse.org/dev/reference/context.html","id":"data-table","dir":"Reference","previous_headings":"","what":"data.table","title":"Information about the ","text":"familiar data.table: cur_group_id() <-> .GRP cur_group() <-> .cur_group_rows() <-> .See pick() equivalent .SD.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/context.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Information about the ","text":"","code":"df <- tibble( g = sample(rep(letters[1:3], 1:3)), x = runif(6), y = runif(6) ) gf <- df %>% group_by(g) gf %>% summarise(n = n()) #> # A tibble: 3 × 2 #> g n #> #> 1 a 1 #> 2 b 2 #> 3 c 3 gf %>% mutate(id = cur_group_id()) #> # A tibble: 6 × 4 #> # Groups: g [3] #> g x y id #> #> 1 c 0.0367 0.642 3 #> 2 b 0.733 0.327 2 #> 3 b 0.215 0.387 2 #> 4 a 0.0160 0.708 1 #> 5 c 0.129 0.698 3 #> 6 c 0.686 0.926 3 gf %>% reframe(row = cur_group_rows()) #> # A tibble: 6 × 2 #> g row #> #> 1 a 4 #> 2 b 2 #> 3 b 3 #> 4 c 1 #> 5 c 5 #> 6 c 6 gf %>% summarise(data = list(cur_group())) #> # A tibble: 3 × 2 #> g data #> #> 1 a #> 2 b #> 3 c gf %>% mutate(across(everything(), ~ paste(cur_column(), round(.x, 2)))) #> # A tibble: 6 × 3 #> # Groups: g [3] #> g x y #> #> 1 c x 0.04 y 0.64 #> 2 b x 0.73 y 0.33 #> 3 b x 0.22 y 0.39 #> 4 a x 0.02 y 0.71 #> 5 c x 0.13 y 0.7 #> 6 c x 0.69 y 0.93"},{"path":"https://dplyr.tidyverse.org/dev/reference/copy_to.html","id":null,"dir":"Reference","previous_headings":"","what":"Copy a local data frame to a remote src — copy_to","title":"Copy a local data frame to a remote src — copy_to","text":"function uploads 
local data frame remote data source, creating table definition needed. Wherever possible, new object temporary, limited current connection source.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/copy_to.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Copy a local data frame to a remote src — copy_to","text":"","code":"copy_to(dest, df, name = deparse(substitute(df)), overwrite = FALSE, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/copy_to.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Copy a local data frame to a remote src — copy_to","text":"dest remote data source df local data frame name name new remote table. overwrite TRUE, overwrite existing table name name. FALSE, throw error name already exists. ... parameters passed methods.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/copy_to.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Copy a local data frame to a remote src — copy_to","text":"tbl object remote source","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/copy_to.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Copy a local data frame to a remote src — copy_to","text":"function generic, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. 
following methods currently available loaded packages: dbplyr (src_sql), dplyr (DBIConnection, src_local) .","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/copy_to.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Copy a local data frame to a remote src — copy_to","text":"","code":"if (FALSE) { iris2 <- dbplyr::src_memdb() %>% copy_to(iris, overwrite = TRUE) iris2 }"},{"path":"https://dplyr.tidyverse.org/dev/reference/count.html","id":null,"dir":"Reference","previous_headings":"","what":"Count the observations in each group — count","title":"Count the observations in each group — count","text":"count() lets quickly count unique values one variables: df %>% count(, b) roughly equivalent df %>% group_by(, b) %>% summarise(n = n()). count() paired tally(), lower-level helper equivalent df %>% summarise(n = n()). Supply wt perform weighted counts, switching summary n = n() n = sum(wt). add_count() add_tally() equivalents count() tally() use mutate() instead summarise() add new column group-wise counts.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/count.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Count the observations in each group — count","text":"","code":"count(x, ..., wt = NULL, sort = FALSE, name = NULL) # S3 method for data.frame count( x, ..., wt = NULL, sort = FALSE, name = NULL, .drop = group_by_drop_default(x) ) tally(x, wt = NULL, sort = FALSE, name = NULL) add_count(x, ..., wt = NULL, sort = FALSE, name = NULL, .drop = deprecated()) add_tally(x, wt = NULL, sort = FALSE, name = NULL)"},{"path":"https://dplyr.tidyverse.org/dev/reference/count.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Count the observations in each group — count","text":"x data frame, data frame extension (e.g. tibble), lazy data frame (e.g. dbplyr dtplyr). ... Variables group . wt Frequency weights. 
Can NULL variable: NULL (default), counts number rows group. variable, computes sum(wt) group. sort TRUE, show largest groups top. name name new column output. omitted, default n. already column called n, use nn. column called n nn, 'll use nnn, , adding ns gets new name. .drop Handling factor levels appear data, passed group_by(). count(): FALSE include counts empty groups (.e. levels factors exist data). add_count(): deprecated since actually affect output.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/count.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Count the observations in each group — count","text":"object type .data. count() add_count() group transiently, output groups input.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/count.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Count the observations in each group — count","text":"","code":"# count() is a convenient way to get a sense of the distribution of # values in a dataset starwars %>% count(species) #> # A tibble: 38 × 2 #> species n #> #> 1 Aleena 1 #> 2 Besalisk 1 #> 3 Cerean 1 #> 4 Chagrian 1 #> 5 Clawdite 1 #> 6 Droid 6 #> 7 Dug 1 #> 8 Ewok 1 #> 9 Geonosian 1 #> 10 Gungan 3 #> # ℹ 28 more rows starwars %>% count(species, sort = TRUE) #> # A tibble: 38 × 2 #> species n #> #> 1 Human 35 #> 2 Droid 6 #> 3 NA 4 #> 4 Gungan 3 #> 5 Kaminoan 2 #> 6 Mirialan 2 #> 7 Twi'lek 2 #> 8 Wookiee 2 #> 9 Zabrak 2 #> 10 Aleena 1 #> # ℹ 28 more rows starwars %>% count(sex, gender, sort = TRUE) #> # A tibble: 6 × 3 #> sex gender n #> #> 1 male masculine 60 #> 2 female feminine 16 #> 3 none masculine 5 #> 4 NA NA 4 #> 5 hermaphroditic masculine 1 #> 6 none feminine 1 starwars %>% count(birth_decade = round(birth_year, -1)) #> # A tibble: 15 × 2 #> birth_decade n #> #> 1 10 1 #> 2 20 6 #> 3 30 4 #> 4 40 6 #> 5 50 8 #> 6 60 4 #> 7 70 4 #> 8 80 2 #> 9 90 3 #> 10 100 1 #> 11 110 1 #> 12 200 1 #> 13 600 1 #> 
14 900 1 #> 15 NA 44 # use the `wt` argument to perform a weighted count. This is useful # when the data has already been aggregated once df <- tribble( ~name, ~gender, ~runs, \"Max\", \"male\", 10, \"Sandra\", \"female\", 1, \"Susan\", \"female\", 4 ) # counts rows: df %>% count(gender) #> # A tibble: 2 × 2 #> gender n #> #> 1 female 2 #> 2 male 1 # counts runs: df %>% count(gender, wt = runs) #> # A tibble: 2 × 2 #> gender n #> #> 1 female 5 #> 2 male 10 # When factors are involved, `.drop = FALSE` can be used to retain factor # levels that don't appear in the data df2 <- tibble( id = 1:5, type = factor(c(\"a\", \"c\", \"a\", NA, \"a\"), levels = c(\"a\", \"b\", \"c\")) ) df2 %>% count(type) #> # A tibble: 3 × 2 #> type n #> #> 1 a 3 #> 2 c 1 #> 3 NA 1 df2 %>% count(type, .drop = FALSE) #> # A tibble: 4 × 2 #> type n #> #> 1 a 3 #> 2 b 0 #> 3 c 1 #> 4 NA 1 # Or, using `group_by()`: df2 %>% group_by(type, .drop = FALSE) %>% count() #> # A tibble: 4 × 2 #> # Groups: type [4] #> type n #> #> 1 a 3 #> 2 b 0 #> 3 c 1 #> 4 NA 1 # tally() is a lower-level function that assumes you've done the grouping starwars %>% tally() #> # A tibble: 1 × 1 #> n #> #> 1 87 starwars %>% group_by(species) %>% tally() #> # A tibble: 38 × 2 #> species n #> #> 1 Aleena 1 #> 2 Besalisk 1 #> 3 Cerean 1 #> 4 Chagrian 1 #> 5 Clawdite 1 #> 6 Droid 6 #> 7 Dug 1 #> 8 Ewok 1 #> 9 Geonosian 1 #> 10 Gungan 3 #> # ℹ 28 more rows # both count() and tally() have add_ variants that work like # mutate() instead of summarise df %>% add_count(gender, wt = runs) #> # A tibble: 3 × 4 #> name gender runs n #> #> 1 Max male 10 10 #> 2 Sandra female 1 5 #> 3 Susan female 4 5 df %>% add_tally(wt = runs) #> # A tibble: 3 × 4 #> name gender runs n #> #> 1 Max male 10 15 #> 2 Sandra female 1 15 #> 3 Susan female 4 15"},{"path":"https://dplyr.tidyverse.org/dev/reference/cross_join.html","id":null,"dir":"Reference","previous_headings":"","what":"Cross join — cross_join","title":"Cross join — cross_join","text":"Cross 
joins match row x every row y, resulting data frame nrow(x) * nrow(y) rows. Since cross joins result possible matches x y, technically serve basis mutating joins, can generally thought cross joins followed filter. practice, specialized procedure used better performance.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/cross_join.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Cross join — cross_join","text":"","code":"cross_join(x, y, ..., copy = FALSE, suffix = c(\".x\", \".y\"))"},{"path":"https://dplyr.tidyverse.org/dev/reference/cross_join.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Cross join — cross_join","text":"x, y pair data frames, data frame extensions (e.g. tibble), lazy data frames (e.g. dbplyr dtplyr). See Methods, , details. ... parameters passed onto methods. copy x y data source, copy TRUE, y copied src x. allows join tables across srcs, potentially expensive operation must opt . suffix non-joined duplicate variables x y, suffixes added output disambiguate . character vector length 2.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/cross_join.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Cross join — cross_join","text":"object type x (including groups). output following properties: nrow(x) * nrow(y) rows returned. Output columns include columns x y. Column name collisions resolved using suffix. order rows columns x preserved much possible.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/cross_join.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Cross join — cross_join","text":"function generic, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. 
following methods currently available loaded packages: dbplyr (tbl_lazy), dplyr (data.frame) .","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/cross_join.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Cross join — cross_join","text":"","code":"# Cross joins match each row in `x` to every row in `y`. # Data within the columns is not used in the matching process. cross_join(band_instruments, band_members) #> # A tibble: 9 × 4 #> name.x plays name.y band #> #> 1 John guitar Mick Stones #> 2 John guitar John Beatles #> 3 John guitar Paul Beatles #> 4 Paul bass Mick Stones #> 5 Paul bass John Beatles #> 6 Paul bass Paul Beatles #> 7 Keith guitar Mick Stones #> 8 Keith guitar John Beatles #> 9 Keith guitar Paul Beatles # Control the suffix added to variables duplicated in # `x` and `y` with `suffix`. cross_join(band_instruments, band_members, suffix = c(\"\", \"_y\")) #> # A tibble: 9 × 4 #> name plays name_y band #> #> 1 John guitar Mick Stones #> 2 John guitar John Beatles #> 3 John guitar Paul Beatles #> 4 Paul bass Mick Stones #> 5 Paul bass John Beatles #> 6 Paul bass Paul Beatles #> 7 Keith guitar Mick Stones #> 8 Keith guitar John Beatles #> 9 Keith guitar Paul Beatles"},{"path":"https://dplyr.tidyverse.org/dev/reference/cumall.html","id":null,"dir":"Reference","previous_headings":"","what":"Cumulativate versions of any, all, and mean — cumall","title":"Cumulativate versions of any, all, and mean — cumall","text":"dplyr provides cumall(), cumany(), cummean() complete R's set cumulative functions.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/cumall.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Cumulativate versions of any, all, and mean — cumall","text":"","code":"cumall(x) cumany(x) 
cummean(x)"},{"path":"https://dplyr.tidyverse.org/dev/reference/cumall.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Cumulativate versions of any, all, and mean — cumall","text":"x cumall() cumany(), logical vector; cummean() integer numeric vector.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/cumall.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Cumulativate versions of any, all, and mean — cumall","text":"vector length x.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/cumall.html","id":"cumulative-logical-functions","dir":"Reference","previous_headings":"","what":"Cumulative logical functions","title":"Cumulativate versions of any, all, and mean — cumall","text":"particularly useful conjunction filter(): cumall(x): cases first FALSE. cumall(!x): cases first TRUE. cumany(x): cases first TRUE. cumany(!x): cases first FALSE.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/cumall.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Cumulativate versions of any, all, and mean — cumall","text":"","code":"# `cummean()` returns a numeric/integer vector of the same length # as the input vector. x <- c(1, 3, 5, 2, 2) cummean(x) #> [1] 1.00 2.00 3.00 2.75 2.60 cumsum(x) / seq_along(x) #> [1] 1.00 2.00 3.00 2.75 2.60 # `cumall()` and `cumany()` return logicals cumall(x < 5) #> [1] TRUE TRUE FALSE FALSE FALSE cumany(x == 3) #> [1] FALSE TRUE TRUE TRUE TRUE # `cumall()` vs. 
`cumany()` df <- data.frame( date = as.Date(\"2020-01-01\") + 0:6, balance = c(100, 50, 25, -25, -50, 30, 120) ) # all rows after first overdraft df %>% filter(cumany(balance < 0)) #> date balance #> 1 2020-01-04 -25 #> 2 2020-01-05 -50 #> 3 2020-01-06 30 #> 4 2020-01-07 120 # all rows until first overdraft df %>% filter(cumall(!(balance < 0))) #> date balance #> 1 2020-01-01 100 #> 2 2020-01-02 50 #> 3 2020-01-03 25"},{"path":"https://dplyr.tidyverse.org/dev/reference/defunct.html","id":null,"dir":"Reference","previous_headings":"","what":"Defunct functions — defunct","title":"Defunct functions — defunct","text":"functions deprecated least two years made defunct. known replacement, calling function tell .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/defunct.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Defunct functions — defunct","text":"","code":"# Deprecated in 0.5.0 ------------------------------------- id(.variables, drop = FALSE) # Deprecated in 0.7.0 ------------------------------------- failwith(default = NULL, f, quiet = FALSE) # Deprecated in 0.8.* ------------------------------------- select_vars(vars = chr(), ..., include = chr(), exclude = chr()) rename_vars(vars = chr(), ..., strict = TRUE) select_var(vars, var = -1) current_vars(...) # Deprecated in 1.0.0 ------------------------------------- bench_tbls(tbls, op, ..., times = 10) compare_tbls(tbls, op, ref = NULL, compare = equal_data_frame, ...) compare_tbls2(tbls_x, tbls_y, op, ref = NULL, compare = equal_data_frame, ...) eval_tbls(tbls, op) eval_tbls2(tbls_x, tbls_y, op) location(df) changes(x, y)"},{"path":"https://dplyr.tidyverse.org/dev/reference/deprec-context.html","id":null,"dir":"Reference","previous_headings":"","what":"Information about the ","title":"Information about the ","text":"functions deprecated dplyr 1.1.0. cur_data() deprecated favor pick(). 
cur_data_all() deprecated direct replacement selecting grouping variables well-defined unlikely ever useful.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/deprec-context.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Information about the ","text":"","code":"cur_data() cur_data_all()"},{"path":"https://dplyr.tidyverse.org/dev/reference/desc.html","id":null,"dir":"Reference","previous_headings":"","what":"Descending order — desc","title":"Descending order — desc","text":"Transform vector format sorted descending order. useful within arrange().","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/desc.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Descending order — desc","text":"","code":"desc(x)"},{"path":"https://dplyr.tidyverse.org/dev/reference/desc.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Descending order — desc","text":"x vector transform","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/desc.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Descending order — desc","text":"","code":"desc(1:10) #> [1] -1 -2 -3 -4 -5 -6 -7 -8 -9 -10 desc(factor(letters)) #> [1] -1 -2 -3 -4 -5 -6 -7 -8 -9 -10 -11 -12 -13 -14 -15 -16 -17 #> [18] -18 -19 -20 -21 -22 -23 -24 -25 -26 first_day <- seq(as.Date(\"1910/1/1\"), as.Date(\"1920/1/1\"), \"years\") desc(first_day) #> [1] 21915 21550 21185 20819 20454 20089 19724 19358 18993 18628 18263 starwars %>% arrange(desc(mass)) #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Jabba De… 175 1358 NA green-tan… orange 600 herm… #> 2 Grievous 216 159 none brown, wh… green, y… NA male #> 3 IG-88 200 140 none metal red 15 none #> 4 Darth Va… 202 136 none white yellow 41.9 male #> 5 Tarfful 234 136 brown brown blue NA male #> 6 Owen Lars 178 120 brown, gr… light blue 52 male #> 7 
Bossk 190 113 none green red 53 male #> 8 Chewbacca 228 112 brown unknown blue 200 male #> 9 Jek Tono… 180 110 brown fair blue NA NA #> 10 Dexter J… 198 102 none brown yellow NA male #> # ℹ 77 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships "},{"path":"https://dplyr.tidyverse.org/dev/reference/dim_desc.html","id":null,"dir":"Reference","previous_headings":"","what":"Describing dimensions — dim_desc","title":"Describing dimensions — dim_desc","text":"Prints dimensions array-like object user-friendly manner, substituting NA ?? (SQL queries).","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/dim_desc.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Describing dimensions — dim_desc","text":"","code":"dim_desc(x)"},{"path":"https://dplyr.tidyverse.org/dev/reference/dim_desc.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Describing dimensions — dim_desc","text":"x Object show dimensions .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/dim_desc.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Describing dimensions — dim_desc","text":"","code":"dim_desc(mtcars) #> [1] \"[32 x 11]\""},{"path":"https://dplyr.tidyverse.org/dev/reference/distinct.html","id":null,"dir":"Reference","previous_headings":"","what":"Keep distinct/unique rows — distinct","title":"Keep distinct/unique rows — distinct","text":"Keep unique/distinct rows data frame. 
similar unique.data.frame() considerably faster.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/distinct.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Keep distinct/unique rows — distinct","text":"","code":"distinct(.data, ..., .keep_all = FALSE)"},{"path":"https://dplyr.tidyverse.org/dev/reference/distinct.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Keep distinct/unique rows — distinct","text":".data data frame, data frame extension (e.g. tibble), lazy data frame (e.g. dbplyr dtplyr). See Methods, , details. ... Optional variables use determining uniqueness. multiple rows given combination inputs, first row preserved. omitted, use variables data frame. .keep_all TRUE, keep variables .data. combination ... distinct, keeps first row values.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/distinct.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Keep distinct/unique rows — distinct","text":"object type .data. output following properties: Rows subset input appear order. Columns modified ... empty .keep_all TRUE. Otherwise, distinct() first calls mutate() create new columns. Groups modified. Data frame attributes preserved.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/distinct.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Keep distinct/unique rows — distinct","text":"function generic, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. 
following methods currently available loaded packages: dbplyr (tbl_lazy), dplyr (data.frame) .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/distinct.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Keep distinct/unique rows — distinct","text":"","code":"df <- tibble( x = sample(10, 100, rep = TRUE), y = sample(10, 100, rep = TRUE) ) nrow(df) #> [1] 100 nrow(distinct(df)) #> [1] 71 nrow(distinct(df, x, y)) #> [1] 71 distinct(df, x) #> # A tibble: 10 × 1 #> x #> #> 1 3 #> 2 1 #> 3 9 #> 4 10 #> 5 5 #> 6 2 #> 7 8 #> 8 4 #> 9 7 #> 10 6 distinct(df, y) #> # A tibble: 10 × 1 #> y #> #> 1 7 #> 2 6 #> 3 3 #> 4 4 #> 5 10 #> 6 9 #> 7 1 #> 8 2 #> 9 5 #> 10 8 # You can choose to keep all other variables as well distinct(df, x, .keep_all = TRUE) #> # A tibble: 10 × 2 #> x y #> #> 1 3 7 #> 2 1 6 #> 3 9 4 #> 4 10 3 #> 5 5 4 #> 6 2 10 #> 7 8 9 #> 8 4 1 #> 9 7 3 #> 10 6 7 distinct(df, y, .keep_all = TRUE) #> # A tibble: 10 × 2 #> x y #> #> 1 3 7 #> 2 1 6 #> 3 1 3 #> 4 9 4 #> 5 2 10 #> 6 8 9 #> 7 4 1 #> 8 5 2 #> 9 3 5 #> 10 3 8 # You can also use distinct on computed variables distinct(df, diff = abs(x - y)) #> # A tibble: 10 × 1 #> diff #> #> 1 4 #> 2 5 #> 3 2 #> 4 7 #> 5 1 #> 6 8 #> 7 3 #> 8 6 #> 9 0 #> 10 9 # Use `pick()` to select columns with tidy-select distinct(starwars, pick(contains(\"color\"))) #> # A tibble: 67 × 3 #> hair_color skin_color eye_color #> #> 1 blond fair blue #> 2 NA gold yellow #> 3 NA white, blue red #> 4 none white yellow #> 5 brown light brown #> 6 brown, grey light blue #> 7 brown light blue #> 8 NA white, red red #> 9 black light brown #> 10 auburn, white fair blue-gray #> # ℹ 57 more rows # Grouping ------------------------------------------------- df <- tibble( g = c(1, 1, 2, 2, 2), x = c(1, 1, 2, 1, 2), y = c(3, 2, 1, 3, 1) ) df <- df %>% group_by(g) # With grouped data frames, distinctness is computed within each group df %>% distinct(x) #> # A tibble: 3 × 2 #> # Groups: g [2] #> g x #> 
#> 1 1 1 #> 2 2 2 #> 3 2 1 # When `...` are omitted, `distinct()` still computes distinctness using # all variables in the data frame df %>% distinct() #> # A tibble: 4 × 3 #> # Groups: g [2] #> g x y #> #> 1 1 1 3 #> 2 1 1 2 #> 3 2 2 1 #> 4 2 1 3"},{"path":"https://dplyr.tidyverse.org/dev/reference/distinct_all.html","id":null,"dir":"Reference","previous_headings":"","what":"Select distinct rows by a selection of variables — distinct_all","title":"Select distinct rows by a selection of variables — distinct_all","text":"Scoped verbs (_if, _at, _all) superseded use pick() across() existing verb. See vignette(\"colwise\") details. scoped variants distinct() extract distinct rows selection variables. Like distinct(), can modify variables ordering .funs argument.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/distinct_all.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Select distinct rows by a selection of variables — distinct_all","text":"","code":"distinct_all(.tbl, .funs = list(), ..., .keep_all = FALSE) distinct_at(.tbl, .vars, .funs = list(), ..., .keep_all = FALSE) distinct_if(.tbl, .predicate, .funs = list(), ..., .keep_all = FALSE)"},{"path":"https://dplyr.tidyverse.org/dev/reference/distinct_all.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Select distinct rows by a selection of variables — distinct_all","text":".tbl tbl object. .funs function fun, quosure style lambda ~ fun(.) list either form. ... Additional arguments function calls .funs. evaluated , tidy dots support. .keep_all TRUE, keep variables .data. combination ... distinct, keeps first row values. .vars list columns generated vars(), character vector column names, numeric vector column positions, NULL. .predicate predicate function applied columns logical vector. variables .predicate returns TRUE selected. 
argument passed rlang::as_function() thus supports quosure-style lambda functions strings representing function names.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/distinct_all.html","id":"grouping-variables","dir":"Reference","previous_headings":"","what":"Grouping variables","title":"Select distinct rows by a selection of variables — distinct_all","text":"grouping variables part selection taken account determine distinct rows.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/distinct_all.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Select distinct rows by a selection of variables — distinct_all","text":"","code":"df <- tibble(x = rep(2:5, each = 2) / 2, y = rep(2:3, each = 4) / 2) distinct_all(df) #> # A tibble: 4 × 2 #> x y #> #> 1 1 1 #> 2 1.5 1 #> 3 2 1.5 #> 4 2.5 1.5 # -> distinct(df, pick(everything())) #> # A tibble: 4 × 2 #> x y #> #> 1 1 1 #> 2 1.5 1 #> 3 2 1.5 #> 4 2.5 1.5 distinct_at(df, vars(x,y)) #> # A tibble: 4 × 2 #> x y #> #> 1 1 1 #> 2 1.5 1 #> 3 2 1.5 #> 4 2.5 1.5 # -> distinct(df, pick(x, y)) #> # A tibble: 4 × 2 #> x y #> #> 1 1 1 #> 2 1.5 1 #> 3 2 1.5 #> 4 2.5 1.5 distinct_if(df, is.numeric) #> # A tibble: 4 × 2 #> x y #> #> 1 1 1 #> 2 1.5 1 #> 3 2 1.5 #> 4 2.5 1.5 # -> distinct(df, pick(where(is.numeric))) #> # A tibble: 4 × 2 #> x y #> #> 1 1 1 #> 2 1.5 1 #> 3 2 1.5 #> 4 2.5 1.5 # You can supply a function that will be applied before extracting the distinct values # The variables of the sorted tibble keep their original values. distinct_all(df, round) #> # A tibble: 3 × 2 #> x y #> #> 1 1 1 #> 2 2 1 #> 3 2 2 # -> distinct(df, across(everything(), round)) #> # A tibble: 3 × 2 #> x y #> #> 1 1 1 #> 2 2 1 #> 3 2 2"},{"path":"https://dplyr.tidyverse.org/dev/reference/do.html","id":null,"dir":"Reference","previous_headings":"","what":"Do anything — do","title":"Do anything — do","text":"() superseded dplyr 1.0.0, syntax never really felt like belonged rest dplyr. 
replaced combination reframe() (can produce multiple rows multiple columns), nest_by() (creates rowwise tibble nested data), pick() (allows access data \"current\" group).","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/do.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Do anything — do","text":"","code":"do(.data, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/do.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Do anything — do","text":".data tbl ... Expressions apply group. named, results stored new column. unnamed, must return data frame. can use . refer current group. can mix named unnamed arguments.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/do.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Do anything — do","text":"","code":"# do() with unnamed arguments becomes reframe() or summarise() # . becomes pick() by_cyl <- mtcars %>% group_by(cyl) by_cyl %>% do(head(., 2)) #> # A tibble: 6 × 11 #> # Groups: cyl [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 2 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 3 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 4 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 # -> by_cyl %>% reframe(head(pick(everything()), 2)) #> # A tibble: 6 × 11 #> cyl mpg disp hp drat wt qsec vs am gear carb #> #> 1 4 22.8 108 93 3.85 2.32 18.6 1 1 4 1 #> 2 4 24.4 147. 62 3.69 3.19 20 1 0 4 2 #> 3 6 21 160 110 3.9 2.62 16.5 0 1 4 4 #> 4 6 21 160 110 3.9 2.88 17.0 0 1 4 4 #> 5 8 18.7 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 8 14.3 360 245 3.21 3.57 15.8 0 0 3 4 by_cyl %>% slice_head(n = 2) #> # A tibble: 6 × 11 #> # Groups: cyl [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 2 24.4 4 147. 
62 3.69 3.19 20 1 0 4 2 #> 3 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 4 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 # Can refer to variables directly by_cyl %>% do(mean = mean(.$vs)) #> # A tibble: 3 × 2 #> # Rowwise: #> cyl mean #> #> 1 4 #> 2 6 #> 3 8 # -> by_cyl %>% summarise(mean = mean(vs)) #> # A tibble: 3 × 2 #> cyl mean #> #> 1 4 0.909 #> 2 6 0.571 #> 3 8 0 # do() with named arguments becomes nest_by() + mutate() & list() models <- by_cyl %>% do(mod = lm(mpg ~ disp, data = .)) # -> models <- mtcars %>% nest_by(cyl) %>% mutate(mod = list(lm(mpg ~ disp, data = data))) models %>% summarise(rsq = summary(mod)$r.squared) #> `summarise()` has grouped output by 'cyl'. You can override using the #> `.groups` argument. #> # A tibble: 3 × 2 #> # Groups: cyl [3] #> cyl rsq #> #> 1 4 0.648 #> 2 6 0.0106 #> 3 8 0.270 # use broom to turn models into data models %>% do(data.frame( var = names(coef(.$mod)), coef(summary(.$mod))) ) #> # A tibble: 6 × 5 #> # Rowwise: #> var Estimate Std..Error t.value Pr...t.. 
#> #> 1 (Intercept) 40.9 3.59 11.4 0.00000120 #> 2 disp -0.135 0.0332 -4.07 0.00278 #> 3 (Intercept) 19.1 2.91 6.55 0.00124 #> 4 disp 0.00361 0.0156 0.232 0.826 #> 5 (Intercept) 22.0 3.35 6.59 0.0000259 #> 6 disp -0.0196 0.00932 -2.11 0.0568 # -> models %>% reframe(broom::tidy(mod)) #> # A tibble: 6 × 6 #> cyl term estimate std.error statistic p.value #> #> 1 4 (Intercept) 40.9 3.59 11.4 0.00000120 #> 2 4 disp -0.135 0.0332 -4.07 0.00278 #> 3 6 (Intercept) 19.1 2.91 6.55 0.00124 #> 4 6 disp 0.00361 0.0156 0.232 0.826 #> 5 8 (Intercept) 22.0 3.35 6.59 0.0000259 #> 6 8 disp -0.0196 0.00932 -2.11 0.0568"},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr-locale.html","id":null,"dir":"Reference","previous_headings":"","what":"Locale used by arrange() — dplyr-locale","title":"Locale used by arrange() — dplyr-locale","text":"page documents details locale used arrange() ordering character vectors.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr-locale.html","id":"default-locale","dir":"Reference","previous_headings":"","what":"Default locale","title":"Locale used by arrange() — dplyr-locale","text":"default locale used arrange() C locale. used .locale = NULL unless dplyr.legacy_locale global option set TRUE. can also force C locale used unconditionally .locale = \"C\". C locale exactly English locales, \"en\". main difference C locale groups English alphabet case, English locales group alphabet letter. example, c(\"\", \"b\", \"C\", \"B\", \"c\") sort c(\"B\", \"C\", \"\", \"b\", \"c\") C locale, uppercase letters coming lowercase letters, sort c(\"\", \"b\", \"B\", \"c\", \"C\") English locale. 
often makes little practical difference data analysis, return identical results case consistent observations.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr-locale.html","id":"reproducibility","dir":"Reference","previous_headings":"","what":"Reproducibility","title":"Locale used by arrange() — dplyr-locale","text":"C locale benefit completely reproducible across supported R versions operating systems extra effort. set .locale option stringi::stri_locale_list(), stringi must installed anyone wants run code. utilize package, stringi placed Imports.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr-locale.html","id":"legacy-behavior","dir":"Reference","previous_headings":"","what":"Legacy behavior","title":"Locale used by arrange() — dplyr-locale","text":"Prior dplyr 1.1.0, character columns ordered system locale. need temporarily revert behavior, can set global option dplyr.legacy_locale TRUE, used sparingly expect option removed future version dplyr. better update existing code explicitly use .locale instead. Note setting dplyr.legacy_locale also force calls group_by() use system locale internally ordering groups. Setting .locale override usage dplyr.legacy_locale.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr-locale.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Locale used by arrange() — dplyr-locale","text":"","code":"df <- tibble(x = c(\"a\", \"b\", \"C\", \"B\", \"c\")) df #> # A tibble: 5 × 1 #> x #> #> 1 a #> 2 b #> 3 C #> 4 B #> 5 c # Default locale is C, which groups the English alphabet by case, placing # uppercase letters before lowercase letters. arrange(df, x) #> # A tibble: 5 × 1 #> x #> #> 1 B #> 2 C #> 3 a #> 4 b #> 5 c # The American English locale groups the alphabet by letter. # Explicitly override `.locale` with `\"en\"` for this ordering. 
arrange(df, x, .locale = \"en\") #> # A tibble: 5 × 1 #> x #> #> 1 a #> 2 b #> 3 B #> 4 c #> 5 C # This Danish letter is expected to sort after `z` df <- tibble(x = c(\"o\", \"p\", \"\\u00F8\", \"z\")) df #> # A tibble: 4 × 1 #> x #> #> 1 o #> 2 p #> 3 ø #> 4 z # The American English locale sorts it right after `o` arrange(df, x, .locale = \"en\") #> # A tibble: 4 × 1 #> x #> #> 1 o #> 2 ø #> 3 p #> 4 z # Using `\"da\"` for Danish ordering gives the expected result arrange(df, x, .locale = \"da\") #> # A tibble: 4 × 1 #> x #> #> 1 o #> 2 p #> 3 z #> 4 ø # If you need the legacy behavior of `arrange()`, which respected the # system locale, then you can set the global option `dplyr.legacy_locale`, # but expect this to be removed in the future. We recommend that you use # the `.locale` argument instead. rlang::with_options(dplyr.legacy_locale = TRUE, { arrange(df, x) }) #> # A tibble: 4 × 1 #> x #> #> 1 o #> 2 p #> 3 z #> 4 ø"},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr-package.html","id":null,"dir":"Reference","previous_headings":"","what":"dplyr: A Grammar of Data Manipulation — dplyr-package","title":"dplyr: A Grammar of Data Manipulation — dplyr-package","text":"learn dplyr, start vignettes: browseVignettes(package = \"dplyr\")","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr-package.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"dplyr: A Grammar of Data Manipulation — dplyr-package","text":"Maintainer: Hadley Wickham hadley@posit.co (ORCID) Authors: Romain François (ORCID) Lionel Henry Kirill Müller (ORCID) Davis Vaughan davis@posit.co (ORCID) contributors: Posit Software, PBC [copyright holder, funder]","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr_by.html","id":null,"dir":"Reference","previous_headings":"","what":"Per-operation grouping with .by/by — dplyr_by","title":"Per-operation grouping with .by/by — dplyr_by","text":"two ways group dplyr: Persistent 
grouping group_by() Per-operation grouping ./help page dedicated explaining might want use latter. Depending dplyr verb, per-operation grouping argument may named .. Supported verbs section outlines case--case basis. remainder page refer .simplicity. Grouping radically affects computation dplyr verb use , one goals .allow place grouping specification alongside code actually uses . added benefit, .longer need remember ungroup() summarise(), summarise() ever message handling groups! idea comes data.table, allows specify alongside modifications j, like: dt[, .(x = mean(x)), = g].","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr_by.html","id":"supported-verbs","dir":"Reference","previous_headings":"","what":"Supported verbs","title":"Per-operation grouping with .by/by — dplyr_by","text":"mutate(.= ) summarise(.= ) reframe(.= ) filter(.= ) slice(.= ) slice_head(= ) slice_tail(= ) slice_min(= ) slice_max(= ) slice_sample(= ) Note dplyr verbs use others use .. purely technical difference.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr_by.html","id":"using-by","dir":"Reference","previous_headings":"","what":"Using .by","title":"Per-operation grouping with .by/by — dplyr_by","text":"take look two grouping approaches using expenses data set, tracks costs accumulated across various ids regions: Imagine wanted compute average cost per region. probably write something like : Instead, can now specify grouping inline within verb: .applies single operation, meaning since expenses ungrouped data frame, result applying .also always ungrouped data frame, regardless number grouping columns. Compare group_by() %>% summarise(), summarise() generally peels 1 layer grouping default, typically message : .grouping applies single operation, need worry ungrouping, never needs emit message remind groups. Note .specified multiple columns group using tidy-select syntax c(id, region). 
character vector column names like group , can .= all_of(my_cols). group columns order provided. prevent surprising results, use .existing grouped data frame: far focused usage .summarise(), .works number dplyr verbs. example, append mean cost per region onto original data frame new column rather computing summary: slice maximum cost per combination id region:","code":"expenses <- tibble( id = c(1, 2, 1, 3, 1, 2, 3), region = c(\"A\", \"A\", \"A\", \"B\", \"B\", \"A\", \"A\"), cost = c(25, 20, 19, 12, 9, 6, 6) ) expenses #> # A tibble: 7 x 3 #> id region cost #> #> 1 1 A 25 #> 2 2 A 20 #> 3 1 A 19 #> 4 3 B 12 #> 5 1 B 9 #> 6 2 A 6 #> 7 3 A 6 expenses %>% group_by(region) %>% summarise(cost = mean(cost)) #> # A tibble: 2 x 2 #> region cost #> #> 1 A 15.2 #> 2 B 10.5 expenses %>% summarise(cost = mean(cost), .by = region) #> # A tibble: 2 x 2 #> region cost #> #> 1 A 15.2 #> 2 B 10.5 expenses %>% summarise(cost = mean(cost), .by = c(id, region)) #> # A tibble: 5 x 3 #> id region cost #> #> 1 1 A 22 #> 2 2 A 13 #> 3 3 B 12 #> 4 1 B 9 #> 5 3 A 6 expenses %>% group_by(id, region) %>% summarise(cost = mean(cost)) #> `summarise()` has grouped output by 'id'. You can override using the `.groups` #> argument. #> # A tibble: 5 x 3 #> # Groups: id [3] #> id region cost #> #> 1 1 A 22 #> 2 1 B 9 #> 3 2 A 13 #> 4 3 A 6 #> 5 3 B 12 expenses %>% group_by(id) %>% summarise(cost = mean(cost), .by = c(id, region)) #> Error in `summarise()`: #> ! Can't supply `.by` when `.data` is a grouped data frame. 
expenses %>% mutate(cost_by_region = mean(cost), .by = region) #> # A tibble: 7 x 4 #> id region cost cost_by_region #> #> 1 1 A 25 15.2 #> 2 2 A 20 15.2 #> 3 1 A 19 15.2 #> 4 3 B 12 10.5 #> 5 1 B 9 10.5 #> 6 2 A 6 15.2 #> 7 3 A 6 15.2 # Note that the argument is named `by` in `slice_max()` expenses %>% slice_max(cost, n = 1, by = c(id, region)) #> # A tibble: 5 x 3 #> id region cost #> #> 1 1 A 25 #> 2 2 A 20 #> 3 3 B 12 #> 4 1 B 9 #> 5 3 A 6"},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr_by.html","id":"result-ordering","dir":"Reference","previous_headings":"","what":"Result ordering","title":"Per-operation grouping with .by/by — dplyr_by","text":"used ., summarise(), reframe(), slice() maintain ordering existing data. different group_by(), always sorted group keys ascending order. need sorted group keys, recommend explicitly use arrange() either call summarise(), reframe(), slice(). also gives full access arrange()'s features, desc() .locale argument.","code":"df <- tibble( month = c(\"jan\", \"jan\", \"feb\", \"feb\", \"mar\"), temp = c(20, 25, 18, 20, 40) ) # Uses ordering by \"first appearance\" in the original data df %>% summarise(average_temp = mean(temp), .by = month) #> # A tibble: 3 x 2 #> month average_temp #> #> 1 jan 22.5 #> 2 feb 19 #> 3 mar 40 # Sorts in ascending order df %>% group_by(month) %>% summarise(average_temp = mean(temp)) #> # A tibble: 3 x 2 #> month average_temp #> #> 1 feb 19 #> 2 jan 22.5 #> 3 mar 40"},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr_by.html","id":"verbs-without-by-support","dir":"Reference","previous_headings":"","what":"Verbs without .by support","title":"Per-operation grouping with .by/by — dplyr_by","text":"dplyr verb support ., typically means verb inherently affected grouping. example, pull() rename() support ., specifying columns group affect implementations. said, exceptions sometimes dplyr verb support ., special support grouped data frames created group_by(). 
typically verbs required retain grouping columns, example: select() always retains grouping columns, message specified select() call. distinct() count() place unspecified grouping columns front data frame computing results. arrange() .by_group argument optionally order grouping columns first. group_by() exist, verbs special support grouped data frames.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr_data_masking.html","id":null,"dir":"Reference","previous_headings":"","what":"Data-masking — dplyr_data_masking","title":"Data-masking — dplyr_data_masking","text":"page now located ?rlang::args_data_masking.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr_extending.html","id":null,"dir":"Reference","previous_headings":"","what":"Extending dplyr with new data frame subclasses — dplyr_extending","title":"Extending dplyr with new data frame subclasses — dplyr_extending","text":"three functions, along names<- 1d numeric [ (.e. x[loc]) methods, provide minimal interface extending dplyr work new data frame subclasses. means simple cases need provide couple methods, rather method every dplyr verb. functions stop-gap measure figure solve problem generally, likely code write implement find home comes next.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr_extending.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Extending dplyr with new data frame subclasses — dplyr_extending","text":"","code":"dplyr_row_slice(data, i, ...) dplyr_col_modify(data, cols) dplyr_reconstruct(data, template)"},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr_extending.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Extending dplyr with new data frame subclasses — dplyr_extending","text":"data tibble. use tibbles avoid inconsistent subset-assignment use cases. numeric logical vector indexes rows data. cols named list used modify columns. 
NULL value remove existing column. template Template data frame use restoring attributes.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr_extending.html","id":"basic-advice","dir":"Reference","previous_headings":"","what":"Basic advice","title":"Extending dplyr with new data frame subclasses — dplyr_extending","text":"section gives basic advice want extend dplyr work custom data frame subclass, want dplyr methods behave basically way. data frame attributes depend rows columns (unconditionally preserved), need anything. one exception subclass extends data.frame directly rather extending tibble. [.data.frame method preserve attributes, need write [ method subclass preserves attributes important class. scalar attributes depend rows, implement dplyr_reconstruct() method. method recompute attribute depending rows now present. scalar attributes depend columns, implement dplyr_reconstruct() method 1d [ method. example, class requires certain columns present, method return data.frame tibble columns removed. attributes vectorised rows, implement dplyr_row_slice() method. gives access can modify row attribute accordingly. also need think carefully recompute attribute dplyr_reconstruct(), need carefully verify behaviour verb, provide additional methods needed. attributes vectorised columns, implement dplyr_col_modify(), 1d [, names<- methods. methods know columns modified, can update column attribute according. also need think carefully recompute attribute dplyr_reconstruct(), need carefully verify behaviour verb, provide additional methods needed.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr_extending.html","id":"current-usage","dir":"Reference","previous_headings":"","what":"Current usage","title":"Extending dplyr with new data frame subclasses — dplyr_extending","text":"arrange(), filter(), slice() (rest slice_*() family), semi_join(), anti_join() work generating vector row indices, subsetting dplyr_row_slice(). 
mutate() generates list new column value (using NULL indicate columns deleted), passes dplyr_col_modify(). also uses 1d [ implement .keep, call relocate() either ..supplied. summarise() reframe() work similarly mutate() data modified dplyr_col_modify() comes group_data() built .. Note means data frames returned summarise() reframe() fundamentally new data frames, retain custom subclasses attributes. select() uses 1d [ select columns, names<- rename . rename() just uses names<-. relocate() just uses 1d [. inner_join(), left_join(), right_join(), full_join() coerce x tibble, modify rows, use dplyr_reconstruct() convert back type x. nest_join() converts x y tibbles, modifies rows, uses dplyr_col_modify() handle modified key variables list-column y becomes. also uses dplyr_reconstruct() convert outer result back type x, convert nested tibbles back type y. distinct() mutate() expressions present, uses 1d [ select variables keep, dplyr_row_slice() select distinct rows. Note group_by() ungroup() use generics need provide methods directly, rely .per-operation grouping.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr_tidy_select.html","id":null,"dir":"Reference","previous_headings":"","what":"Argument type: tidy-select — dplyr_tidy_select","title":"Argument type: tidy-select — dplyr_tidy_select","text":"page describes argument modifier indicates argument supports tidy selections. Tidy selection provides concise dialect R selecting variables based names properties. Tidy selection variant tidy evaluation. means inside functions, tidy-select arguments require special attention, described Indirection section . 
never heard tidy evaluation , start vignette(\"programming\").","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr_tidy_select.html","id":"overview-of-selection-features","dir":"Reference","previous_headings":"","what":"Overview of selection features","title":"Argument type: tidy-select — dplyr_tidy_select","text":"Tidyverse selections implement dialect R operators make easy select variables: : selecting range consecutive variables. ! taking complement set variables. & | selecting intersection union two sets variables. c() combining selections. addition, can use selection helpers. helpers select specific columns: everything(): Matches variables. last_col(): Select last variable, possibly offset. group_cols(): Select grouping columns. helpers select variables matching patterns names: starts_with(): Starts prefix. ends_with(): Ends suffix. contains(): Contains literal string. matches(): Matches regular expression. num_range(): Matches numerical range like x01, x02, x03. variables stored character vector: all_of(): Matches variable names character vector. names must present, otherwise --bounds error thrown. any_of(): all_of(), except error thrown names exist. using predicate function: (): Applies function variables selects function returns TRUE.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/dplyr_tidy_select.html","id":"indirection","dir":"Reference","previous_headings":"","what":"Indirection","title":"Argument type: tidy-select — dplyr_tidy_select","text":"two main cases: character vector column names, use all_of() any_of(), depending whether want unknown variable names cause error, e.g. select(df, all_of(vars)), select(df, !any_of(vars)). want user able supply tidyselect specification function argument, embrace function argument, e.g. 
select(df, {{ vars }}).","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/explain.html","id":null,"dir":"Reference","previous_headings":"","what":"Explain details of a tbl — explain","title":"Explain details of a tbl — explain","text":"generic function gives details object print(), focused human readable output str().","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/explain.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Explain details of a tbl — explain","text":"","code":"explain(x, ...) show_query(x, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/explain.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Explain details of a tbl — explain","text":"x object explain ... parameters possibly used generic","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/explain.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Explain details of a tbl — explain","text":"first argument, invisibly.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/explain.html","id":"databases","dir":"Reference","previous_headings":"","what":"Databases","title":"Explain details of a tbl — explain","text":"Explaining tbl_sql run SQL EXPLAIN command describe query plan. 
requires little bit knowledge EXPLAIN works database, useful diagnosing performance problems.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/explain.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Explain details of a tbl — explain","text":"","code":"# \\donttest{ lahman_s <- dbplyr::lahman_sqlite() #> Creating table: AllstarFull #> Creating table: Appearances #> Creating table: AwardsManagers #> Creating table: AwardsPlayers #> Creating table: AwardsShareManagers #> Creating table: AwardsSharePlayers #> Creating table: Batting #> Creating table: BattingPost #> Creating table: CollegePlaying #> Creating table: Fielding #> Creating table: FieldingOF #> Creating table: FieldingOFsplit #> Creating table: FieldingPost #> Creating table: HallOfFame #> Creating table: HomeGames #> Creating table: LahmanData #> Creating table: Managers #> Creating table: ManagersHalf #> Creating table: Parks #> Creating table: People #> Creating table: Pitching #> Creating table: PitchingPost #> Creating table: Salaries #> Creating table: Schools #> Creating table: SeriesPost #> Creating table: Teams #> Creating table: TeamsFranchises #> Creating table: TeamsHalf batting <- tbl(lahman_s, \"Batting\") batting %>% show_query() #> #> SELECT * #> FROM `Batting` batting %>% explain() #> #> SELECT * #> FROM `Batting` #> #> #> id parent notused detail #> 1 2 0 0 SCAN Batting # The batting database has indices on all ID variables: # SQLite automatically picks the most restrictive index batting %>% filter(lgID == \"NL\" & yearID == 2000L) %>% explain() #> #> SELECT `Batting`.* #> FROM `Batting` #> WHERE (`lgID` = 'NL' AND `yearID` = 2000) #> #> #> id parent notused detail #> 1 3 0 0 SEARCH Batting USING INDEX Batting_yearID (yearID=?) 
# OR's will use multiple indexes batting %>% filter(lgID == \"NL\" | yearID == 2000) %>% explain() #> #> SELECT `Batting`.* #> FROM `Batting` #> WHERE (`lgID` = 'NL' OR `yearID` = 2000.0) #> #> #> id parent notused detail #> 1 4 0 0 MULTI-INDEX OR #> 2 5 4 0 INDEX 1 #> 3 11 5 0 SEARCH Batting USING INDEX Batting_lgID (lgID=?) #> 4 16 4 0 INDEX 2 #> 5 22 16 0 SEARCH Batting USING INDEX Batting_yearID (yearID=?) # Joins will use indexes in both tables teams <- tbl(lahman_s, \"Teams\") batting %>% left_join(teams, c(\"yearID\", \"teamID\")) %>% explain() #> #> SELECT #> `playerID`, #> `Batting`.`yearID` AS `yearID`, #> `stint`, #> `Batting`.`teamID` AS `teamID`, #> `Batting`.`lgID` AS `lgID.x`, #> `Batting`.`G` AS `G.x`, #> `Batting`.`AB` AS `AB.x`, #> `Batting`.`R` AS `R.x`, #> `Batting`.`H` AS `H.x`, #> `Batting`.`X2B` AS `X2B.x`, #> `Batting`.`X3B` AS `X3B.x`, #> `Batting`.`HR` AS `HR.x`, #> `RBI`, #> `Batting`.`SB` AS `SB.x`, #> `Batting`.`CS` AS `CS.x`, #> `Batting`.`BB` AS `BB.x`, #> `Batting`.`SO` AS `SO.x`, #> `IBB`, #> `Batting`.`HBP` AS `HBP.x`, #> `SH`, #> `Batting`.`SF` AS `SF.x`, #> `GIDP`, #> `Teams`.`lgID` AS `lgID.y`, #> `franchID`, #> `divID`, #> `Rank`, #> `Teams`.`G` AS `G.y`, #> `Ghome`, #> `W`, #> `L`, #> `DivWin`, #> `WCWin`, #> `LgWin`, #> `WSWin`, #> `Teams`.`R` AS `R.y`, #> `Teams`.`AB` AS `AB.y`, #> `Teams`.`H` AS `H.y`, #> `Teams`.`X2B` AS `X2B.y`, #> `Teams`.`X3B` AS `X3B.y`, #> `Teams`.`HR` AS `HR.y`, #> `Teams`.`BB` AS `BB.y`, #> `Teams`.`SO` AS `SO.y`, #> `Teams`.`SB` AS `SB.y`, #> `Teams`.`CS` AS `CS.y`, #> `Teams`.`HBP` AS `HBP.y`, #> `Teams`.`SF` AS `SF.y`, #> `RA`, #> `ER`, #> `ERA`, #> `CG`, #> `SHO`, #> `SV`, #> `IPouts`, #> `HA`, #> `HRA`, #> `BBA`, #> `SOA`, #> `E`, #> `DP`, #> `FP`, #> `name`, #> `park`, #> `attendance`, #> `BPF`, #> `PPF`, #> `teamIDBR`, #> `teamIDlahman45`, #> `teamIDretro` #> FROM `Batting` #> LEFT JOIN `Teams` #> ON ( #> `Batting`.`yearID` = `Teams`.`yearID` AND #> `Batting`.`teamID` = `Teams`.`teamID` #> ) 
#> #> #> id parent notused #> 1 4 0 0 #> 2 6 0 0 #> detail #> 1 SCAN Batting #> 2 SEARCH Teams USING INDEX Teams_yearID (yearID=?) LEFT-JOIN # }"},{"path":"https://dplyr.tidyverse.org/dev/reference/filter-joins.html","id":null,"dir":"Reference","previous_headings":"","what":"Filtering joins — filter-joins","title":"Filtering joins — filter-joins","text":"Filtering joins filter rows x based presence absence matches y: semi_join() return rows x match y. anti_join() return rows x without match y.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/filter-joins.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Filtering joins — filter-joins","text":"","code":"semi_join(x, y, by = NULL, copy = FALSE, ...) # S3 method for data.frame semi_join(x, y, by = NULL, copy = FALSE, ..., na_matches = c(\"na\", \"never\")) anti_join(x, y, by = NULL, copy = FALSE, ...) # S3 method for data.frame anti_join(x, y, by = NULL, copy = FALSE, ..., na_matches = c(\"na\", \"never\"))"},{"path":"https://dplyr.tidyverse.org/dev/reference/filter-joins.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Filtering joins — filter-joins","text":"x, y pair data frames, data frame extensions (e.g. tibble), lazy data frames (e.g. dbplyr dtplyr). See Methods, , details. join specification created join_by(), character vector variables join . NULL, default, *_join() perform natural join, using variables common across x y. message lists variables can check correct; suppress message supplying explicitly. join different variables x y, use join_by() specification. example, join_by(== b) match x$y$b. join multiple variables, use join_by() specification multiple expressions. example, join_by(== b, c == d) match x$y$b x$c y$d. column names x y, can shorten listing variable names, like join_by(, c). join_by() can also used perform inequality, rolling, overlap joins. See documentation ?join_by details types joins. 
simple equality joins, can alternatively specify character vector variable names join . example, = c(\"\", \"b\") joins x$y$x$b y$b. variable names differ x y, use named character vector like = c(\"x_a\" = \"y_a\", \"x_b\" = \"y_b\"). perform cross-join, generating combinations x y, see cross_join(). copy x y data source, copy TRUE, y copied src x. allows join tables across srcs, potentially expensive operation must opt . ... parameters passed onto methods. na_matches two NA two NaN values match? \"na\", default, treats two NA two NaN values equal, like %%, match(), merge(). \"never\" treats two NA two NaN values different, never match together values. similar joins database sources base::merge(incomparables = NA).","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/filter-joins.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Filtering joins — filter-joins","text":"object type x. output following properties: Rows subset input, appear order. Columns modified. Data frame attributes preserved. Groups taken x. number groups may reduced.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/filter-joins.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Filtering joins — filter-joins","text":"function generics, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. Methods available currently loaded packages: semi_join(): dbplyr (tbl_lazy), dplyr (data.frame) . 
anti_join(): dbplyr (tbl_lazy), dplyr (data.frame) .","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/filter-joins.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Filtering joins — filter-joins","text":"","code":"# \"Filtering\" joins keep cases from the LHS band_members %>% semi_join(band_instruments) #> Joining with `by = join_by(name)` #> # A tibble: 2 × 2 #> name band #> #> 1 John Beatles #> 2 Paul Beatles band_members %>% anti_join(band_instruments) #> Joining with `by = join_by(name)` #> # A tibble: 1 × 2 #> name band #> #> 1 Mick Stones # To suppress the message about joining variables, supply `by` band_members %>% semi_join(band_instruments, by = join_by(name)) #> # A tibble: 2 × 2 #> name band #> #> 1 John Beatles #> 2 Paul Beatles # This is good practice in production code"},{"path":"https://dplyr.tidyverse.org/dev/reference/filter.html","id":null,"dir":"Reference","previous_headings":"","what":"Keep rows that match a condition — filter","title":"Keep rows that match a condition — filter","text":"filter() function used subset data frame, retaining rows satisfy conditions. retained, row must produce value TRUE conditions. Note condition evaluates NA row dropped, unlike base subsetting [.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/filter.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Keep rows that match a condition — filter","text":"","code":"filter(.data, ..., .by = NULL, .preserve = FALSE)"},{"path":"https://dplyr.tidyverse.org/dev/reference/filter.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Keep rows that match a condition — filter","text":".data data frame, data frame extension (e.g. tibble), lazy data frame (e.g. dbplyr dtplyr). See Methods, , details. ... Expressions return logical value, defined terms variables .data. multiple expressions included, combined & operator. 
rows conditions evaluate TRUE kept. . Optionally, selection columns group just operation, functioning alternative group_by(). details examples, see ?dplyr_by. .preserve Relevant .data input grouped. .preserve = FALSE (default), grouping structure recalculated based resulting data, otherwise grouping kept .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/filter.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Keep rows that match a condition — filter","text":"object type .data. output following properties: Rows subset input, appear order. Columns modified. number groups may reduced (.preserve TRUE). Data frame attributes preserved.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/filter.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Keep rows that match a condition — filter","text":"filter() function used subset rows .data, applying expressions ... column values determine rows retained. can applied grouped ungrouped data (see group_by() ungroup()). However, dplyr yet smart enough optimise filtering operation grouped datasets need grouped calculations. reason, filtering often considerably faster ungrouped data.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/filter.html","id":"useful-filter-functions","dir":"Reference","previous_headings":"","what":"Useful filter functions","title":"Keep rows that match a condition — filter","text":"many functions operators useful constructing expressions used filter data: ==, >, >= etc &, |, !, xor() .na() (), near()","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/filter.html","id":"grouped-tibbles","dir":"Reference","previous_headings":"","what":"Grouped tibbles","title":"Keep rows that match a condition — filter","text":"filtering expressions computed within groups, may yield different results grouped tibbles. case soon aggregating, lagging, ranking function involved. 
Compare ungrouped filtering: grouped equivalent: ungrouped version, filter() compares value mass row global average (taken whole data set), keeping rows mass greater global average. contrast, grouped version calculates average mass separately gender group, keeps rows mass greater relevant within-gender average.","code":"starwars %>% filter(mass > mean(mass, na.rm = TRUE)) starwars %>% group_by(gender) %>% filter(mass > mean(mass, na.rm = TRUE))"},{"path":"https://dplyr.tidyverse.org/dev/reference/filter.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Keep rows that match a condition — filter","text":"function generic, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. following methods currently available loaded packages: dbplyr (tbl_lazy), dplyr (data.frame, ts) .","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/filter.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Keep rows that match a condition — filter","text":"","code":"# Filtering by one criterion filter(starwars, species == \"Human\") #> # A tibble: 35 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Sky… 172 77 blond fair blue 19 male #> 2 Darth Va… 202 136 none white yellow 41.9 male #> 3 Leia Org… 150 49 brown light brown 19 fema… #> 4 Owen Lars 178 120 brown, gr… light blue 52 male #> 5 Beru Whi… 165 75 brown light blue 47 fema… #> 6 Biggs Da… 183 84 black light brown 24 male #> 7 Obi-Wan … 182 77 auburn, w… fair blue-gray 57 male #> 8 Anakin S… 188 84 blond fair blue 41.9 male #> 9 Wilhuff … 180 NA auburn, g… fair blue 64 male #> 10 Han Solo 180 80 brown fair brown 29 male #> # ℹ 25 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships filter(starwars, mass > 1000) #> # A tibble: 1 × 14 #> name height mass hair_color 
skin_color eye_color birth_year sex #> #> 1 Jabba Des… 175 1358 NA green-tan… orange 600 herm… #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships # Filtering by multiple criteria within a single logical expression filter(starwars, hair_color == \"none\" & eye_color == \"black\") #> # A tibble: 9 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Nien Nunb 160 68 none grey black NA male #> 2 Gasgano 122 NA none white, bl… black NA male #> 3 Kit Fisto 196 87 none green black NA male #> 4 Plo Koon 188 80 none orange black 22 male #> 5 Lama Su 229 88 none grey black NA male #> 6 Taun We 213 NA none grey black NA fema… #> 7 Shaak Ti 178 57 none red, blue… black NA fema… #> 8 Tion Medon 206 80 none grey black NA male #> 9 BB8 NA NA none none black NA none #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships filter(starwars, hair_color == \"none\" | eye_color == \"black\") #> # A tibble: 39 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Darth Va… 202 136 none white yellow 41.9 male #> 2 Greedo 173 74 NA green black 44 male #> 3 IG-88 200 140 none metal red 15 none #> 4 Bossk 190 113 none green red 53 male #> 5 Lobot 175 79 none light blue 37 male #> 6 Ackbar 180 83 none brown mot… orange 41 male #> 7 Nien Nunb 160 68 none grey black NA male #> 8 Nute Gun… 191 90 none mottled g… red NA male #> 9 Jar Jar … 196 66 none orange orange 52 male #> 10 Roos Tar… 224 82 none grey orange NA male #> # ℹ 29 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships # When multiple expressions are used, they are combined using & filter(starwars, hair_color == \"none\", eye_color == \"black\") #> # A tibble: 9 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Nien Nunb 160 68 none grey black NA male #> 2 Gasgano 122 NA none white, bl… black NA male #> 3 Kit Fisto 196 87 none green 
black NA male #> 4 Plo Koon 188 80 none orange black 22 male #> 5 Lama Su 229 88 none grey black NA male #> 6 Taun We 213 NA none grey black NA fema… #> 7 Shaak Ti 178 57 none red, blue… black NA fema… #> 8 Tion Medon 206 80 none grey black NA male #> 9 BB8 NA NA none none black NA none #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships # The filtering operation may yield different results on grouped # tibbles because the expressions are computed within groups. # # The following filters rows where `mass` is greater than the # global average: starwars %>% filter(mass > mean(mass, na.rm = TRUE)) #> # A tibble: 10 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Darth Va… 202 136 none white yellow 41.9 male #> 2 Owen Lars 178 120 brown, gr… light blue 52 male #> 3 Chewbacca 228 112 brown unknown blue 200 male #> 4 Jabba De… 175 1358 NA green-tan… orange 600 herm… #> 5 Jek Tono… 180 110 brown fair blue NA NA #> 6 IG-88 200 140 none metal red 15 none #> 7 Bossk 190 113 none green red 53 male #> 8 Dexter J… 198 102 none brown yellow NA male #> 9 Grievous 216 159 none brown, wh… green, y… NA male #> 10 Tarfful 234 136 brown brown blue NA male #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships # Whereas this keeps rows with `mass` greater than the gender # average: starwars %>% group_by(gender) %>% filter(mass > mean(mass, na.rm = TRUE)) #> # A tibble: 15 × 14 #> # Groups: gender [3] #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Darth V… 202 136 none white yellow 41.9 male #> 2 Owen La… 178 120 brown, gr… light blue 52 male #> 3 Beru Wh… 165 75 brown light blue 47 fema… #> 4 Chewbac… 228 112 brown unknown blue 200 male #> 5 Jabba D… 175 1358 NA green-tan… orange 600 herm… #> 6 Jek Ton… 180 110 brown fair blue NA NA #> 7 IG-88 200 140 none metal red 15 none #> 8 Bossk 190 113 none green red 53 male #> 9 Ayla Se… 178 55 none blue hazel 48 
fema… #> 10 Gregar … 185 85 black dark brown NA NA #> 11 Luminar… 170 56.2 black yellow blue 58 fema… #> 12 Zam Wes… 168 55 blonde fair, gre… yellow NA fema… #> 13 Shaak Ti 178 57 none red, blue… black NA fema… #> 14 Grievous 216 159 none brown, wh… green, y… NA male #> 15 Tarfful 234 136 brown brown blue NA male #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships # To refer to column names that are stored as strings, use the `.data` pronoun: vars <- c(\"mass\", \"height\") cond <- c(80, 150) starwars %>% filter( .data[[vars[[1]]]] > cond[[1]], .data[[vars[[2]]]] > cond[[2]] ) #> # A tibble: 21 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Darth Va… 202 136 none white yellow 41.9 male #> 2 Owen Lars 178 120 brown, gr… light blue 52 male #> 3 Biggs Da… 183 84 black light brown 24 male #> 4 Anakin S… 188 84 blond fair blue 41.9 male #> 5 Chewbacca 228 112 brown unknown blue 200 male #> 6 Jabba De… 175 1358 NA green-tan… orange 600 herm… #> 7 Jek Tono… 180 110 brown fair blue NA NA #> 8 IG-88 200 140 none metal red 15 none #> 9 Bossk 190 113 none green red 53 male #> 10 Ackbar 180 83 none brown mot… orange 41 male #> # ℹ 11 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships # Learn more in ?rlang::args_data_masking"},{"path":"https://dplyr.tidyverse.org/dev/reference/filter_all.html","id":null,"dir":"Reference","previous_headings":"","what":"Filter within a selection of variables — filter_all","title":"Filter within a selection of variables — filter_all","text":"Scoped verbs (_if, _at, _all) superseded use if_all() if_any() existing verb. See vignette(\"colwise\") details. scoped filtering verbs apply predicate expression selection variables. predicate expression quoted all_vars() any_vars() mention pronoun . 
refer variables.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/filter_all.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Filter within a selection of variables — filter_all","text":"","code":"filter_all(.tbl, .vars_predicate, .preserve = FALSE) filter_if(.tbl, .predicate, .vars_predicate, .preserve = FALSE) filter_at(.tbl, .vars, .vars_predicate, .preserve = FALSE)"},{"path":"https://dplyr.tidyverse.org/dev/reference/filter_all.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Filter within a selection of variables — filter_all","text":".tbl tbl object. .vars_predicate quoted predicate expression returned all_vars() any_vars(). Can also function purrr-like formula. case, intersection results taken default currently way request union. .preserve FALSE (default), grouping structure recalculated based resulting data, otherwise kept . .predicate predicate function applied columns logical vector. variables .predicate returns TRUE selected. argument passed rlang::as_function() thus supports quosure-style lambda functions strings representing function names. .vars list columns generated vars(), character vector column names, numeric vector column positions, NULL.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/filter_all.html","id":"grouping-variables","dir":"Reference","previous_headings":"","what":"Grouping variables","title":"Filter within a selection of variables — filter_all","text":"grouping variables part selection taken account determine filtered rows.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/filter_all.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Filter within a selection of variables — filter_all","text":"","code":"# While filter() accepts expressions with specific variables, the # scoped filter verbs take an expression with the pronoun `.` and # replicate it over all variables. 
This expression should be quoted # with all_vars() or any_vars(): all_vars(is.na(.)) #> #> #> expr: ^is.na(.) #> env: 0x5596aadf3920 any_vars(is.na(.)) #> #> #> expr: ^is.na(.) #> env: 0x5596aadf3920 # You can take the intersection of the replicated expressions: filter_all(mtcars, all_vars(. > 150)) #> [1] mpg cyl disp hp drat wt qsec vs am gear carb #> <0 rows> (or 0-length row.names) # -> filter(mtcars, if_all(everything(), ~ .x > 150)) #> [1] mpg cyl disp hp drat wt qsec vs am gear carb #> <0 rows> (or 0-length row.names) # Or the union: filter_all(mtcars, any_vars(. > 150)) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 #> Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 #> Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 #> Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 #> Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 #> Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 #> Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 #> Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 #> Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 #> Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 #> Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 #> Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 #> Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 #> Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 #> Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 #> AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 #> Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 #> Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 #> Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 #> Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 #> Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8 # -> filter(mtcars, if_any(everything(), ~ . 
> 150)) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 #> Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 #> Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 #> Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 #> Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 #> Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 #> Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 #> Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 #> Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 #> Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 #> Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 #> Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 #> Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 #> Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 #> Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 #> AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 #> Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 #> Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 #> Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 #> Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 #> Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8 # You can vary the selection of columns on which to apply the # predicate. filter_at() takes a vars() specification: filter_at(mtcars, vars(starts_with(\"d\")), any_vars((. 
%% 2) == 0)) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 #> Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 #> Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 #> Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 #> Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 #> Duster 360 14.3 8 360 245 3.21 3.570 15.84 0 0 3 4 #> Cadillac Fleetwood 10.4 8 472 205 2.93 5.250 17.98 0 0 3 4 #> Lincoln Continental 10.4 8 460 215 3.00 5.424 17.82 0 0 3 4 #> Chrysler Imperial 14.7 8 440 230 3.23 5.345 17.42 0 0 3 4 #> Dodge Challenger 15.5 8 318 150 2.76 3.520 16.87 0 0 3 2 #> AMC Javelin 15.2 8 304 150 3.15 3.435 17.30 0 0 3 2 #> Camaro Z28 13.3 8 350 245 3.73 3.840 15.41 0 0 3 4 #> Pontiac Firebird 19.2 8 400 175 3.08 3.845 17.05 0 0 3 2 # -> filter(mtcars, if_any(starts_with(\"d\"), ~ (.x %% 2) == 0)) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 #> Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 #> Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 #> Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 #> Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 #> Duster 360 14.3 8 360 245 3.21 3.570 15.84 0 0 3 4 #> Cadillac Fleetwood 10.4 8 472 205 2.93 5.250 17.98 0 0 3 4 #> Lincoln Continental 10.4 8 460 215 3.00 5.424 17.82 0 0 3 4 #> Chrysler Imperial 14.7 8 440 230 3.23 5.345 17.42 0 0 3 4 #> Dodge Challenger 15.5 8 318 150 2.76 3.520 16.87 0 0 3 2 #> AMC Javelin 15.2 8 304 150 3.15 3.435 17.30 0 0 3 2 #> Camaro Z28 13.3 8 350 245 3.73 3.840 15.41 0 0 3 4 #> Pontiac Firebird 19.2 8 400 175 3.08 3.845 17.05 0 0 3 2 # And filter_if() selects variables with a predicate function: filter_if(mtcars, ~ all(floor(.) == .), all_vars(. 
!= 0)) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 #> Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 #> Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 #> Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 #> Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 #> Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2 # -> is_int <- function(x) all(floor(x) == x) filter(mtcars, if_all(where(is_int), ~ .x != 0)) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 #> Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 #> Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 #> Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 #> Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 #> Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2"},{"path":"https://dplyr.tidyverse.org/dev/reference/funs.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a list of function calls — funs","title":"Create a list of function calls — funs","text":"funs() deprecated; please use list() instead. deprecated function provided unique way specifying anonymous functions, rather adopting conventions used purrr packages tidyverse.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/funs.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a list of function calls — funs","text":"","code":"funs(..., .args = list())"},{"path":"https://dplyr.tidyverse.org/dev/reference/funs.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a list of function calls — funs","text":"... list functions specified : name, \"mean\" function , mean call function . 
dummy argument, mean(., na.rm = TRUE) following notations supported, see examples: anonymous function, function(x) mean(x, na.rm = TRUE) anonymous function purrr notation, ~mean(., na.rm = TRUE) .args, args named list additional arguments added function calls. funs() deprecated, use methods supply arguments: ... argument scoped verbs make functions purrr::partial().","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/funs.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Create a list of function calls — funs","text":"","code":"funs(\"mean\", mean(., na.rm = TRUE)) #> Warning: `funs()` was deprecated in dplyr 0.8.0. #> ℹ Please use a list of either functions or lambdas: #> #> # Simple named list: list(mean = mean, median = median) #> #> # Auto named with `tibble::lst()`: tibble::lst(mean, median) #> #> # Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE)) #> #> $ mean: mean(.) #> $ mean: mean(., na.rm = TRUE) # -> list(mean = mean, mean = ~ mean(.x, na.rm = TRUE)) #> $mean #> function (x, ...) #> UseMethod(\"mean\") #> #> #> #> $mean #> ~mean(.x, na.rm = TRUE) #> #> funs(m1 = mean, m2 = \"mean\", m3 = mean(., na.rm = TRUE)) #> Warning: `funs()` was deprecated in dplyr 0.8.0. #> ℹ Please use a list of either functions or lambdas: #> #> # Simple named list: list(mean = mean, median = median) #> #> # Auto named with `tibble::lst()`: tibble::lst(mean, median) #> #> # Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE)) #> #> $ m1: mean(.) #> $ m2: mean(.) #> $ m3: mean(., na.rm = TRUE) # -> list(m1 = mean, m2 = \"mean\", m3 = ~ mean(.x, na.rm = TRUE)) #> $m1 #> function (x, ...) 
#> UseMethod(\"mean\") #> #> #> #> $m2 #> [1] \"mean\" #> #> $m3 #> ~mean(.x, na.rm = TRUE) #> #>"},{"path":"https://dplyr.tidyverse.org/dev/reference/glimpse.html","id":null,"dir":"Reference","previous_headings":"","what":"Get a glimpse of your data — glimpse","title":"Get a glimpse of your data — glimpse","text":"glimpse() like transposed version print(): columns run page, data runs across. makes possible see every column data frame. little like str() applied data frame tries show much data possible. (always shows underlying data, even applied remote data source.) glimpse() provided pillar package, re-exported dplyr. See pillar::glimpse() details.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/glimpse.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get a glimpse of your data — glimpse","text":"x original x (invisibly) returned, allowing glimpse() used within data pipeline.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/glimpse.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Get a glimpse of your data — glimpse","text":"","code":"glimpse(mtcars) #> Rows: 32 #> Columns: 11 #> $ mpg 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2,… #> $ cyl 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4,… #> $ disp 160.0, 160.0, 108.0, 258.0, 360.0, 225.0, 360.0, 146.7, 140… #> $ hp 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123, 180, 18… #> $ drat 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92,… #> $ wt 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3.1… #> $ qsec 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20.00, 22.… #> $ vs 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1,… #> $ am 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,… #> $ gear 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4,… #> $ carb 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1,… # Note that original x 
is (invisibly) returned, allowing `glimpse()` to be # used within a pipeline. mtcars %>% glimpse() %>% select(1:3) #> Rows: 32 #> Columns: 11 #> $ mpg 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2,… #> $ cyl 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4,… #> $ disp 160.0, 160.0, 108.0, 258.0, 360.0, 225.0, 360.0, 146.7, 140… #> $ hp 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123, 180, 18… #> $ drat 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92,… #> $ wt 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3.1… #> $ qsec 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20.00, 22.… #> $ vs 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1,… #> $ am 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,… #> $ gear 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4,… #> $ carb 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1,… #> mpg cyl disp #> Mazda RX4 21.0 6 160.0 #> Mazda RX4 Wag 21.0 6 160.0 #> Datsun 710 22.8 4 108.0 #> Hornet 4 Drive 21.4 6 258.0 #> Hornet Sportabout 18.7 8 360.0 #> Valiant 18.1 6 225.0 #> Duster 360 14.3 8 360.0 #> Merc 240D 24.4 4 146.7 #> Merc 230 22.8 4 140.8 #> Merc 280 19.2 6 167.6 #> Merc 280C 17.8 6 167.6 #> Merc 450SE 16.4 8 275.8 #> Merc 450SL 17.3 8 275.8 #> Merc 450SLC 15.2 8 275.8 #> Cadillac Fleetwood 10.4 8 472.0 #> Lincoln Continental 10.4 8 460.0 #> Chrysler Imperial 14.7 8 440.0 #> Fiat 128 32.4 4 78.7 #> Honda Civic 30.4 4 75.7 #> Toyota Corolla 33.9 4 71.1 #> Toyota Corona 21.5 4 120.1 #> Dodge Challenger 15.5 8 318.0 #> AMC Javelin 15.2 8 304.0 #> Camaro Z28 13.3 8 350.0 #> Pontiac Firebird 19.2 8 400.0 #> Fiat X1-9 27.3 4 79.0 #> Porsche 914-2 26.0 4 120.3 #> Lotus Europa 30.4 4 95.1 #> Ford Pantera L 15.8 8 351.0 #> Ferrari Dino 19.7 6 145.0 #> Maserati Bora 15.0 8 301.0 #> Volvo 142E 21.4 4 121.0 glimpse(starwars) #> Rows: 87 #> Columns: 14 #> $ name \"Luke Skywalker\", \"C-3PO\", \"R2-D2\", \"Darth Vader\", \"L… #> $ height 172, 167, 96, 202, 
150, 178, 165, 97, 183, 182, 188, … #> $ mass 77.0, 75.0, 32.0, 136.0, 49.0, 120.0, 75.0, 32.0, 84.… #> $ hair_color \"blond\", NA, NA, \"none\", \"brown\", \"brown, grey\", \"bro… #> $ skin_color \"fair\", \"gold\", \"white, blue\", \"white\", \"light\", \"lig… #> $ eye_color \"blue\", \"yellow\", \"red\", \"yellow\", \"brown\", \"blue\", \"… #> $ birth_year 19.0, 112.0, 33.0, 41.9, 19.0, 52.0, 47.0, NA, 24.0, … #> $ sex \"male\", \"none\", \"none\", \"male\", \"female\", \"male\", \"fe… #> $ gender \"masculine\", \"masculine\", \"masculine\", \"masculine\", \"… #> $ homeworld \"Tatooine\", \"Tatooine\", \"Naboo\", \"Tatooine\", \"Alderaa… #> $ species \"Human\", \"Droid\", \"Droid\", \"Human\", \"Human\", \"Human\",… #> $ films <\"A New Hope\", \"The Empire Strikes Back\", \"Return of… #> $ vehicles <\"Snowspeeder\", \"Imperial Speeder Bike\">, <>, <>, <>… #> $ starships <\"X-wing\", \"Imperial shuttle\">, <>, <>, \"TIE Advance…"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by.html","id":null,"dir":"Reference","previous_headings":"","what":"Group by one or more variables — group_by","title":"Group by one or more variables — group_by","text":"data operations done groups defined variables. group_by() takes existing tbl converts grouped tbl operations performed \"group\". ungroup() removes grouping.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Group by one or more variables — group_by","text":"","code":"group_by(.data, ..., .add = FALSE, .drop = group_by_drop_default(.data)) ungroup(x, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Group by one or more variables — group_by","text":".data data frame, data frame extension (e.g. tibble), lazy data frame (e.g. dbplyr dtplyr). See Methods, , details. ... 
group_by(), variables computations group . Computations always done ungrouped data frame. perform computations grouped data, need use separate mutate() step group_by(). Computations allowed nest_by(). ungroup(), variables remove grouping. .add FALSE, default, group_by() override existing groups. add existing groups, use .add = TRUE. argument previously called add, prevented creating new grouping variable called add, conflicts naming conventions. .drop Drop groups formed factor levels appear data? default TRUE except .data previously grouped .drop = FALSE. See group_by_drop_default() details. x tbl()","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Group by one or more variables — group_by","text":"grouped data frame class grouped_df, unless combination ... add yields empty set grouping columns, case tibble returned.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Group by one or more variables — group_by","text":"function generics, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. Methods available currently loaded packages: group_by(): dbplyr (tbl_lazy), dplyr (data.frame) . ungroup(): dbplyr (tbl_lazy), dplyr (data.frame, grouped_df, rowwise_df) .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by.html","id":"ordering","dir":"Reference","previous_headings":"","what":"Ordering","title":"Group by one or more variables — group_by","text":"Currently, group_by() internally orders groups ascending order. results ordered output functions aggregate groups, summarise(). used grouping columns, character vectors ordered C locale performance reproducibility across R sessions. 
resulting ordering grouped operation matters dependent locale, follow grouped operation explicit call arrange() set .locale argument. example: often useful preliminary step generating content intended humans, HTML table.","code":"data %>% group_by(chr) %>% summarise(avg = mean(x)) %>% arrange(chr, .locale = \"en\")"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by.html","id":"legacy-behavior","dir":"Reference","previous_headings":"","what":"Legacy behavior","title":"Group by one or more variables — group_by","text":"Prior dplyr 1.1.0, character vector grouping columns ordered system locale. need temporarily revert behavior, can set global option dplyr.legacy_locale TRUE, used sparingly expect option removed future version dplyr. better update existing code explicitly call arrange(.locale = ) instead. Note setting dplyr.legacy_locale also force calls arrange() use system locale.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Group by one or more variables — group_by","text":"","code":"by_cyl <- mtcars %>% group_by(cyl) # grouping doesn't change how the data looks (apart from listing # how it's grouped): by_cyl #> # A tibble: 32 × 11 #> # Groups: cyl [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows # It changes how it acts with the other dplyr verbs: by_cyl %>% summarise( disp = mean(disp), hp = mean(hp) ) #> # A tibble: 3 × 3 #> cyl disp hp #> #> 1 4 105. 82.6 #> 2 6 183. 122. 
#> 3 8 353. 209. by_cyl %>% filter(disp == max(disp)) #> # A tibble: 3 × 11 #> # Groups: cyl [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 2 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 3 10.4 8 472 205 2.93 5.25 18.0 0 0 3 4 # Each call to summarise() removes a layer of grouping by_vs_am <- mtcars %>% group_by(vs, am) by_vs <- by_vs_am %>% summarise(n = n()) #> `summarise()` has grouped output by 'vs'. You can override using the #> `.groups` argument. by_vs #> # A tibble: 4 × 3 #> # Groups: vs [2] #> vs am n #> #> 1 0 0 12 #> 2 0 1 6 #> 3 1 0 7 #> 4 1 1 7 by_vs %>% summarise(n = sum(n)) #> # A tibble: 2 × 2 #> vs n #> #> 1 0 18 #> 2 1 14 # To removing grouping, use ungroup by_vs %>% ungroup() %>% summarise(n = sum(n)) #> # A tibble: 1 × 1 #> n #> #> 1 32 # By default, group_by() overrides existing grouping by_cyl %>% group_by(vs, am) %>% group_vars() #> [1] \"vs\" \"am\" # Use add = TRUE to instead append by_cyl %>% group_by(vs, am, .add = TRUE) %>% group_vars() #> [1] \"cyl\" \"vs\" \"am\" # You can group by expressions: this is a short-hand # for a mutate() followed by a group_by() mtcars %>% group_by(vsam = vs + am) #> # A tibble: 32 × 12 #> # Groups: vsam [3] #> mpg cyl disp hp drat wt qsec vs am gear carb vsam #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 1 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 1 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 2 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 0 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 0 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 1 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 1 #> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 1 #> # ℹ 22 more rows # The implicit mutate() step is always performed on the # ungrouped data. 
Here we get 3 groups: mtcars %>% group_by(vs) %>% group_by(hp_cut = cut(hp, 3)) #> # A tibble: 32 × 12 #> # Groups: hp_cut [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows #> # ℹ 1 more variable: hp_cut # If you want it to be performed by groups, # you have to use an explicit mutate() call. # Here we get 3 groups per value of vs mtcars %>% group_by(vs) %>% mutate(hp_cut = cut(hp, 3)) %>% group_by(hp_cut) #> # A tibble: 32 × 12 #> # Groups: hp_cut [6] #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 
123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows #> # ℹ 1 more variable: hp_cut # when factors are involved and .drop = FALSE, groups can be empty tbl <- tibble( x = 1:10, y = factor(rep(c(\"a\", \"c\"), each = 5), levels = c(\"a\", \"b\", \"c\")) ) tbl %>% group_by(y, .drop = FALSE) %>% group_rows() #> [3]> #> [[1]] #> [1] 1 2 3 4 5 #> #> [[2]] #> integer(0) #> #> [[3]] #> [1] 6 7 8 9 10 #>"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by_all.html","id":null,"dir":"Reference","previous_headings":"","what":"Group by a selection of variables — group_by_all","title":"Group by a selection of variables — group_by_all","text":"Scoped verbs (_if, _at, _all) superseded use pick() across() existing verb. See vignette(\"colwise\") details. scoped variants group_by() group data frame selection variables. Like group_by(), optional mutate semantics.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by_all.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Group by a selection of variables — group_by_all","text":"","code":"group_by_all( .tbl, .funs = list(), ..., .add = FALSE, .drop = group_by_drop_default(.tbl) ) group_by_at( .tbl, .vars, .funs = list(), ..., .add = FALSE, .drop = group_by_drop_default(.tbl) ) group_by_if( .tbl, .predicate, .funs = list(), ..., .add = FALSE, .drop = group_by_drop_default(.tbl) )"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by_all.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Group by a selection of variables — group_by_all","text":".tbl tbl object. .funs function fun, quosure style lambda ~ fun(.) list either form. ... Additional arguments function calls .funs. evaluated , tidy dots support. .add See group_by() .drop Drop groups formed factor levels appear data? default TRUE except .data previously grouped .drop = FALSE. See group_by_drop_default() details. 
.vars list columns generated vars(), character vector column names, numeric vector column positions, NULL. .predicate predicate function applied columns logical vector. variables .predicate returns TRUE selected. argument passed rlang::as_function() thus supports quosure-style lambda functions strings representing function names.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by_all.html","id":"grouping-variables","dir":"Reference","previous_headings":"","what":"Grouping variables","title":"Group by a selection of variables — group_by_all","text":"Existing grouping variables maintained, even included selection.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by_all.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Group by a selection of variables — group_by_all","text":"","code":"# Group a data frame by all variables: group_by_all(mtcars) #> # A tibble: 32 × 11 #> # Groups: mpg, cyl, disp, hp, drat, wt, qsec, vs, am, gear, carb [32] #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 
123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows # -> mtcars %>% group_by(pick(everything())) #> # A tibble: 32 × 11 #> # Groups: mpg, cyl, disp, hp, drat, wt, qsec, vs, am, gear, carb [32] #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows # Group by variables selected with a predicate: group_by_if(iris, is.factor) #> # A tibble: 150 × 5 #> # Groups: Species [3] #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows # -> iris %>% group_by(pick(where(is.factor))) #> # A tibble: 150 × 5 #> # Groups: Species [3] #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows # Group by variables selected by name: group_by_at(mtcars, vars(vs, am)) #> # A tibble: 32 × 11 #> # Groups: vs, am [4] #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 
2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows # -> mtcars %>% group_by(pick(vs, am)) #> # A tibble: 32 × 11 #> # Groups: vs, am [4] #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows # Like group_by(), the scoped variants have optional mutate # semantics. This provide a shortcut for group_by() + mutate(): d <- tibble(x=c(1,1,2,2), y=c(1,2,1,2)) group_by_all(d, as.factor) #> # A tibble: 4 × 2 #> # Groups: x, y [4] #> x y #> #> 1 1 1 #> 2 1 2 #> 3 2 1 #> 4 2 2 # -> d %>% group_by(across(everything(), as.factor)) #> # A tibble: 4 × 2 #> # Groups: x, y [4] #> x y #> #> 1 1 1 #> 2 1 2 #> 3 2 1 #> 4 2 2 group_by_if(iris, is.factor, as.character) #> # A tibble: 150 × 5 #> # Groups: Species [3] #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows # -> iris %>% group_by(across(where(is.factor), as.character)) #> # A tibble: 150 × 5 #> # Groups: Species [3] #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 
setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by_drop_default.html","id":null,"dir":"Reference","previous_headings":"","what":"Default value for .drop argument of group_by — group_by_drop_default","title":"Default value for .drop argument of group_by — group_by_drop_default","text":"Default value .drop argument group_by","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by_drop_default.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Default value for .drop argument of group_by — group_by_drop_default","text":"","code":"group_by_drop_default(.tbl)"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by_drop_default.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Default value for .drop argument of group_by — group_by_drop_default","text":".tbl data frame","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by_drop_default.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Default value for .drop argument of group_by — group_by_drop_default","text":"TRUE unless .tbl grouped data frame previously obtained group_by(.drop = FALSE)","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by_drop_default.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Default value for .drop argument of group_by — group_by_drop_default","text":"","code":"group_by_drop_default(iris) #> [1] TRUE iris %>% group_by(Species) %>% group_by_drop_default() #> [1] TRUE iris %>% group_by(Species, .drop = FALSE) %>% group_by_drop_default() #> [1] FALSE"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by_prepare.html","id":null,"dir":"Reference","previous_headings":"","what":"Prepare for grouping and 
other operations — distinct_prepare","title":"Prepare for grouping and other operations — distinct_prepare","text":"*_prepare() performs standard manipulation needed prior actual data processing. needed packages implement dplyr backends.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by_prepare.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Prepare for grouping and other operations — distinct_prepare","text":"","code":"distinct_prepare( .data, vars, group_vars = character(), .keep_all = FALSE, caller_env = caller_env(2), error_call = caller_env() ) group_by_prepare( .data, ..., .add = FALSE, .dots = deprecated(), add = deprecated(), error_call = caller_env() )"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_by_prepare.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Prepare for grouping and other operations — distinct_prepare","text":"list data Modified tbl groups Modified groups","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_cols.html","id":null,"dir":"Reference","previous_headings":"","what":"Select grouping variables — group_cols","title":"Select grouping variables — group_cols","text":"selection helpers matches grouping variables. can used select() vars() selections.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_cols.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Select grouping variables — group_cols","text":"","code":"group_cols(vars = NULL, data = NULL)"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_cols.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Select grouping variables — group_cols","text":"vars Deprecated; please use data instead. data advanced use . 
default NULL automatically finds \"current\" data frames.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/group_cols.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Select grouping variables — group_cols","text":"","code":"gdf <- iris %>% group_by(Species) gdf %>% select(group_cols()) #> # A tibble: 150 × 1 #> # Groups: Species [3] #> Species #> #> 1 setosa #> 2 setosa #> 3 setosa #> 4 setosa #> 5 setosa #> 6 setosa #> 7 setosa #> 8 setosa #> 9 setosa #> 10 setosa #> # ℹ 140 more rows # Remove the grouping variables from mutate selections: gdf %>% mutate_at(vars(-group_cols()), `/`, 100) #> # A tibble: 150 × 5 #> # Groups: Species [3] #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 0.051 0.035 0.014 0.002 setosa #> 2 0.049 0.03 0.014 0.002 setosa #> 3 0.047 0.032 0.013 0.002 setosa #> 4 0.046 0.031 0.015 0.002 setosa #> 5 0.05 0.036 0.014 0.002 setosa #> 6 0.054 0.039 0.017 0.004 setosa #> 7 0.046 0.034 0.014 0.003 setosa #> 8 0.05 0.034 0.015 0.002 setosa #> 9 0.044 0.029 0.014 0.002 setosa #> 10 0.049 0.031 0.015 0.001 setosa #> # ℹ 140 more rows # -> No longer necessary with across() gdf %>% mutate(across(everything(), ~ . 
/ 100)) #> # A tibble: 150 × 5 #> # Groups: Species [3] #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 0.051 0.035 0.014 0.002 setosa #> 2 0.049 0.03 0.014 0.002 setosa #> 3 0.047 0.032 0.013 0.002 setosa #> 4 0.046 0.031 0.015 0.002 setosa #> 5 0.05 0.036 0.014 0.002 setosa #> 6 0.054 0.039 0.017 0.004 setosa #> 7 0.046 0.034 0.014 0.003 setosa #> 8 0.05 0.034 0.015 0.002 setosa #> 9 0.044 0.029 0.014 0.002 setosa #> 10 0.049 0.031 0.015 0.001 setosa #> # ℹ 140 more rows"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_data.html","id":null,"dir":"Reference","previous_headings":"","what":"Grouping metadata — group_data","title":"Grouping metadata — group_data","text":"collection functions accesses data grouped data frames various ways: group_data() returns data frame defines grouping structure. columns give values grouping variables. last column, always called .rows, list integer vectors gives location rows group. group_keys() returns data frame describing groups. group_rows() returns list integer vectors giving rows group contains. group_indices() returns integer vector length .data gives group row belongs . group_vars() gives names grouping variables character vector. groups() gives names grouping variables list symbols. group_size() gives size group. n_groups() gives total number groups. See context equivalent functions return values current group.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_data.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Grouping metadata — group_data","text":"","code":"group_data(.data) group_keys(.tbl, ...) group_rows(.data) group_indices(.data, ...) 
group_vars(x) groups(x) group_size(x) n_groups(x)"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_data.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Grouping metadata — group_data","text":".data, .tbl, x data frame extension (like tibble grouped tibble). ... Use ... now deprecated; please use group_by() first instead.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_data.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Grouping metadata — group_data","text":"","code":"df <- tibble(x = c(1,1,2,2)) group_vars(df) #> character(0) group_rows(df) #> [1]> #> [[1]] #> [1] 1 2 3 4 #> group_data(df) #> # A tibble: 1 × 1 #> .rows #> > #> 1 [4] group_indices(df) #> [1] 1 1 1 1 gf <- group_by(df, x) group_vars(gf) #> [1] \"x\" group_rows(gf) #> [2]> #> [[1]] #> [1] 1 2 #> #> [[2]] #> [1] 3 4 #> group_data(gf) #> # A tibble: 2 × 2 #> x .rows #> > #> 1 1 [2] #> 2 2 [2] group_indices(gf) #> [1] 1 1 2 2"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_map.html","id":null,"dir":"Reference","previous_headings":"","what":"Apply a function to each group — group_map","title":"Apply a function to each group — group_map","text":"group_map(), group_modify() group_walk() purrr-style functions can used iterate grouped tibbles.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_map.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Apply a function to each group — group_map","text":"","code":"group_map(.data, .f, ..., .keep = FALSE) group_modify(.data, .f, ..., .keep = FALSE) group_walk(.data, .f, ..., .keep = FALSE)"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_map.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Apply a function to each group — group_map","text":".data grouped tibble .f function formula apply group. function, used . 
least 2 formal arguments. formula, e.g. ~ head(.x), converted function. formula, can use . .x refer subset rows .tbl given group .y refer key, one row tibble one column per grouping variable identifies group ... Additional arguments passed .f .keep grouping variables kept .x","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_map.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Apply a function to each group — group_map","text":"group_modify() returns grouped tibble. case .f must return data frame. group_map() returns list results calling .f group. group_walk() calls .f side effects returns input .tbl, invisibly.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_map.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Apply a function to each group — group_map","text":"Use group_modify() summarize() limited, terms need return group. group_modify() good \"data frame , data frame \". limited, need use nested split workflow. group_modify() evolution (), used . conceptual group data frame exposed function .f two pieces information: subset data group, exposed .x. key, tibble exactly one row columns grouping variable, exposed .y. completeness, group_modify(), group_map group_walk() also work ungrouped data frames, case function applied entire data frame (exposed .x), .y one row tibble column, consistently group_keys().","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/group_map.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Apply a function to each group — group_map","text":"","code":"# return a list mtcars %>% group_by(cyl) %>% group_map(~ head(.x, 2L)) #> [[1]] #> # A tibble: 2 × 10 #> mpg disp hp drat wt qsec vs am gear carb #> #> 1 22.8 108 93 3.85 2.32 18.6 1 1 4 1 #> 2 24.4 147. 
62 3.69 3.19 20 1 0 4 2 #> #> [[2]] #> # A tibble: 2 × 10 #> mpg disp hp drat wt qsec vs am gear carb #> #> 1 21 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 160 110 3.9 2.88 17.0 0 1 4 4 #> #> [[3]] #> # A tibble: 2 × 10 #> mpg disp hp drat wt qsec vs am gear carb #> #> 1 18.7 360 175 3.15 3.44 17.0 0 0 3 2 #> 2 14.3 360 245 3.21 3.57 15.8 0 0 3 4 #> # return a tibble grouped by `cyl` with 2 rows per group # the grouping data is recalculated mtcars %>% group_by(cyl) %>% group_modify(~ head(.x, 2L)) #> # A tibble: 6 × 11 #> # Groups: cyl [3] #> cyl mpg disp hp drat wt qsec vs am gear carb #> #> 1 4 22.8 108 93 3.85 2.32 18.6 1 1 4 1 #> 2 4 24.4 147. 62 3.69 3.19 20 1 0 4 2 #> 3 6 21 160 110 3.9 2.62 16.5 0 1 4 4 #> 4 6 21 160 110 3.9 2.88 17.0 0 1 4 4 #> 5 8 18.7 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 8 14.3 360 245 3.21 3.57 15.8 0 0 3 4 # a list of tibbles iris %>% group_by(Species) %>% group_map(~ broom::tidy(lm(Petal.Length ~ Sepal.Length, data = .x))) #> [[1]] #> # A tibble: 2 × 5 #> term estimate std.error statistic p.value #> #> 1 (Intercept) 0.803 0.344 2.34 0.0238 #> 2 Sepal.Length 0.132 0.0685 1.92 0.0607 #> #> [[2]] #> # A tibble: 2 × 5 #> term estimate std.error statistic p.value #> #> 1 (Intercept) 0.185 0.514 0.360 7.20e- 1 #> 2 Sepal.Length 0.686 0.0863 7.95 2.59e-10 #> #> [[3]] #> # A tibble: 2 × 5 #> term estimate std.error statistic p.value #> #> 1 (Intercept) 0.610 0.417 1.46 1.50e- 1 #> 2 Sepal.Length 0.750 0.0630 11.9 6.30e-16 #> # a restructured grouped tibble iris %>% group_by(Species) %>% group_modify(~ broom::tidy(lm(Petal.Length ~ Sepal.Length, data = .x))) #> # A tibble: 6 × 6 #> # Groups: Species [3] #> Species term estimate std.error statistic p.value #> #> 1 setosa (Intercept) 0.803 0.344 2.34 2.38e- 2 #> 2 setosa Sepal.Length 0.132 0.0685 1.92 6.07e- 2 #> 3 versicolor (Intercept) 0.185 0.514 0.360 7.20e- 1 #> 4 versicolor Sepal.Length 0.686 0.0863 7.95 2.59e-10 #> 5 virginica (Intercept) 0.610 0.417 1.46 1.50e- 1 #> 6 virginica Sepal.Length 
0.750 0.0630 11.9 6.30e-16 # a list of vectors iris %>% group_by(Species) %>% group_map(~ quantile(.x$Petal.Length, probs = c(0.25, 0.5, 0.75))) #> [[1]] #> 25% 50% 75% #> 1.400 1.500 1.575 #> #> [[2]] #> 25% 50% 75% #> 4.00 4.35 4.60 #> #> [[3]] #> 25% 50% 75% #> 5.100 5.550 5.875 #> # to use group_modify() the lambda must return a data frame iris %>% group_by(Species) %>% group_modify(~ { quantile(.x$Petal.Length, probs = c(0.25, 0.5, 0.75)) %>% tibble::enframe(name = \"prob\", value = \"quantile\") }) #> # A tibble: 9 × 3 #> # Groups: Species [3] #> Species prob quantile #> #> 1 setosa 25% 1.4 #> 2 setosa 50% 1.5 #> 3 setosa 75% 1.58 #> 4 versicolor 25% 4 #> 5 versicolor 50% 4.35 #> 6 versicolor 75% 4.6 #> 7 virginica 25% 5.1 #> 8 virginica 50% 5.55 #> 9 virginica 75% 5.88 iris %>% group_by(Species) %>% group_modify(~ { .x %>% purrr::map_dfc(fivenum) %>% mutate(nms = c(\"min\", \"Q1\", \"median\", \"Q3\", \"max\")) }) #> # A tibble: 15 × 6 #> # Groups: Species [3] #> Species Sepal.Length Sepal.Width Petal.Length Petal.Width nms #> #> 1 setosa 4.3 2.3 1 0.1 min #> 2 setosa 4.8 3.2 1.4 0.2 Q1 #> 3 setosa 5 3.4 1.5 0.2 median #> 4 setosa 5.2 3.7 1.6 0.3 Q3 #> 5 setosa 5.8 4.4 1.9 0.6 max #> 6 versicolor 4.9 2 3 1 min #> 7 versicolor 5.6 2.5 4 1.2 Q1 #> 8 versicolor 5.9 2.8 4.35 1.3 median #> 9 versicolor 6.3 3 4.6 1.5 Q3 #> 10 versicolor 7 3.4 5.1 1.8 max #> 11 virginica 4.9 2.2 4.5 1.4 min #> 12 virginica 6.2 2.8 5.1 1.8 Q1 #> 13 virginica 6.5 3 5.55 2 median #> 14 virginica 6.9 3.2 5.9 2.3 Q3 #> 15 virginica 7.9 3.8 6.9 2.5 max # group_walk() is for side effects dir.create(temp <- tempfile()) iris %>% group_by(Species) %>% group_walk(~ write.csv(.x, file = file.path(temp, paste0(.y$Species, \".csv\")))) list.files(temp, pattern = \"csv$\") #> [1] \"setosa.csv\" \"versicolor.csv\" \"virginica.csv\" unlink(temp, recursive = TRUE) # group_modify() and ungrouped data frames mtcars %>% group_modify(~ head(.x, 2L)) #> mpg cyl disp hp drat wt qsec vs am gear carb #> 
Mazda RX4 21 6 160 110 3.9 2.620 16.46 0 1 4 4 #> Mazda RX4 Wag 21 6 160 110 3.9 2.875 17.02 0 1 4 4"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_nest.html","id":null,"dir":"Reference","previous_headings":"","what":"Nest a tibble using a grouping specification — group_nest","title":"Nest a tibble using a grouping specification — group_nest","text":"Nest tibble using grouping specification","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_nest.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Nest a tibble using a grouping specification — group_nest","text":"","code":"group_nest(.tbl, ..., .key = \"data\", keep = FALSE)"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_nest.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Nest a tibble using a grouping specification — group_nest","text":".tbl tbl ... Grouping specification, forwarded group_by() .key name list column keep grouping columns kept list column.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_nest.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Nest a tibble using a grouping specification — group_nest","text":"tbl one row per unique combination grouping variables. first columns grouping variables, followed list column tibbles matching rows remaining columns.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_nest.html","id":"lifecycle","dir":"Reference","previous_headings":"","what":"Lifecycle","title":"Nest a tibble using a grouping specification — group_nest","text":"group_nest() stable tidyr::nest(.=) provides similar behavior. 
may deprecated future.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_nest.html","id":"grouped-data-frames","dir":"Reference","previous_headings":"","what":"Grouped data frames","title":"Nest a tibble using a grouping specification — group_nest","text":"primary use case group_nest() already grouped data frames, typically result group_by(). case group_nest() uses first argument, grouped tibble, warns ... used.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_nest.html","id":"ungrouped-data-frames","dir":"Reference","previous_headings":"","what":"Ungrouped data frames","title":"Nest a tibble using a grouping specification — group_nest","text":"used ungrouped data frames, group_nest() forwards ... group_by() nesting, therefore ... subject data mask.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/group_nest.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Nest a tibble using a grouping specification — group_nest","text":"","code":"#----- use case 1: a grouped data frame iris %>% group_by(Species) %>% group_nest() #> # A tibble: 3 × 2 #> Species data #> > #> 1 setosa [50 × 4] #> 2 versicolor [50 × 4] #> 3 virginica [50 × 4] # this can be useful if the grouped data has been altered before nesting iris %>% group_by(Species) %>% filter(Sepal.Length > mean(Sepal.Length)) %>% group_nest() #> # A tibble: 3 × 2 #> Species data #> > #> 1 setosa [22 × 4] #> 2 versicolor [24 × 4] #> 3 virginica [22 × 4] #----- use case 2: using group_nest() on a ungrouped data frame with # a grouping specification that uses the data mask starwars %>% group_nest(species, homeworld) #> # A tibble: 57 × 3 #> species homeworld data #> > #> 1 Aleena Aleen Minor [1 × 12] #> 2 Besalisk Ojom [1 × 12] #> 3 Cerean Cerea [1 × 12] #> 4 Chagrian Champala [1 × 12] #> 5 Clawdite Zolan [1 × 12] #> 6 Droid Naboo [1 × 12] #> 7 Droid Tatooine [2 × 12] #> 8 Droid NA [3 × 12] #> 9 Dug Malastare [1 × 12] 
#> 10 Ewok Endor [1 × 12] #> # ℹ 47 more rows"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_split.html","id":null,"dir":"Reference","previous_headings":"","what":"Split data frame by groups — group_split","title":"Split data frame by groups — group_split","text":"group_split() works like base::split() : uses grouping structure group_by() therefore subject data mask name elements list based grouping works well single character grouping variable. Instead, use group_keys() access data frame defines groups. group_split() primarily designed work grouped data frames. can pass ... group split ungrouped data frame, generally useful want easy access group metadata.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_split.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Split data frame by groups — group_split","text":"","code":"group_split(.tbl, ..., .keep = TRUE)"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_split.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Split data frame by groups — group_split","text":".tbl tbl. ... .tbl ungrouped data frame, grouping specification, forwarded group_by(). .keep grouping columns kept?","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_split.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Split data frame by groups — group_split","text":"list tibbles. tibble contains rows .tbl associated group columns, including grouping variables. Note returns list_of slightly stricter simple list useful representing lists every element type.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_split.html","id":"lifecycle","dir":"Reference","previous_headings":"","what":"Lifecycle","title":"Split data frame by groups — group_split","text":"group_split() stable can achieve similar results manipulating nested column returned tidyr::nest(.=). 
also retains group keys within single data structure. group_split() may deprecated future.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/group_split.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Split data frame by groups — group_split","text":"","code":"ir <- iris %>% group_by(Species) group_split(ir) #> tbl_df< #> Sepal.Length: double #> Sepal.Width : double #> Petal.Length: double #> Petal.Width : double #> Species : factor #> > #> >[3]> #> [[1]] #> # A tibble: 50 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 40 more rows #> #> [[2]] #> # A tibble: 50 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 7 3.2 4.7 1.4 versicolor #> 2 6.4 3.2 4.5 1.5 versicolor #> 3 6.9 3.1 4.9 1.5 versicolor #> 4 5.5 2.3 4 1.3 versicolor #> 5 6.5 2.8 4.6 1.5 versicolor #> 6 5.7 2.8 4.5 1.3 versicolor #> 7 6.3 3.3 4.7 1.6 versicolor #> 8 4.9 2.4 3.3 1 versicolor #> 9 6.6 2.9 4.6 1.3 versicolor #> 10 5.2 2.7 3.9 1.4 versicolor #> # ℹ 40 more rows #> #> [[3]] #> # A tibble: 50 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 6.3 3.3 6 2.5 virginica #> 2 5.8 2.7 5.1 1.9 virginica #> 3 7.1 3 5.9 2.1 virginica #> 4 6.3 2.9 5.6 1.8 virginica #> 5 6.5 3 5.8 2.2 virginica #> 6 7.6 3 6.6 2.1 virginica #> 7 4.9 2.5 4.5 1.7 virginica #> 8 7.3 2.9 6.3 1.8 virginica #> 9 6.7 2.5 5.8 1.8 virginica #> 10 7.2 3.6 6.1 2.5 virginica #> # ℹ 40 more rows #> group_keys(ir) #> # A tibble: 3 × 1 #> Species #> #> 1 setosa #> 2 versicolor #> 3 virginica"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_trim.html","id":null,"dir":"Reference","previous_headings":"","what":"Trim 
grouping structure — group_trim","title":"Trim grouping structure — group_trim","text":"Drop unused levels factors used grouping variables, recalculates grouping structure. group_trim() particularly useful filter() intended select subset groups.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_trim.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Trim grouping structure — group_trim","text":"","code":"group_trim(.tbl, .drop = group_by_drop_default(.tbl))"},{"path":"https://dplyr.tidyverse.org/dev/reference/group_trim.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Trim grouping structure — group_trim","text":".tbl grouped data frame .drop See group_by()","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/group_trim.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Trim grouping structure — group_trim","text":"grouped data frame","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/group_trim.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Trim grouping structure — group_trim","text":"","code":"iris %>% group_by(Species) %>% filter(Species == \"setosa\", .preserve = TRUE) %>% group_trim() #> # A tibble: 50 × 5 #> # Groups: Species [1] #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 40 more rows"},{"path":"https://dplyr.tidyverse.org/dev/reference/grouped_df.html","id":null,"dir":"Reference","previous_headings":"","what":"A grouped data frame. — grouped_df","title":"A grouped data frame. 
— grouped_df","text":"easiest way create grouped data frame call group_by() method data frame tbl: take care capturing unevaluated expressions . functions designed programmatic use. data analysis purposes see group_data() accessor functions retrieve various metadata grouped data frames.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/grouped_df.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"A grouped data frame. — grouped_df","text":"","code":"grouped_df(data, vars, drop = group_by_drop_default(data)) is.grouped_df(x) is_grouped_df(x)"},{"path":"https://dplyr.tidyverse.org/dev/reference/grouped_df.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"A grouped data frame. — grouped_df","text":"data tbl data frame. vars character vector. drop .drop = TRUE, empty groups dropped.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/ident.html","id":null,"dir":"Reference","previous_headings":"","what":"Flag a character vector as SQL identifiers — ident","title":"Flag a character vector as SQL identifiers — ident","text":"ident() takes unquoted strings flags identifiers. ident_q() assumes input already quoted, ensures get quoted . currently used schema.table.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/ident.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Flag a character vector as SQL identifiers — ident","text":"","code":"ident(...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/ident.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Flag a character vector as SQL identifiers — ident","text":"... 
character vector, name-value pairs","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/ident.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Flag a character vector as SQL identifiers — ident","text":"","code":"# Identifiers are escaped with \" ident(\"x\") #> x"},{"path":"https://dplyr.tidyverse.org/dev/reference/if_else.html","id":null,"dir":"Reference","previous_headings":"","what":"Vectorised if-else — if_else","title":"Vectorised if-else — if_else","text":"if_else() vectorized -else. Compared base R equivalent, ifelse(), function allows handle missing values condition missing always takes true, false, missing account determining output type .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/if_else.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Vectorised if-else — if_else","text":"","code":"if_else(condition, true, false, missing = NULL, ..., ptype = NULL, size = NULL)"},{"path":"https://dplyr.tidyverse.org/dev/reference/if_else.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Vectorised if-else — if_else","text":"condition logical vector true, false Vectors use TRUE FALSE values condition. true false recycled size condition. true, false, missing (used) cast common type. missing NULL, used value NA values condition. Follows size type rules true false. ... dots future extensions must empty. ptype optional prototype declaring desired output type. supplied, overrides common type true, false, missing. size optional size declaring desired output size. supplied, overrides size condition.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/if_else.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Vectorised if-else — if_else","text":"vector size condition type common type true, false, missing. 
condition TRUE, matching values true, FALSE, matching values false, NA, matching values missing, provided, otherwise missing value used.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/if_else.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Vectorised if-else — if_else","text":"","code":"x <- c(-5:5, NA) if_else(x < 0, NA, x) #> [1] NA NA NA NA NA 0 1 2 3 4 5 NA # Explicitly handle `NA` values in the `condition` with `missing` if_else(x < 0, \"negative\", \"positive\", missing = \"missing\") #> [1] \"negative\" \"negative\" \"negative\" \"negative\" \"negative\" \"positive\" #> [7] \"positive\" \"positive\" \"positive\" \"positive\" \"positive\" \"missing\" # Unlike `ifelse()`, `if_else()` preserves types x <- factor(sample(letters[1:5], 10, replace = TRUE)) ifelse(x %in% c(\"a\", \"b\", \"c\"), x, NA) #> [1] 2 3 NA 3 NA 1 2 3 2 NA if_else(x %in% c(\"a\", \"b\", \"c\"), x, NA) #> [1] b c c a b c b #> Levels: a b c d e # `if_else()` is often useful for creating new columns inside of `mutate()` starwars %>% mutate(category = if_else(height < 100, \"short\", \"tall\"), .keep = \"used\") #> # A tibble: 87 × 2 #> height category #> #> 1 172 tall #> 2 167 tall #> 3 96 short #> 4 202 tall #> 5 150 tall #> 6 178 tall #> 7 165 tall #> 8 97 short #> 9 183 tall #> 10 182 tall #> # ℹ 77 more rows"},{"path":"https://dplyr.tidyverse.org/dev/reference/join_by.html","id":null,"dir":"Reference","previous_headings":"","what":"Join specifications — join_by","title":"Join specifications — join_by","text":"join_by() constructs specification describes join two tables using small domain specific language. 
result can supplied argument join functions (left_join()).","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/join_by.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Join specifications — join_by","text":"","code":"join_by(...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/join_by.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Join specifications — join_by","text":"... Expressions specifying join. expression consist one following: Equality condition: == Inequality conditions: >=, >, <=, < Rolling helper: closest() Overlap helpers: (), within(), overlaps() expressions supported. need perform join computed variable, e.g. join_by(sales_date - 40 >= promo_date), need precompute store separate column. Column names specified quoted unquoted names. default, name left-hand side join condition refers left-hand table, unless overridden explicitly prefixing column name either x$ y$. single column name provided without join conditions, interpreted column name duplicated side ==, .e. x interpreted x == x.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/join_by.html","id":"join-types","dir":"Reference","previous_headings":"","what":"Join types","title":"Join specifications — join_by","text":"following types joins supported dplyr: Equality joins Inequality joins Rolling joins Overlap joins Cross joins Equality, inequality, rolling, overlap joins discussed detail . Cross joins implemented cross_join().","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/join_by.html","id":"equality-joins","dir":"Reference","previous_headings":"","what":"Equality joins","title":"Join specifications — join_by","text":"Equality joins require keys equal one pairs columns, common type join. construct equality join using join_by(), supply two column names join separated ==. Alternatively, supplying single name interpreted equality join two columns name. 
example, join_by(x) equivalent join_by(x == x).","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/join_by.html","id":"inequality-joins","dir":"Reference","previous_headings":"","what":"Inequality joins","title":"Join specifications — join_by","text":"Inequality joins match inequality, >, >=, <, <=, common time series analysis genomics. construct inequality join using join_by(), supply two column names separated one mentioned inequalities. Note inequality joins match single row x potentially large number rows y. extra careful constructing inequality join specifications!","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/join_by.html","id":"rolling-joins","dir":"Reference","previous_headings":"","what":"Rolling joins","title":"Join specifications — join_by","text":"Rolling joins variant inequality joins limit results returned inequality join condition. useful \"rolling\" closest match forward/backwards exact match. construct rolling join, wrap inequality closest(). closest(expr) expr must inequality involving one : >, >=, <, <=. example, closest(x >= y) interpreted : value x, find closest value y less equal x value. closest() always use left-hand table (x) primary table, right-hand table (y) one find closest match , regardless inequality specified. example, closest(y$>= x$b) always interpreted closest(x$b <= y$).","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/join_by.html","id":"overlap-joins","dir":"Reference","previous_headings":"","what":"Overlap joins","title":"Join specifications — join_by","text":"Overlap joins special case inequality joins involving one two columns left-hand table overlapping range defined two columns right-hand table. three helpers join_by() recognizes assist constructing overlap joins, can constructed simpler inequalities. (x, y_lower, y_upper, ..., bounds = \"[]\") value x, finds everywhere value falls [y_lower, y_upper]. Equivalent x >= y_lower, x <= y_upper default. 
bounds can one \"[]\", \"[)\", \"(]\", \"()\" alter inclusiveness lower upper bounds. changes whether >= > <= < used build inequalities shown . Dots future extensions must empty. within(x_lower, x_upper, y_lower, y_upper) range [x_lower, x_upper], finds everywhere range falls completely within [y_lower, y_upper]. Equivalent x_lower >= y_lower, x_upper <= y_upper. inequalities used build within() regardless inclusiveness supplied ranges. overlaps(x_lower, x_upper, y_lower, y_upper, ..., bounds = \"[]\") range [x_lower, x_upper], finds everywhere range overlaps [y_lower, y_upper] capacity. Equivalent x_lower <= y_upper, x_upper >= y_lower default. bounds can one \"[]\", \"[)\", \"(]\", \"()\" alter inclusiveness lower upper bounds. \"[]\" uses <= >=, 3 options use < > generate exact inequalities. Dots future extensions must empty. conditions assume ranges well-formed non-empty, .e. x_lower <= x_upper bounds treated \"[]\", x_lower < x_upper otherwise.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/join_by.html","id":"column-referencing","dir":"Reference","previous_headings":"","what":"Column referencing","title":"Join specifications — join_by","text":"specifying join conditions, join_by() assumes column names left-hand side condition refer left-hand table (x), names right-hand side condition refer right-hand table (y). Occasionally, clearer able specify right-hand table name left-hand side condition, vice versa. 
support , column names can prefixed x$ y$ explicitly specify table come .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/join_by.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Join specifications — join_by","text":"","code":"sales <- tibble( id = c(1L, 1L, 1L, 2L, 2L), sale_date = as.Date(c(\"2018-12-31\", \"2019-01-02\", \"2019-01-05\", \"2019-01-04\", \"2019-01-01\")) ) sales #> # A tibble: 5 × 2 #> id sale_date #> #> 1 1 2018-12-31 #> 2 1 2019-01-02 #> 3 1 2019-01-05 #> 4 2 2019-01-04 #> 5 2 2019-01-01 promos <- tibble( id = c(1L, 1L, 2L), promo_date = as.Date(c(\"2019-01-01\", \"2019-01-05\", \"2019-01-02\")) ) promos #> # A tibble: 3 × 2 #> id promo_date #> #> 1 1 2019-01-01 #> 2 1 2019-01-05 #> 3 2 2019-01-02 # Match `id` to `id`, and `sale_date` to `promo_date` by <- join_by(id, sale_date == promo_date) left_join(sales, promos, by) #> # A tibble: 5 × 2 #> id sale_date #> #> 1 1 2018-12-31 #> 2 1 2019-01-02 #> 3 1 2019-01-05 #> 4 2 2019-01-04 #> 5 2 2019-01-01 # For each `sale_date` within a particular `id`, # find all `promo_date`s that occurred before that particular sale by <- join_by(id, sale_date >= promo_date) left_join(sales, promos, by) #> # A tibble: 6 × 3 #> id sale_date promo_date #> #> 1 1 2018-12-31 NA #> 2 1 2019-01-02 2019-01-01 #> 3 1 2019-01-05 2019-01-01 #> 4 1 2019-01-05 2019-01-05 #> 5 2 2019-01-04 2019-01-02 #> 6 2 2019-01-01 NA # For each `sale_date` within a particular `id`, # find only the closest `promo_date` that occurred before that sale by <- join_by(id, closest(sale_date >= promo_date)) left_join(sales, promos, by) #> # A tibble: 5 × 3 #> id sale_date promo_date #> #> 1 1 2018-12-31 NA #> 2 1 2019-01-02 2019-01-01 #> 3 1 2019-01-05 2019-01-05 #> 4 2 2019-01-04 2019-01-02 #> 5 2 2019-01-01 NA # If you want to disallow exact matching in rolling joins, use `>` rather # than `>=`. 
Note that the promo on `2019-01-05` is no longer considered the # closest match for the sale on the same date. by <- join_by(id, closest(sale_date > promo_date)) left_join(sales, promos, by) #> # A tibble: 5 × 3 #> id sale_date promo_date #> #> 1 1 2018-12-31 NA #> 2 1 2019-01-02 2019-01-01 #> 3 1 2019-01-05 2019-01-01 #> 4 2 2019-01-04 2019-01-02 #> 5 2 2019-01-01 NA # Same as before, but also require that the promo had to occur at most 1 # day before the sale was made. We'll use a full join to see that id 2's # promo on `2019-01-02` is no longer matched to the sale on `2019-01-04`. sales <- mutate(sales, sale_date_lower = sale_date - 1) by <- join_by(id, closest(sale_date >= promo_date), sale_date_lower <= promo_date) full_join(sales, promos, by) #> # A tibble: 6 × 4 #> id sale_date sale_date_lower promo_date #> #> 1 1 2018-12-31 2018-12-30 NA #> 2 1 2019-01-02 2019-01-01 2019-01-01 #> 3 1 2019-01-05 2019-01-04 2019-01-05 #> 4 2 2019-01-04 2019-01-03 NA #> 5 2 2019-01-01 2018-12-31 NA #> 6 2 NA NA 2019-01-02 # --------------------------------------------------------------------------- segments <- tibble( segment_id = 1:4, chromosome = c(\"chr1\", \"chr2\", \"chr2\", \"chr1\"), start = c(140, 210, 380, 230), end = c(150, 240, 415, 280) ) segments #> # A tibble: 4 × 4 #> segment_id chromosome start end #> #> 1 1 chr1 140 150 #> 2 2 chr2 210 240 #> 3 3 chr2 380 415 #> 4 4 chr1 230 280 reference <- tibble( reference_id = 1:4, chromosome = c(\"chr1\", \"chr1\", \"chr2\", \"chr2\"), start = c(100, 200, 300, 415), end = c(150, 250, 399, 450) ) reference #> # A tibble: 4 × 4 #> reference_id chromosome start end #> #> 1 1 chr1 100 150 #> 2 2 chr1 200 250 #> 3 3 chr2 300 399 #> 4 4 chr2 415 450 # Find every time a segment `start` falls between the reference # `[start, end]` range. 
by <- join_by(chromosome, between(start, start, end)) full_join(segments, reference, by) #> # A tibble: 5 × 7 #> segment_id chromosome start.x end.x reference_id start.y end.y #> #> 1 1 chr1 140 150 1 100 150 #> 2 2 chr2 210 240 NA NA NA #> 3 3 chr2 380 415 3 300 399 #> 4 4 chr1 230 280 2 200 250 #> 5 NA chr2 NA NA 4 415 450 # If you wanted the reference columns first, supply `reference` as `x` # and `segments` as `y`, then explicitly refer to their columns using `x$` # and `y$`. by <- join_by(chromosome, between(y$start, x$start, x$end)) full_join(reference, segments, by) #> # A tibble: 5 × 7 #> reference_id chromosome start.x end.x segment_id start.y end.y #> #> 1 1 chr1 100 150 1 140 150 #> 2 2 chr1 200 250 4 230 280 #> 3 3 chr2 300 399 3 380 415 #> 4 4 chr2 415 450 NA NA NA #> 5 NA chr2 NA NA 2 210 240 # Find every time a segment falls completely within a reference. # Sometimes using `x$` and `y$` makes your intentions clearer, even if they # match the default behavior. by <- join_by(chromosome, within(x$start, x$end, y$start, y$end)) inner_join(segments, reference, by) #> # A tibble: 1 × 7 #> segment_id chromosome start.x end.x reference_id start.y end.y #> #> 1 1 chr1 140 150 1 100 150 # Find every time a segment overlaps a reference in any way. by <- join_by(chromosome, overlaps(x$start, x$end, y$start, y$end)) full_join(segments, reference, by) #> # A tibble: 5 × 7 #> segment_id chromosome start.x end.x reference_id start.y end.y #> #> 1 1 chr1 140 150 1 100 150 #> 2 2 chr2 210 240 NA NA NA #> 3 3 chr2 380 415 3 300 399 #> 4 3 chr2 380 415 4 415 450 #> 5 4 chr1 230 280 2 200 250 # It is common to have right-open ranges with bounds like `[)`, which would # mean an end value of `415` would no longer overlap a start value of `415`. # Setting `bounds` allows you to compute overlaps with those kinds of ranges. 
by <- join_by(chromosome, overlaps(x$start, x$end, y$start, y$end, bounds = \"[)\")) full_join(segments, reference, by) #> # A tibble: 5 × 7 #> segment_id chromosome start.x end.x reference_id start.y end.y #> #> 1 1 chr1 140 150 1 100 150 #> 2 2 chr2 210 240 NA NA NA #> 3 3 chr2 380 415 3 300 399 #> 4 4 chr1 230 280 2 200 250 #> 5 NA chr2 NA NA 4 415 450"},{"path":"https://dplyr.tidyverse.org/dev/reference/last_dplyr_warnings.html","id":null,"dir":"Reference","previous_headings":"","what":"Show warnings from the last command — last_dplyr_warnings","title":"Show warnings from the last command — last_dplyr_warnings","text":"Warnings occur inside dplyr verb like mutate() caught stashed away instead emitted console. prevents rowwise grouped data frames flooding console warnings. see original warnings, use last_dplyr_warnings().","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/last_dplyr_warnings.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Show warnings from the last command — last_dplyr_warnings","text":"","code":"last_dplyr_warnings(n = 5)"},{"path":"https://dplyr.tidyverse.org/dev/reference/last_dplyr_warnings.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Show warnings from the last command — last_dplyr_warnings","text":"n Passed head() first n warnings displayed.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/lead-lag.html","id":null,"dir":"Reference","previous_headings":"","what":"Compute lagged or leading values — lead-lag","title":"Compute lagged or leading values — lead-lag","text":"Find \"previous\" (lag()) \"next\" (lead()) values vector. 
Useful comparing values behind ahead current values.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/lead-lag.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Compute lagged or leading values — lead-lag","text":"","code":"lag(x, n = 1L, default = NULL, order_by = NULL, ...) lead(x, n = 1L, default = NULL, order_by = NULL, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/lead-lag.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Compute lagged or leading values — lead-lag","text":"x vector n Positive integer length 1, giving number positions lag lead default value used pad x back original size lag lead applied. default, NULL, pads missing value. supplied, must vector size 1, cast type x. order_by optional secondary vector defines ordering use applying lag lead x. supplied, must size x. ... used.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/lead-lag.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Compute lagged or leading values — lead-lag","text":"vector type size x.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/lead-lag.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Compute lagged or leading values — lead-lag","text":"","code":"lag(1:5) #> [1] NA 1 2 3 4 lead(1:5) #> [1] 2 3 4 5 NA x <- 1:5 tibble(behind = lag(x), x, ahead = lead(x)) #> # A tibble: 5 × 3 #> behind x ahead #> #> 1 NA 1 2 #> 2 1 2 3 #> 3 2 3 4 #> 4 3 4 5 #> 5 4 5 NA # If you want to look more rows behind or ahead, use `n` lag(1:5, n = 1) #> [1] NA 1 2 3 4 lag(1:5, n = 2) #> [1] NA NA 1 2 3 lead(1:5, n = 1) #> [1] 2 3 4 5 NA lead(1:5, n = 2) #> [1] 3 4 5 NA NA # If you want to define a value to pad with, use `default` lag(1:5) #> [1] NA 1 2 3 4 lag(1:5, default = 0) #> [1] 0 1 2 3 4 lead(1:5) #> [1] 2 3 4 5 NA lead(1:5, default = 6) #> [1] 2 3 4 5 6 # If the data are not already 
ordered, use `order_by` scrambled <- slice_sample( tibble(year = 2000:2005, value = (0:5) ^ 2), prop = 1 ) wrong <- mutate(scrambled, previous_year_value = lag(value)) arrange(wrong, year) #> # A tibble: 6 × 3 #> year value previous_year_value #> #> 1 2000 0 25 #> 2 2001 1 4 #> 3 2002 4 0 #> 4 2003 9 16 #> 5 2004 16 NA #> 6 2005 25 9 right <- mutate(scrambled, previous_year_value = lag(value, order_by = year)) arrange(right, year) #> # A tibble: 6 × 3 #> year value previous_year_value #> #> 1 2000 0 NA #> 2 2001 1 0 #> 3 2002 4 1 #> 4 2003 9 4 #> 5 2004 16 9 #> 6 2005 25 16"},{"path":"https://dplyr.tidyverse.org/dev/reference/make_tbl.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a ","title":"Create a ","text":"tbl() standard constructor tbls. .tbl() coerces, .tbl() tests.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/make_tbl.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a ","text":"","code":"make_tbl(subclass, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/make_tbl.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a ","text":"subclass name subclass. \"tbl\" abstract base class, must supply value. tbl_ automatically prepended class name ... tbl(), fields used class. .tbl(), arguments passed methods.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate-joins.html","id":null,"dir":"Reference","previous_headings":"","what":"Mutating joins — mutate-joins","title":"Mutating joins — mutate-joins","text":"Mutating joins add columns y x, matching observations based keys. four mutating joins: inner join, three outer joins.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate-joins.html","id":"inner-join","dir":"Reference","previous_headings":"","what":"Inner join","title":"Mutating joins — mutate-joins","text":"inner_join() keeps observations x matching key y. 
important property inner join unmatched rows either input included result. means generally inner joins appropriate analyses, easy lose observations.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate-joins.html","id":"outer-joins","dir":"Reference","previous_headings":"","what":"Outer joins","title":"Mutating joins — mutate-joins","text":"three outer joins keep observations appear least one data frames: left_join() keeps observations x. right_join() keeps observations y. full_join() keeps observations x y.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate-joins.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Mutating joins — mutate-joins","text":"","code":"inner_join( x, y, by = NULL, copy = FALSE, suffix = c(\".x\", \".y\"), ..., keep = NULL ) # S3 method for data.frame inner_join( x, y, by = NULL, copy = FALSE, suffix = c(\".x\", \".y\"), ..., keep = NULL, na_matches = c(\"na\", \"never\"), multiple = \"all\", unmatched = \"drop\", relationship = NULL ) left_join( x, y, by = NULL, copy = FALSE, suffix = c(\".x\", \".y\"), ..., keep = NULL ) # S3 method for data.frame left_join( x, y, by = NULL, copy = FALSE, suffix = c(\".x\", \".y\"), ..., keep = NULL, na_matches = c(\"na\", \"never\"), multiple = \"all\", unmatched = \"drop\", relationship = NULL ) right_join( x, y, by = NULL, copy = FALSE, suffix = c(\".x\", \".y\"), ..., keep = NULL ) # S3 method for data.frame right_join( x, y, by = NULL, copy = FALSE, suffix = c(\".x\", \".y\"), ..., keep = NULL, na_matches = c(\"na\", \"never\"), multiple = \"all\", unmatched = \"drop\", relationship = NULL ) full_join( x, y, by = NULL, copy = FALSE, suffix = c(\".x\", \".y\"), ..., keep = NULL ) # S3 method for data.frame full_join( x, y, by = NULL, copy = FALSE, suffix = c(\".x\", \".y\"), ..., keep = NULL, na_matches = c(\"na\", \"never\"), multiple = \"all\", relationship = NULL 
)"},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate-joins.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Mutating joins — mutate-joins","text":"x, y pair data frames, data frame extensions (e.g. tibble), lazy data frames (e.g. dbplyr dtplyr). See Methods, , details. join specification created join_by(), character vector variables join . NULL, default, *_join() perform natural join, using variables common across x y. message lists variables can check correct; suppress message supplying explicitly. join different variables x y, use join_by() specification. example, join_by(== b) match x$y$b. join multiple variables, use join_by() specification multiple expressions. example, join_by(== b, c == d) match x$y$b x$c y$d. column names x y, can shorten listing variable names, like join_by(, c). join_by() can also used perform inequality, rolling, overlap joins. See documentation ?join_by details types joins. simple equality joins, can alternatively specify character vector variable names join . example, = c(\"\", \"b\") joins x$y$x$b y$b. variable names differ x y, use named character vector like = c(\"x_a\" = \"y_a\", \"x_b\" = \"y_b\"). perform cross-join, generating combinations x y, see cross_join(). copy x y data source, copy TRUE, y copied src x. allows join tables across srcs, potentially expensive operation must opt . suffix non-joined duplicate variables x y, suffixes added output disambiguate . character vector length 2. ... parameters passed onto methods. keep join keys x y preserved output? NULL, default, joins equality retain keys x, joins inequality retain keys inputs. TRUE, keys inputs retained. FALSE, keys x retained. right full joins, data key columns corresponding rows exist y merged key columns x. used joining inequality conditions. na_matches two NA two NaN values match? \"na\", default, treats two NA two NaN values equal, like %%, match(), merge(). 
\"never\" treats two NA two NaN values different, never match together values. similar joins database sources base::merge(incomparables = NA). multiple Handling rows x multiple matches y. row x: \"\", default, returns every match detected y. behavior SQL. \"\" returns one match detected y, guarantees match returned. often faster \"first\" \"last\" just need detect least one match. \"first\" returns first match detected y. \"last\" returns last match detected y. unmatched unmatched keys result dropped rows handled? \"drop\" drops unmatched keys result. \"error\" throws error unmatched keys detected. unmatched intended protect accidentally dropping rows join. checks unmatched keys input potentially drop rows. left joins, checks y. right joins, checks x. inner joins, checks x y. case, unmatched also allowed character vector length 2 specify behavior x y independently. relationship Handling expected relationship keys x y. expectations chosen list invalidated, error thrown. NULL, default, expect relationship x y. However, equality joins check many--many relationship (typically unexpected) warn one occurs, encouraging either take closer look inputs make relationship explicit specifying \"many--many\". See Many--many relationships section details. \"one--one\" expects: row x matches 1 row y. row y matches 1 row x. \"one--many\" expects: row y matches 1 row x. \"many--one\" expects: row x matches 1 row y. \"many--many\" perform relationship checks, provided allow explicit relationship know exists. relationship handle cases zero matches. , see unmatched.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate-joins.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Mutating joins — mutate-joins","text":"object type x (including groups). order rows columns x preserved much possible. output following properties: rows affect join type. inner_join() returns matched x rows. left_join() returns x rows. 
right_join() returns matched x rows, followed unmatched y rows. full_join() returns x rows, followed unmatched y rows. Output columns include columns x non-key columns y. keep = TRUE, key columns y included well. non-key columns x y name, suffixes added disambiguate. keep = TRUE key columns x y name, suffixes added disambiguate well. keep = FALSE, output columns included coerced common type x y.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate-joins.html","id":"many-to-many-relationships","dir":"Reference","previous_headings":"","what":"Many-to-many relationships","title":"Mutating joins — mutate-joins","text":"default, dplyr guards many--many relationships equality joins throwing warning. occur following true: row x matches multiple rows y. row y matches multiple rows x. typically surprising, joins involve relationship one--one, one--many, many--one, often result improperly specified join. Many--many relationships particularly problematic can result Cartesian explosion number rows returned join. many--many relationship expected, silence warning explicitly setting relationship = \"many--many\". production code, best preemptively set relationship whatever relationship expect exist keys x y, forces error occur immediately data align expectations. Inequality joins typically result many--many relationships nature, warn default, still take extra care specifying inequality join, also capability return large number rows. Rolling joins warn many--many relationships either, many rolling joins follow many--one relationship, often useful set relationship = \"many--one\" enforce . 
Note SQL, database providers let specify many--many relationship two tables, instead requiring create third junction table results two one--many relationships instead.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate-joins.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Mutating joins — mutate-joins","text":"functions generics, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. Methods available currently loaded packages: inner_join(): dbplyr (tbl_lazy), dplyr (data.frame) . left_join(): dbplyr (tbl_lazy), dplyr (data.frame) . right_join(): dbplyr (tbl_lazy), dplyr (data.frame) . full_join(): dbplyr (tbl_lazy), dplyr (data.frame) .","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate-joins.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Mutating joins — mutate-joins","text":"","code":"band_members %>% inner_join(band_instruments) #> Joining with `by = join_by(name)` #> # A tibble: 2 × 3 #> name band plays #> #> 1 John Beatles guitar #> 2 Paul Beatles bass band_members %>% left_join(band_instruments) #> Joining with `by = join_by(name)` #> # A tibble: 3 × 3 #> name band plays #> #> 1 Mick Stones NA #> 2 John Beatles guitar #> 3 Paul Beatles bass band_members %>% right_join(band_instruments) #> Joining with `by = join_by(name)` #> # A tibble: 3 × 3 #> name band plays #> #> 1 John Beatles guitar #> 2 Paul Beatles bass #> 3 Keith NA guitar band_members %>% full_join(band_instruments) #> Joining with `by = join_by(name)` #> # A tibble: 4 × 3 #> name band plays #> #> 1 Mick Stones NA #> 2 John Beatles guitar #> 3 Paul Beatles bass #> 4 Keith NA guitar # To suppress the message about joining variables, supply `by` band_members %>% inner_join(band_instruments, by = join_by(name)) #> # A tibble: 2 × 3 #> name band plays #> #> 1 John Beatles guitar #> 2 
Paul Beatles bass # This is good practice in production code # Use an equality expression if the join variables have different names band_members %>% full_join(band_instruments2, by = join_by(name == artist)) #> # A tibble: 4 × 3 #> name band plays #> #> 1 Mick Stones NA #> 2 John Beatles guitar #> 3 Paul Beatles bass #> 4 Keith NA guitar # By default, the join keys from `x` and `y` are coalesced in the output; use # `keep = TRUE` to keep the join keys from both `x` and `y` band_members %>% full_join(band_instruments2, by = join_by(name == artist), keep = TRUE) #> # A tibble: 4 × 4 #> name band artist plays #> #> 1 Mick Stones NA NA #> 2 John Beatles John guitar #> 3 Paul Beatles Paul bass #> 4 NA NA Keith guitar # If a row in `x` matches multiple rows in `y`, all the rows in `y` will be # returned once for each matching row in `x`. df1 <- tibble(x = 1:3) df2 <- tibble(x = c(1, 1, 2), y = c(\"first\", \"second\", \"third\")) df1 %>% left_join(df2) #> Joining with `by = join_by(x)` #> # A tibble: 4 × 2 #> x y #> #> 1 1 first #> 2 1 second #> 3 2 third #> 4 3 NA # If a row in `y` also matches multiple rows in `x`, this is known as a # many-to-many relationship, which is typically a result of an improperly # specified join or some kind of messy data. In this case, a warning is # thrown by default: df3 <- tibble(x = c(1, 1, 1, 3)) df3 %>% left_join(df2) #> Joining with `by = join_by(x)` #> Warning: Detected an unexpected many-to-many relationship between `x` and `y`. #> ℹ Row 1 of `x` matches multiple rows in `y`. #> ℹ Row 1 of `y` matches multiple rows in `x`. #> ℹ If a many-to-many relationship is expected, set `relationship = #> \"many-to-many\"` to silence this warning. 
#> # A tibble: 7 × 2 #> x y #> #> 1 1 first #> 2 1 second #> 3 1 first #> 4 1 second #> 5 1 first #> 6 1 second #> 7 3 NA # In the rare case where a many-to-many relationship is expected, set # `relationship = \"many-to-many\"` to silence this warning df3 %>% left_join(df2, relationship = \"many-to-many\") #> Joining with `by = join_by(x)` #> # A tibble: 7 × 2 #> x y #> #> 1 1 first #> 2 1 second #> 3 1 first #> 4 1 second #> 5 1 first #> 6 1 second #> 7 3 NA # Use `join_by()` with a condition other than `==` to perform an inequality # join. Here we match on every instance where `df1$x > df2$x`. df1 %>% left_join(df2, join_by(x > x)) #> # A tibble: 6 × 3 #> x.x x.y y #> #> 1 1 NA NA #> 2 2 1 first #> 3 2 1 second #> 4 3 1 first #> 5 3 1 second #> 6 3 2 third # By default, NAs match other NAs so that there are two # rows in the output of this join: df1 <- data.frame(x = c(1, NA), y = 2) df2 <- data.frame(x = c(1, NA), z = 3) left_join(df1, df2) #> Joining with `by = join_by(x)` #> x y z #> 1 1 2 3 #> 2 NA 2 3 # You can optionally request that NAs don't match, giving a # a result that more closely resembles SQL joins left_join(df1, df2, na_matches = \"never\") #> Joining with `by = join_by(x)` #> x y z #> 1 1 2 3 #> 2 NA 2 NA"},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate.html","id":null,"dir":"Reference","previous_headings":"","what":"Create, modify, and delete columns — mutate","title":"Create, modify, and delete columns — mutate","text":"mutate() creates new columns functions existing variables. can also modify (name existing column) delete columns (setting value NULL).","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create, modify, and delete columns — mutate","text":"","code":"mutate(.data, ...) 
# S3 method for data.frame mutate( .data, ..., .by = NULL, .keep = c(\"all\", \"used\", \"unused\", \"none\"), .before = NULL, .after = NULL )"},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create, modify, and delete columns — mutate","text":".data data frame, data frame extension (e.g. tibble), lazy data frame (e.g. dbplyr dtplyr). See Methods, , details. ... Name-value pairs. name gives name column output. value can : vector length 1, recycled correct length. vector length current group (whole data frame ungrouped). NULL, remove column. data frame tibble, create multiple columns output. . Optionally, selection columns group just operation, functioning alternative group_by(). details examples, see ?dplyr_by. .keep Control columns .data retained output. Grouping columns columns created ... always kept. \"\" retains columns .data. default. \"used\" retains columns used ... create new columns. useful checking work, displays inputs outputs side--side. \"unused\" retains columns used ... create new columns. useful generate new columns, longer need columns used generate . \"none\" retain extra columns .data. grouping variables columns created ... kept. ., . Optionally, control new columns appear (default add right hand side). See relocate() details.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create, modify, and delete columns — mutate","text":"object type .data. output following properties: Columns .data preserved according .keep argument. Existing columns modified ... always returned original location. New columns created ... placed according ..arguments. number rows affected. Columns given value NULL removed. Groups recomputed grouping variable mutated. 
Data frame attributes preserved.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate.html","id":"useful-mutate-functions","dir":"Reference","previous_headings":"","what":"Useful mutate functions","title":"Create, modify, and delete columns — mutate","text":"+, -, log(), etc., usual mathematical meanings lead(), lag() dense_rank(), min_rank(), percent_rank(), row_number(), cume_dist(), ntile() cumsum(), cummean(), cummin(), cummax(), cumany(), cumall() na_if(), coalesce() if_else(), recode(), case_when()","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate.html","id":"grouped-tibbles","dir":"Reference","previous_headings":"","what":"Grouped tibbles","title":"Create, modify, and delete columns — mutate","text":"mutating expressions computed within groups, may yield different results grouped tibbles. case soon aggregating, lagging, ranking function involved. Compare ungrouped mutate: grouped equivalent: former normalises mass global average whereas latter normalises averages within species levels.","code":"starwars %>% select(name, mass, species) %>% mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) starwars %>% select(name, mass, species) %>% group_by(species) %>% mutate(mass_norm = mass / mean(mass, na.rm = TRUE))"},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Create, modify, and delete columns — mutate","text":"function generic, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. 
Methods available currently loaded packages: dbplyr (tbl_lazy), dplyr (data.frame) .","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Create, modify, and delete columns — mutate","text":"","code":"# Newly created variables are available immediately starwars %>% select(name, mass) %>% mutate( mass2 = mass * 2, mass2_squared = mass2 * mass2 ) #> # A tibble: 87 × 4 #> name mass mass2 mass2_squared #> #> 1 Luke Skywalker 77 154 23716 #> 2 C-3PO 75 150 22500 #> 3 R2-D2 32 64 4096 #> 4 Darth Vader 136 272 73984 #> 5 Leia Organa 49 98 9604 #> 6 Owen Lars 120 240 57600 #> 7 Beru Whitesun Lars 75 150 22500 #> 8 R5-D4 32 64 4096 #> 9 Biggs Darklighter 84 168 28224 #> 10 Obi-Wan Kenobi 77 154 23716 #> # ℹ 77 more rows # As well as adding new variables, you can use mutate() to # remove variables and modify existing variables. starwars %>% select(name, height, mass, homeworld) %>% mutate( mass = NULL, height = height * 0.0328084 # convert to feet ) #> # A tibble: 87 × 3 #> name height homeworld #> #> 1 Luke Skywalker 5.64 Tatooine #> 2 C-3PO 5.48 Tatooine #> 3 R2-D2 3.15 Naboo #> 4 Darth Vader 6.63 Tatooine #> 5 Leia Organa 4.92 Alderaan #> 6 Owen Lars 5.84 Tatooine #> 7 Beru Whitesun Lars 5.41 Tatooine #> 8 R5-D4 3.18 Tatooine #> 9 Biggs Darklighter 6.00 Tatooine #> 10 Obi-Wan Kenobi 5.97 Stewjon #> # ℹ 77 more rows # Use across() with mutate() to apply a transformation # to multiple columns in a tibble. 
starwars %>% select(name, homeworld, species) %>% mutate(across(!name, as.factor)) #> # A tibble: 87 × 3 #> name homeworld species #> #> 1 Luke Skywalker Tatooine Human #> 2 C-3PO Tatooine Droid #> 3 R2-D2 Naboo Droid #> 4 Darth Vader Tatooine Human #> 5 Leia Organa Alderaan Human #> 6 Owen Lars Tatooine Human #> 7 Beru Whitesun Lars Tatooine Human #> 8 R5-D4 Tatooine Droid #> 9 Biggs Darklighter Tatooine Human #> 10 Obi-Wan Kenobi Stewjon Human #> # ℹ 77 more rows # see more in ?across # Window functions are useful for grouped mutates: starwars %>% select(name, mass, homeworld) %>% group_by(homeworld) %>% mutate(rank = min_rank(desc(mass))) #> # A tibble: 87 × 4 #> # Groups: homeworld [49] #> name mass homeworld rank #> #> 1 Luke Skywalker 77 Tatooine 5 #> 2 C-3PO 75 Tatooine 6 #> 3 R2-D2 32 Naboo 6 #> 4 Darth Vader 136 Tatooine 1 #> 5 Leia Organa 49 Alderaan 2 #> 6 Owen Lars 120 Tatooine 2 #> 7 Beru Whitesun Lars 75 Tatooine 6 #> 8 R5-D4 32 Tatooine 8 #> 9 Biggs Darklighter 84 Tatooine 3 #> 10 Obi-Wan Kenobi 77 Stewjon 1 #> # ℹ 77 more rows # see `vignette(\"window-functions\")` for more details # By default, new columns are placed on the far right. df <- tibble(x = 1, y = 2) df %>% mutate(z = x + y) #> # A tibble: 1 × 3 #> x y z #> #> 1 1 2 3 df %>% mutate(z = x + y, .before = 1) #> # A tibble: 1 × 3 #> z x y #> #> 1 3 1 2 df %>% mutate(z = x + y, .after = x) #> # A tibble: 1 × 3 #> x z y #> #> 1 1 3 2 # By default, mutate() keeps all columns from the input data. 
df <- tibble(x = 1, y = 2, a = \"a\", b = \"b\") df %>% mutate(z = x + y, .keep = \"all\") # the default #> # A tibble: 1 × 5 #> x y a b z #> #> 1 1 2 a b 3 df %>% mutate(z = x + y, .keep = \"used\") #> # A tibble: 1 × 3 #> x y z #> #> 1 1 2 3 df %>% mutate(z = x + y, .keep = \"unused\") #> # A tibble: 1 × 3 #> a b z #> #> 1 a b 3 df %>% mutate(z = x + y, .keep = \"none\") #> # A tibble: 1 × 1 #> z #> #> 1 3 # Grouping ---------------------------------------- # The mutate operation may yield different results on grouped # tibbles because the expressions are computed within groups. # The following normalises `mass` by the global average: starwars %>% select(name, mass, species) %>% mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) #> # A tibble: 87 × 4 #> name mass species mass_norm #> #> 1 Luke Skywalker 77 Human 0.791 #> 2 C-3PO 75 Droid 0.771 #> 3 R2-D2 32 Droid 0.329 #> 4 Darth Vader 136 Human 1.40 #> 5 Leia Organa 49 Human 0.504 #> 6 Owen Lars 120 Human 1.23 #> 7 Beru Whitesun Lars 75 Human 0.771 #> 8 R5-D4 32 Droid 0.329 #> 9 Biggs Darklighter 84 Human 0.863 #> 10 Obi-Wan Kenobi 77 Human 0.791 #> # ℹ 77 more rows # Whereas this normalises `mass` by the averages within species # levels: starwars %>% select(name, mass, species) %>% group_by(species) %>% mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) #> # A tibble: 87 × 4 #> # Groups: species [38] #> name mass species mass_norm #> #> 1 Luke Skywalker 77 Human 0.947 #> 2 C-3PO 75 Droid 1.08 #> 3 R2-D2 32 Droid 0.459 #> 4 Darth Vader 136 Human 1.67 #> 5 Leia Organa 49 Human 0.603 #> 6 Owen Lars 120 Human 1.48 #> 7 Beru Whitesun Lars 75 Human 0.922 #> 8 R5-D4 32 Droid 0.459 #> 9 Biggs Darklighter 84 Human 1.03 #> 10 Obi-Wan Kenobi 77 Human 0.947 #> # ℹ 77 more rows # Indirection ---------------------------------------- # Refer to column names stored as strings with the `.data` pronoun: vars <- c(\"mass\", \"height\") mutate(starwars, prod = .data[[vars[[1]]]] * .data[[vars[[2]]]]) #> # A tibble: 87 × 15 #> 
name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Sky… 172 77 blond fair blue 19 male #> 2 C-3PO 167 75 NA gold yellow 112 none #> 3 R2-D2 96 32 NA white, bl… red 33 none #> 4 Darth Va… 202 136 none white yellow 41.9 male #> 5 Leia Org… 150 49 brown light brown 19 fema… #> 6 Owen Lars 178 120 brown, gr… light blue 52 male #> 7 Beru Whi… 165 75 brown light blue 47 fema… #> 8 R5-D4 97 32 NA white, red red NA none #> 9 Biggs Da… 183 84 black light brown 24 male #> 10 Obi-Wan … 182 77 auburn, w… fair blue-gray 57 male #> # ℹ 77 more rows #> # ℹ 7 more variables: gender , homeworld , species , #> # films , vehicles , starships , prod # Learn more in ?rlang::args_data_masking"},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate_all.html","id":null,"dir":"Reference","previous_headings":"","what":"Mutate multiple columns — mutate_all","title":"Mutate multiple columns — mutate_all","text":"Scoped verbs (_if, _at, _all) superseded use pick() across() existing verb. See vignette(\"colwise\") details. scoped variants mutate() transmute() make easy apply transformation multiple variables. three variants: _all affects every variable _at affects variables selected character vector vars() _if affects variables selected predicate function:","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate_all.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Mutate multiple columns — mutate_all","text":"","code":"mutate_all(.tbl, .funs, ...) mutate_if(.tbl, .predicate, .funs, ...) mutate_at(.tbl, .vars, .funs, ..., .cols = NULL) transmute_all(.tbl, .funs, ...) transmute_if(.tbl, .predicate, .funs, ...) transmute_at(.tbl, .vars, .funs, ..., .cols = NULL)"},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate_all.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Mutate multiple columns — mutate_all","text":".tbl tbl object. 
.funs function fun, quosure style lambda ~ fun(.) list either form. ... Additional arguments function calls .funs. evaluated , tidy dots support. .predicate predicate function applied columns logical vector. variables .predicate returns TRUE selected. argument passed rlang::as_function() thus supports quosure-style lambda functions strings representing function names. .vars list columns generated vars(), character vector column names, numeric vector column positions, NULL. .cols argument renamed .vars fit dplyr's terminology deprecated.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate_all.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Mutate multiple columns — mutate_all","text":"data frame. default, newly created columns shortest names needed uniquely identify output. force inclusion name, even needed, name input (see examples details).","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate_all.html","id":"grouping-variables","dir":"Reference","previous_headings":"","what":"Grouping variables","title":"Mutate multiple columns — mutate_all","text":"applied grouped tibble, operations applied grouping variables. behaviour depends whether selection implicit (selections) explicit (selections). Grouping variables covered explicit selections mutate_at() transmute_at() always error. Add -group_cols() vars() selection avoid : remove group_vars() character vector column names: Grouping variables covered implicit selections ignored mutate_all(), transmute_all(), mutate_if(), transmute_if().","code":"data %>% mutate_at(vars(-group_cols(), ...), myoperation) nms <- setdiff(nms, group_vars(data)) data %>% mutate_at(vars, myoperation)"},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate_all.html","id":"naming","dir":"Reference","previous_headings":"","what":"Naming","title":"Mutate multiple columns — mutate_all","text":"names new columns derived names input variables names functions. 
one unnamed function (.e. .funs unnamed list length one), names input variables used name new columns; _at functions, one unnamed variable (.e., .vars form vars(a_single_column)) .funs length greater one, names functions used name new columns; otherwise, new names created concatenating names input variables names functions, separated underscore \"_\". .funs argument can named unnamed list. function unnamed name derived automatically, name form \"fn#\" used. Similarly, vars() accepts named unnamed arguments. variable .vars named, new column name created. Name collisions new columns disambiguated using unique suffix.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/mutate_all.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Mutate multiple columns — mutate_all","text":"","code":"iris <- as_tibble(iris) # All variants can be passed functions and additional arguments, # purrr-style. The _at() variants directly support strings. 
Here # we'll scale the variables `height` and `mass`: scale2 <- function(x, na.rm = FALSE) (x - mean(x, na.rm = na.rm)) / sd(x, na.rm) starwars %>% mutate_at(c(\"height\", \"mass\"), scale2) #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Sky… NA NA blond fair blue 19 male #> 2 C-3PO NA NA NA gold yellow 112 none #> 3 R2-D2 NA NA NA white, bl… red 33 none #> 4 Darth Va… NA NA none white yellow 41.9 male #> 5 Leia Org… NA NA brown light brown 19 fema… #> 6 Owen Lars NA NA brown, gr… light blue 52 male #> 7 Beru Whi… NA NA brown light blue 47 fema… #> 8 R5-D4 NA NA NA white, red red NA none #> 9 Biggs Da… NA NA black light brown 24 male #> 10 Obi-Wan … NA NA auburn, w… fair blue-gray 57 male #> # ℹ 77 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships # -> starwars %>% mutate(across(c(\"height\", \"mass\"), scale2)) #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Sky… NA NA blond fair blue 19 male #> 2 C-3PO NA NA NA gold yellow 112 none #> 3 R2-D2 NA NA NA white, bl… red 33 none #> 4 Darth Va… NA NA none white yellow 41.9 male #> 5 Leia Org… NA NA brown light brown 19 fema… #> 6 Owen Lars NA NA brown, gr… light blue 52 male #> 7 Beru Whi… NA NA brown light blue 47 fema… #> 8 R5-D4 NA NA NA white, red red NA none #> 9 Biggs Da… NA NA black light brown 24 male #> 10 Obi-Wan … NA NA auburn, w… fair blue-gray 57 male #> # ℹ 77 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships # You can pass additional arguments to the function: starwars %>% mutate_at(c(\"height\", \"mass\"), scale2, na.rm = TRUE) #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke … -0.0749 -0.120 blond fair blue 19 male #> 2 C-3PO -0.219 -0.132 NA gold yellow 112 none #> 3 R2-D2 -2.26 -0.385 NA white, bl… red 33 none #> 4 Darth… 0.788 0.228 none 
white yellow 41.9 male #> 5 Leia … -0.708 -0.285 brown light brown 19 fema… #> 6 Owen … 0.0976 0.134 brown, gr… light blue 52 male #> 7 Beru … -0.276 -0.132 brown light blue 47 fema… #> 8 R5-D4 -2.23 -0.385 NA white, red red NA none #> 9 Biggs… 0.241 -0.0786 black light brown 24 male #> 10 Obi-W… 0.213 -0.120 auburn, w… fair blue-gray 57 male #> # ℹ 77 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships starwars %>% mutate_at(c(\"height\", \"mass\"), ~scale2(., na.rm = TRUE)) #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke … -0.0749 -0.120 blond fair blue 19 male #> 2 C-3PO -0.219 -0.132 NA gold yellow 112 none #> 3 R2-D2 -2.26 -0.385 NA white, bl… red 33 none #> 4 Darth… 0.788 0.228 none white yellow 41.9 male #> 5 Leia … -0.708 -0.285 brown light brown 19 fema… #> 6 Owen … 0.0976 0.134 brown, gr… light blue 52 male #> 7 Beru … -0.276 -0.132 brown light blue 47 fema… #> 8 R5-D4 -2.23 -0.385 NA white, red red NA none #> 9 Biggs… 0.241 -0.0786 black light brown 24 male #> 10 Obi-W… 0.213 -0.120 auburn, w… fair blue-gray 57 male #> # ℹ 77 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships # -> starwars %>% mutate(across(c(\"height\", \"mass\"), ~ scale2(.x, na.rm = TRUE))) #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke … -0.0749 -0.120 blond fair blue 19 male #> 2 C-3PO -0.219 -0.132 NA gold yellow 112 none #> 3 R2-D2 -2.26 -0.385 NA white, bl… red 33 none #> 4 Darth… 0.788 0.228 none white yellow 41.9 male #> 5 Leia … -0.708 -0.285 brown light brown 19 fema… #> 6 Owen … 0.0976 0.134 brown, gr… light blue 52 male #> 7 Beru … -0.276 -0.132 brown light blue 47 fema… #> 8 R5-D4 -2.23 -0.385 NA white, red red NA none #> 9 Biggs… 0.241 -0.0786 black light brown 24 male #> 10 Obi-W… 0.213 -0.120 auburn, w… fair blue-gray 57 male #> # ℹ 77 more rows #> # ℹ 6 more 
variables: gender , homeworld , species , #> # films , vehicles , starships # You can also supply selection helpers to _at() functions but you have # to quote them with vars(): iris %>% mutate_at(vars(matches(\"Sepal\")), log) #> # A tibble: 150 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 1.63 1.25 1.4 0.2 setosa #> 2 1.59 1.10 1.4 0.2 setosa #> 3 1.55 1.16 1.3 0.2 setosa #> 4 1.53 1.13 1.5 0.2 setosa #> 5 1.61 1.28 1.4 0.2 setosa #> 6 1.69 1.36 1.7 0.4 setosa #> 7 1.53 1.22 1.4 0.3 setosa #> 8 1.61 1.22 1.5 0.2 setosa #> 9 1.48 1.06 1.4 0.2 setosa #> 10 1.59 1.13 1.5 0.1 setosa #> # ℹ 140 more rows iris %>% mutate(across(matches(\"Sepal\"), log)) #> # A tibble: 150 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 1.63 1.25 1.4 0.2 setosa #> 2 1.59 1.10 1.4 0.2 setosa #> 3 1.55 1.16 1.3 0.2 setosa #> 4 1.53 1.13 1.5 0.2 setosa #> 5 1.61 1.28 1.4 0.2 setosa #> 6 1.69 1.36 1.7 0.4 setosa #> 7 1.53 1.22 1.4 0.3 setosa #> 8 1.61 1.22 1.5 0.2 setosa #> 9 1.48 1.06 1.4 0.2 setosa #> 10 1.59 1.13 1.5 0.1 setosa #> # ℹ 140 more rows # The _if() variants apply a predicate function (a function that # returns TRUE or FALSE) to determine the relevant subset of # columns. 
Here we divide all the numeric columns by 100: starwars %>% mutate_if(is.numeric, scale2, na.rm = TRUE) #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke … -0.0749 -0.120 blond fair blue -0.443 male #> 2 C-3PO -0.219 -0.132 NA gold yellow 0.158 none #> 3 R2-D2 -2.26 -0.385 NA white, bl… red -0.353 none #> 4 Darth… 0.788 0.228 none white yellow -0.295 male #> 5 Leia … -0.708 -0.285 brown light brown -0.443 fema… #> 6 Owen … 0.0976 0.134 brown, gr… light blue -0.230 male #> 7 Beru … -0.276 -0.132 brown light blue -0.262 fema… #> 8 R5-D4 -2.23 -0.385 NA white, red red NA none #> 9 Biggs… 0.241 -0.0786 black light brown -0.411 male #> 10 Obi-W… 0.213 -0.120 auburn, w… fair blue-gray -0.198 male #> # ℹ 77 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships starwars %>% mutate(across(where(is.numeric), ~ scale2(.x, na.rm = TRUE))) #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke … -0.0749 -0.120 blond fair blue -0.443 male #> 2 C-3PO -0.219 -0.132 NA gold yellow 0.158 none #> 3 R2-D2 -2.26 -0.385 NA white, bl… red -0.353 none #> 4 Darth… 0.788 0.228 none white yellow -0.295 male #> 5 Leia … -0.708 -0.285 brown light brown -0.443 fema… #> 6 Owen … 0.0976 0.134 brown, gr… light blue -0.230 male #> 7 Beru … -0.276 -0.132 brown light blue -0.262 fema… #> 8 R5-D4 -2.23 -0.385 NA white, red red NA none #> 9 Biggs… 0.241 -0.0786 black light brown -0.411 male #> 10 Obi-W… 0.213 -0.120 auburn, w… fair blue-gray -0.198 male #> # ℹ 77 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships # mutate_if() is particularly useful for transforming variables from # one type to another iris %>% mutate_if(is.factor, as.character) #> # A tibble: 150 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 
setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows iris %>% mutate_if(is.double, as.integer) #> # A tibble: 150 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5 3 1 0 setosa #> 2 4 3 1 0 setosa #> 3 4 3 1 0 setosa #> 4 4 3 1 0 setosa #> 5 5 3 1 0 setosa #> 6 5 3 1 0 setosa #> 7 4 3 1 0 setosa #> 8 5 3 1 0 setosa #> 9 4 2 1 0 setosa #> 10 4 3 1 0 setosa #> # ℹ 140 more rows # -> iris %>% mutate(across(where(is.factor), as.character)) #> # A tibble: 150 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows iris %>% mutate(across(where(is.double), as.integer)) #> # A tibble: 150 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5 3 1 0 setosa #> 2 4 3 1 0 setosa #> 3 4 3 1 0 setosa #> 4 4 3 1 0 setosa #> 5 5 3 1 0 setosa #> 6 5 3 1 0 setosa #> 7 4 3 1 0 setosa #> 8 5 3 1 0 setosa #> 9 4 2 1 0 setosa #> 10 4 3 1 0 setosa #> # ℹ 140 more rows # Multiple transformations ---------------------------------------- # If you want to apply multiple transformations, pass a list of # functions. 
When there are multiple functions, they create new # variables instead of modifying the variables in place: iris %>% mutate_if(is.numeric, list(scale2, log)) #> # A tibble: 150 × 13 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows #> # ℹ 8 more variables: Sepal.Length_fn1 , Sepal.Width_fn1 , #> # Petal.Length_fn1 , Petal.Width_fn1 , #> # Sepal.Length_fn2 , Sepal.Width_fn2 , #> # Petal.Length_fn2 , Petal.Width_fn2 iris %>% mutate_if(is.numeric, list(~scale2(.), ~log(.))) #> # A tibble: 150 × 13 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows #> # ℹ 8 more variables: Sepal.Length_scale2 , #> # Sepal.Width_scale2 , Petal.Length_scale2 , #> # Petal.Width_scale2 , Sepal.Length_log , #> # Sepal.Width_log , Petal.Length_log , Petal.Width_log iris %>% mutate_if(is.numeric, list(scale = scale2, log = log)) #> # A tibble: 150 × 13 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows #> # ℹ 8 more variables: Sepal.Length_scale , Sepal.Width_scale , #> # Petal.Length_scale , Petal.Width_scale , #> # Sepal.Length_log , Sepal.Width_log , #> # Petal.Length_log , 
Petal.Width_log # -> iris %>% as_tibble() %>% mutate(across(where(is.numeric), list(scale = scale2, log = log))) #> # A tibble: 150 × 13 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows #> # ℹ 8 more variables: Sepal.Length_scale , Sepal.Length_log , #> # Sepal.Width_scale , Sepal.Width_log , #> # Petal.Length_scale , Petal.Length_log , #> # Petal.Width_scale , Petal.Width_log # When there's only one function in the list, it modifies existing # variables in place. Give it a name to instead create new variables: iris %>% mutate_if(is.numeric, list(scale2)) #> # A tibble: 150 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 -0.898 1.02 -1.34 -1.31 setosa #> 2 -1.14 -0.132 -1.34 -1.31 setosa #> 3 -1.38 0.327 -1.39 -1.31 setosa #> 4 -1.50 0.0979 -1.28 -1.31 setosa #> 5 -1.02 1.25 -1.34 -1.31 setosa #> 6 -0.535 1.93 -1.17 -1.05 setosa #> 7 -1.50 0.786 -1.34 -1.18 setosa #> 8 -1.02 0.786 -1.28 -1.31 setosa #> 9 -1.74 -0.361 -1.34 -1.31 setosa #> 10 -1.14 0.0979 -1.28 -1.44 setosa #> # ℹ 140 more rows iris %>% mutate_if(is.numeric, list(scale = scale2)) #> # A tibble: 150 × 9 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows #> # ℹ 4 more variables: Sepal.Length_scale , Sepal.Width_scale , #> # Petal.Length_scale , Petal.Width_scale 
"},{"path":"https://dplyr.tidyverse.org/dev/reference/n_distinct.html","id":null,"dir":"Reference","previous_headings":"","what":"Count unique combinations — n_distinct","title":"Count unique combinations — n_distinct","text":"n_distinct() counts number unique/distinct combinations set one vectors. faster concise equivalent nrow(unique(data.frame(...))).","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/n_distinct.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Count unique combinations — n_distinct","text":"","code":"n_distinct(..., na.rm = FALSE)"},{"path":"https://dplyr.tidyverse.org/dev/reference/n_distinct.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Count unique combinations — n_distinct","text":"... Unnamed vectors. multiple vectors supplied, length. na.rm TRUE, exclude missing observations count. multiple vectors ..., observation excluded values missing.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/n_distinct.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Count unique combinations — n_distinct","text":"single number.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/n_distinct.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Count unique combinations — n_distinct","text":"","code":"x <- c(1, 1, 2, 2, 2) n_distinct(x) #> [1] 2 y <- c(3, 3, NA, 3, 3) n_distinct(y) #> [1] 2 n_distinct(y, na.rm = TRUE) #> [1] 1 # Pairs (1, 3), (2, 3), and (2, NA) are distinct n_distinct(x, y) #> [1] 3 # (2, NA) is dropped, leaving 2 distinct combinations n_distinct(x, y, na.rm = TRUE) #> [1] 2 # Also works with data frames n_distinct(data.frame(x, y)) #> [1] 3"},{"path":"https://dplyr.tidyverse.org/dev/reference/na_if.html","id":null,"dir":"Reference","previous_headings":"","what":"Convert values to NA — na_if","title":"Convert values to NA — na_if","text":"translation 
SQL command NULLIF. useful want convert annoying value NA.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/na_if.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Convert values to NA — na_if","text":"","code":"na_if(x, y)"},{"path":"https://dplyr.tidyverse.org/dev/reference/na_if.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Convert values to NA — na_if","text":"x Vector modify y Value vector compare . x y equal, value x replaced NA. y cast type x comparison. y recycled size x comparison. means y can vector size x, time single value.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/na_if.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Convert values to NA — na_if","text":"modified version x replaces values equal y NA.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/na_if.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Convert values to NA — na_if","text":"","code":"na_if(1:5, 5:1) #> [1] 1 2 NA 4 5 x <- c(1, -1, 0, 10) 100 / x #> [1] 100 -100 Inf 10 100 / na_if(x, 0) #> [1] 100 -100 NA 10 y <- c(\"abc\", \"def\", \"\", \"ghi\") na_if(y, \"\") #> [1] \"abc\" \"def\" NA \"ghi\" # `na_if()` allows you to replace `NaN` with `NA`, # even though `NaN == NaN` returns `NA` z <- c(1, NaN, NA, 2, NaN) na_if(z, NaN) #> [1] 1 NA NA 2 NA # `na_if()` is particularly useful inside `mutate()`, # and is meant for use with vectors rather than entire data frames starwars %>% select(name, eye_color) %>% mutate(eye_color = na_if(eye_color, \"unknown\")) #> # A tibble: 87 × 2 #> name eye_color #> #> 1 Luke Skywalker blue #> 2 C-3PO yellow #> 3 R2-D2 red #> 4 Darth Vader yellow #> 5 Leia Organa brown #> 6 Owen Lars blue #> 7 Beru Whitesun Lars blue #> 8 R5-D4 red #> 9 Biggs Darklighter brown #> 10 Obi-Wan Kenobi blue-gray #> # ℹ 77 more rows # `na_if()` can also be 
used with `mutate()` and `across()` # to alter multiple columns starwars %>% mutate(across(where(is.character), ~na_if(., \"unknown\"))) #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Sky… 172 77 blond fair blue 19 male #> 2 C-3PO 167 75 NA gold yellow 112 none #> 3 R2-D2 96 32 NA white, bl… red 33 none #> 4 Darth Va… 202 136 none white yellow 41.9 male #> 5 Leia Org… 150 49 brown light brown 19 fema… #> 6 Owen Lars 178 120 brown, gr… light blue 52 male #> 7 Beru Whi… 165 75 brown light blue 47 fema… #> 8 R5-D4 97 32 NA white, red red NA none #> 9 Biggs Da… 183 84 black light brown 24 male #> 10 Obi-Wan … 182 77 auburn, w… fair blue-gray 57 male #> # ℹ 77 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships "},{"path":"https://dplyr.tidyverse.org/dev/reference/near.html","id":null,"dir":"Reference","previous_headings":"","what":"Compare two numeric vectors — near","title":"Compare two numeric vectors — near","text":"safe way comparing two vectors floating point numbers (pairwise) equal. 
safer using ==, built tolerance","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/near.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Compare two numeric vectors — near","text":"","code":"near(x, y, tol = .Machine$double.eps^0.5)"},{"path":"https://dplyr.tidyverse.org/dev/reference/near.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Compare two numeric vectors — near","text":"x, y Numeric vectors compare tol Tolerance comparison.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/near.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Compare two numeric vectors — near","text":"","code":"sqrt(2) ^ 2 == 2 #> [1] FALSE near(sqrt(2) ^ 2, 2) #> [1] TRUE"},{"path":"https://dplyr.tidyverse.org/dev/reference/nest_by.html","id":null,"dir":"Reference","previous_headings":"","what":"Nest by one or more variables — nest_by","title":"Nest by one or more variables — nest_by","text":"nest_by() closely related group_by(). However, instead storing group structure metadata, made explicit data, giving group key single row along list-column data frames contain data. nest_by() returns rowwise data frame, makes operations grouped data particularly elegant. See vignette(\"rowwise\") details.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/nest_by.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Nest by one or more variables — nest_by","text":"","code":"nest_by(.data, ..., .key = \"data\", .keep = FALSE)"},{"path":"https://dplyr.tidyverse.org/dev/reference/nest_by.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Nest by one or more variables — nest_by","text":".data data frame, data frame extension (e.g. tibble), lazy data frame (e.g. dbplyr dtplyr). See Methods, , details. ... group_by(), variables computations group . 
Computations always done ungrouped data frame. perform computations grouped data, need use separate mutate() step group_by(). Computations allowed nest_by(). ungroup(), variables remove grouping. .key Name list column .keep grouping columns kept list column.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/nest_by.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Nest by one or more variables — nest_by","text":"rowwise data frame. output following properties: rows come underlying group_keys(). columns grouping keys plus one list-column data frames. Data frame attributes preserved, nest_by() fundamentally creates new data frame. tbl one row per unique combination grouping variables. first columns grouping variables, followed list column tibbles matching rows remaining columns.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/nest_by.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Nest by one or more variables — nest_by","text":"Note df %>% nest_by(x, y) roughly equivalent want unnest nested data frame, can either use tidyr::unnest() take advantage reframe()s multi-row behaviour:","code":"df %>% group_by(x, y) %>% summarise(data = list(pick(everything()))) %>% rowwise() nested %>% reframe(data)"},{"path":"https://dplyr.tidyverse.org/dev/reference/nest_by.html","id":"lifecycle","dir":"Reference","previous_headings":"","what":"Lifecycle","title":"Nest by one or more variables — nest_by","text":"nest_by() stable tidyr::nest(.=) provides similar behavior. may deprecated future.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/nest_by.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Nest by one or more variables — nest_by","text":"function generic, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. 
following methods currently available loaded packages: dplyr (data.frame, grouped_df) .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/nest_by.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Nest by one or more variables — nest_by","text":"","code":"# After nesting, you get one row per group iris %>% nest_by(Species) #> # A tibble: 3 × 2 #> # Rowwise: Species #> Species data #> > #> 1 setosa [50 × 4] #> 2 versicolor [50 × 4] #> 3 virginica [50 × 4] starwars %>% nest_by(species) #> # A tibble: 38 × 2 #> # Rowwise: species #> species data #> > #> 1 Aleena [1 × 13] #> 2 Besalisk [1 × 13] #> 3 Cerean [1 × 13] #> 4 Chagrian [1 × 13] #> 5 Clawdite [1 × 13] #> 6 Droid [6 × 13] #> 7 Dug [1 × 13] #> 8 Ewok [1 × 13] #> 9 Geonosian [1 × 13] #> 10 Gungan [3 × 13] #> # ℹ 28 more rows # The output is grouped by row, which makes modelling particularly easy models <- mtcars %>% nest_by(cyl) %>% mutate(model = list(lm(mpg ~ wt, data = data))) models #> # A tibble: 3 × 3 #> # Rowwise: cyl #> cyl data model #> > #> 1 4 [11 × 10] #> 2 6 [7 × 10] #> 3 8 [14 × 10] models %>% summarise(rsq = summary(model)$r.squared) #> `summarise()` has grouped output by 'cyl'. You can override using the #> `.groups` argument. #> # A tibble: 3 × 2 #> # Groups: cyl [3] #> cyl rsq #> #> 1 4 0.509 #> 2 6 0.465 #> 3 8 0.423 # This is particularly elegant with the broom functions models %>% summarise(broom::glance(model)) #> `summarise()` has grouped output by 'cyl'. You can override using the #> `.groups` argument. 
#> # A tibble: 3 × 13 #> # Groups: cyl [3] #> cyl r.squared adj.r.squared sigma statistic p.value df logLik AIC #> #> 1 4 0.509 0.454 3.33 9.32 0.0137 1 -27.7 61.5 #> 2 6 0.465 0.357 1.17 4.34 0.0918 1 -9.83 25.7 #> 3 8 0.423 0.375 2.02 8.80 0.0118 1 -28.7 63.3 #> # ℹ 4 more variables: BIC , deviance , df.residual , #> # nobs models %>% reframe(broom::tidy(model)) #> # A tibble: 6 × 6 #> cyl term estimate std.error statistic p.value #> #> 1 4 (Intercept) 39.6 4.35 9.10 0.00000777 #> 2 4 wt -5.65 1.85 -3.05 0.0137 #> 3 6 (Intercept) 28.4 4.18 6.79 0.00105 #> 4 6 wt -2.78 1.33 -2.08 0.0918 #> 5 8 (Intercept) 23.9 3.01 7.94 0.00000405 #> 6 8 wt -2.19 0.739 -2.97 0.0118 # Note that you can also `reframe()` to unnest the data models %>% reframe(data) #> # A tibble: 32 × 11 #> cyl mpg disp hp drat wt qsec vs am gear carb #> #> 1 4 22.8 108 93 3.85 2.32 18.6 1 1 4 1 #> 2 4 24.4 147. 62 3.69 3.19 20 1 0 4 2 #> 3 4 22.8 141. 95 3.92 3.15 22.9 1 0 4 2 #> 4 4 32.4 78.7 66 4.08 2.2 19.5 1 1 4 1 #> 5 4 30.4 75.7 52 4.93 1.62 18.5 1 1 4 2 #> 6 4 33.9 71.1 65 4.22 1.84 19.9 1 1 4 1 #> 7 4 21.5 120. 97 3.7 2.46 20.0 1 0 3 1 #> 8 4 27.3 79 66 4.08 1.94 18.9 1 1 4 1 #> 9 4 26 120. 91 4.43 2.14 16.7 0 1 5 2 #> 10 4 30.4 95.1 113 3.77 1.51 16.9 1 1 5 2 #> # ℹ 22 more rows"},{"path":"https://dplyr.tidyverse.org/dev/reference/nest_join.html","id":null,"dir":"Reference","previous_headings":"","what":"Nest join — nest_join","title":"Nest join — nest_join","text":"nest join leaves x almost unchanged, except adds new list-column, element contains rows y match corresponding row x.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/nest_join.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Nest join — nest_join","text":"","code":"nest_join(x, y, by = NULL, copy = FALSE, keep = NULL, name = NULL, ...) 
# S3 method for data.frame nest_join( x, y, by = NULL, copy = FALSE, keep = NULL, name = NULL, ..., na_matches = c(\"na\", \"never\"), unmatched = \"drop\" )"},{"path":"https://dplyr.tidyverse.org/dev/reference/nest_join.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Nest join — nest_join","text":"x, y pair data frames, data frame extensions (e.g. tibble), lazy data frames (e.g. dbplyr dtplyr). See Methods, , details. join specification created join_by(), character vector variables join . NULL, default, *_join() perform natural join, using variables common across x y. message lists variables can check correct; suppress message supplying explicitly. join different variables x y, use join_by() specification. example, join_by(== b) match x$y$b. join multiple variables, use join_by() specification multiple expressions. example, join_by(== b, c == d) match x$y$b x$c y$d. column names x y, can shorten listing variable names, like join_by(, c). join_by() can also used perform inequality, rolling, overlap joins. See documentation ?join_by details types joins. simple equality joins, can alternatively specify character vector variable names join . example, = c(\"\", \"b\") joins x$y$x$b y$b. variable names differ x y, use named character vector like = c(\"x_a\" = \"y_a\", \"x_b\" = \"y_b\"). perform cross-join, generating combinations x y, see cross_join(). copy x y data source, copy TRUE, y copied src x. allows join tables across srcs, potentially expensive operation must opt . keep new list-column contain join keys? default preserve join keys inequality joins. name name list-column created join. NULL, default, name y used. ... parameters passed onto methods. na_matches two NA two NaN values match? \"na\", default, treats two NA two NaN values equal, like %%, match(), merge(). \"never\" treats two NA two NaN values different, never match together values. similar joins database sources base::merge(incomparables = NA). 
unmatched unmatched keys result dropped rows handled? \"drop\" drops unmatched keys result. \"error\" throws error unmatched keys detected. unmatched intended protect accidentally dropping rows join. checks unmatched keys input potentially drop rows. left joins, checks y. right joins, checks x. inner joins, checks x y. case, unmatched also allowed character vector length 2 specify behavior x y independently.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/nest_join.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Nest join — nest_join","text":"output: type x (including groups). exactly number rows x. Contains columns x order values. modified (slightly) keep = FALSE, columns listed coerced common type across x y. Gains one new column called {name} far right, list column containing data frames type y.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/nest_join.html","id":"relationship-to-other-joins","dir":"Reference","previous_headings":"","what":"Relationship to other joins","title":"Nest join — nest_join","text":"can recreate many joins result nest join: inner_join() nest_join() plus tidyr::unnest(). left_join() nest_join() plus tidyr::unnest(keep_empty = TRUE). semi_join() nest_join() plus filter() check every element data least one row. anti_join() nest_join() plus filter() check every element zero rows.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/nest_join.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Nest join — nest_join","text":"function generic, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. 
following methods currently available loaded packages: dplyr (data.frame) .","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/nest_join.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Nest join — nest_join","text":"","code":"df1 <- tibble(x = 1:3) df2 <- tibble(x = c(2, 3, 3), y = c(\"a\", \"b\", \"c\")) out <- nest_join(df1, df2) #> Joining with `by = join_by(x)` out #> # A tibble: 3 × 2 #> x df2 #> #> 1 1 #> 2 2 #> 3 3 out$df2 #> [[1]] #> # A tibble: 0 × 1 #> # ℹ 1 variable: y #> #> [[2]] #> # A tibble: 1 × 1 #> y #> #> 1 a #> #> [[3]] #> # A tibble: 2 × 1 #> y #> #> 1 b #> 2 c #>"},{"path":"https://dplyr.tidyverse.org/dev/reference/new_grouped_df.html","id":null,"dir":"Reference","previous_headings":"","what":"Low-level construction and validation for the grouped_df and rowwise_df classes — new_grouped_df","title":"Low-level construction and validation for the grouped_df and rowwise_df classes — new_grouped_df","text":"new_grouped_df() new_rowwise_df() constructors designed high-performance check types, values. means caller's responsibility create valid values, hence expert use . 
validate_grouped_df() validate_rowwise_df() validate attributes grouped_df rowwise_df.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/new_grouped_df.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Low-level construction and validation for the grouped_df and rowwise_df classes — new_grouped_df","text":"","code":"new_grouped_df(x, groups, ..., class = character()) validate_grouped_df(x, check_bounds = FALSE) new_rowwise_df(data, group_data = NULL, ..., class = character()) validate_rowwise_df(x)"},{"path":"https://dplyr.tidyverse.org/dev/reference/new_grouped_df.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Low-level construction and validation for the grouped_df and rowwise_df classes — new_grouped_df","text":"x data frame groups grouped structure, groups data frame. last column called .rows list 1 based integer vectors 1 number rows .data. ... additional attributes class additional class, prepended canonical classes. 
check_bounds whether check indices bounds problems grouped_df objects","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/new_grouped_df.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Low-level construction and validation for the grouped_df and rowwise_df classes — new_grouped_df","text":"","code":"# 5 bootstrap samples tbl <- new_grouped_df( tibble(x = rnorm(10)), groups = tibble(\".rows\" := replicate(5, sample(1:10, replace = TRUE), simplify = FALSE)) ) # mean of each bootstrap sample summarise(tbl, x = mean(x)) #> # A tibble: 5 × 1 #> x #> #> 1 0.181 #> 2 -0.0442 #> 3 0.450 #> 4 -0.730 #> 5 -0.369"},{"path":"https://dplyr.tidyverse.org/dev/reference/nth.html","id":null,"dir":"Reference","previous_headings":"","what":"Extract the first, last, or nth value from a vector — nth","title":"Extract the first, last, or nth value from a vector — nth","text":"useful helpers extracting single value vector. guaranteed return meaningful value, even input shorter expected. can also provide optional secondary vector defines ordering.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/nth.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Extract the first, last, or nth value from a vector — nth","text":"","code":"nth(x, n, order_by = NULL, default = NULL, na_rm = FALSE) first(x, order_by = NULL, default = NULL, na_rm = FALSE) last(x, order_by = NULL, default = NULL, na_rm = FALSE)"},{"path":"https://dplyr.tidyverse.org/dev/reference/nth.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Extract the first, last, or nth value from a vector — nth","text":"x vector n nth(), single integer specifying position. Negative integers index end (.e. -1L return last value vector). order_by optional vector size x used determine order. default default value use position exist x. NULL, default, missing value used. 
supplied, must single value, cast type x. x list , default allowed value. type size restrictions case. na_rm missing values x removed extracting value?","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/nth.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Extract the first, last, or nth value from a vector — nth","text":"x list, single element list. Otherwise, vector type x size 1.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/nth.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Extract the first, last, or nth value from a vector — nth","text":"vector types, first(x), last(x), nth(x, n) work like x[[1]], x[[length(x)], x[[n]], respectively. primary exception data frames, instead retrieve rows, .e. x[1, ], x[nrow(x), ], x[n, ]. consistent tidyverse/vctrs principle treats data frames vector rows, rather vector columns.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/nth.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Extract the first, last, or nth value from a vector — nth","text":"","code":"x <- 1:10 y <- 10:1 first(x) #> [1] 1 last(y) #> [1] 1 nth(x, 1) #> [1] 1 nth(x, 5) #> [1] 5 nth(x, -2) #> [1] 9 # `first()` and `last()` are often useful in `summarise()` df <- tibble(x = x, y = y) df %>% summarise( across(x:y, first, .names = \"{col}_first\"), y_last = last(y) ) #> # A tibble: 1 × 3 #> x_first y_first y_last #> #> 1 1 10 1 # Selecting a position that is out of bounds returns a default value nth(x, 11) #> [1] NA nth(x, 0) #> [1] NA # This out of bounds behavior also applies to empty vectors first(integer()) #> [1] NA # You can customize the default value with `default` nth(x, 11, default = -1L) #> [1] -1 first(integer(), default = 0L) #> [1] 0 # `order_by` provides optional ordering last(x) #> [1] 10 last(x, order_by = y) #> [1] 1 # `na_rm` removes missing values before extracting the value z <- 
c(NA, NA, 1, 3, NA, 5, NA) first(z) #> [1] NA first(z, na_rm = TRUE) #> [1] 1 last(z, na_rm = TRUE) #> [1] 5 nth(z, 3, na_rm = TRUE) #> [1] 5 # For data frames, these select entire rows df <- tibble(a = 1:5, b = 6:10) first(df) #> # A tibble: 1 × 2 #> a b #> #> 1 1 6 nth(df, 4) #> # A tibble: 1 × 2 #> a b #> #> 1 4 9"},{"path":"https://dplyr.tidyverse.org/dev/reference/ntile.html","id":null,"dir":"Reference","previous_headings":"","what":"Bucket a numeric vector into n groups — ntile","title":"Bucket a numeric vector into n groups — ntile","text":"ntile() sort rough rank, breaks input vector n buckets. length(x) integer multiple n, size buckets differ one, larger buckets coming first. Unlike ranking functions, ntile() ignores ties: create evenly sized buckets even value x ends different buckets.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/ntile.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Bucket a numeric vector into n groups — ntile","text":"","code":"ntile(x = row_number(), n)"},{"path":"https://dplyr.tidyverse.org/dev/reference/ntile.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Bucket a numeric vector into n groups — ntile","text":"x vector rank default, smallest values get smallest ranks. Use desc() reverse direction largest values get smallest ranks. Missing values given rank NA. Use coalesce(x, Inf) coalesce(x, -Inf) want treat largest smallest values respectively. rank multiple columns , supply data frame. 
n Number groups bucket ","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/ntile.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Bucket a numeric vector into n groups — ntile","text":"","code":"x <- c(5, 1, 3, 2, 2, NA) ntile(x, 2) #> [1] 2 1 2 1 1 NA ntile(x, 4) #> [1] 4 1 3 1 2 NA # If the bucket sizes are uneven, the larger buckets come first ntile(1:8, 3) #> [1] 1 1 1 2 2 2 3 3 # Ties are ignored ntile(rep(1, 8), 3) #> [1] 1 1 1 2 2 2 3 3"},{"path":"https://dplyr.tidyverse.org/dev/reference/order_by.html","id":null,"dir":"Reference","previous_headings":"","what":"A helper function for ordering window function output — order_by","title":"A helper function for ordering window function output — order_by","text":"function makes possible control ordering window functions R specific ordering parameter. translated SQL modify order clause function.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/order_by.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"A helper function for ordering window function output — order_by","text":"","code":"order_by(order_by, call)"},{"path":"https://dplyr.tidyverse.org/dev/reference/order_by.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"A helper function for ordering window function output — order_by","text":"order_by vector order_by call function call window function, first argument vector operated ","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/order_by.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"A helper function for ordering window function output — order_by","text":"function works changing call instead call with_order() appropriate arguments.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/order_by.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"A 
helper function for ordering window function output — order_by","text":"","code":"order_by(10:1, cumsum(1:10)) #> [1] 55 54 52 49 45 40 34 27 19 10 x <- 10:1 y <- 1:10 order_by(x, cumsum(y)) #> [1] 55 54 52 49 45 40 34 27 19 10 df <- data.frame(year = 2000:2005, value = (0:5) ^ 2) scrambled <- df[sample(nrow(df)), ] wrong <- mutate(scrambled, running = cumsum(value)) arrange(wrong, year) #> year value running #> 1 2000 0 34 #> 2 2001 1 51 #> 3 2002 4 55 #> 4 2003 9 34 #> 5 2004 16 50 #> 6 2005 25 25 right <- mutate(scrambled, running = order_by(year, cumsum(value))) arrange(right, year) #> year value running #> 1 2000 0 0 #> 2 2001 1 1 #> 3 2002 4 5 #> 4 2003 9 14 #> 5 2004 16 30 #> 6 2005 25 55"},{"path":"https://dplyr.tidyverse.org/dev/reference/percent_rank.html","id":null,"dir":"Reference","previous_headings":"","what":"Proportional ranking functions — percent_rank","title":"Proportional ranking functions — percent_rank","text":"two ranking functions implement two slightly different ways compute percentile. x_i x: cume_dist(x) counts total number values less equal x_i, divides number observations. percent_rank(x) counts total number values less x_i, divides number observations minus 1. cases, missing values ignored counting number observations.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/percent_rank.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Proportional ranking functions — percent_rank","text":"","code":"percent_rank(x) cume_dist(x)"},{"path":"https://dplyr.tidyverse.org/dev/reference/percent_rank.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Proportional ranking functions — percent_rank","text":"x vector rank default, smallest values get smallest ranks. Use desc() reverse direction largest values get smallest ranks. Missing values given rank NA. Use coalesce(x, Inf) coalesce(x, -Inf) want treat largest smallest values respectively. 
rank multiple columns , supply data frame.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/percent_rank.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Proportional ranking functions — percent_rank","text":"numeric vector containing proportion.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/percent_rank.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Proportional ranking functions — percent_rank","text":"","code":"x <- c(5, 1, 3, 2, 2) cume_dist(x) #> [1] 1.0 0.2 0.8 0.6 0.6 percent_rank(x) #> [1] 1.00 0.00 0.75 0.25 0.25 # You can understand what's going on by computing it by hand sapply(x, function(xi) sum(x <= xi) / length(x)) #> [1] 1.0 0.2 0.8 0.6 0.6 sapply(x, function(xi) sum(x < xi) / (length(x) - 1)) #> [1] 1.00 0.00 0.75 0.25 0.25 # The real computations are a little more complex in order to # correctly deal with missing values"},{"path":"https://dplyr.tidyverse.org/dev/reference/pick.html","id":null,"dir":"Reference","previous_headings":"","what":"Select a subset of columns — pick","title":"Select a subset of columns — pick","text":"pick() provides way easily select subset columns data using select() semantics inside \"data-masking\" function like mutate() summarise(). pick() returns data frame containing selected columns current group. pick() complementary across(): pick(), typically apply function full data frame. across(), typically apply function column.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/pick.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Select a subset of columns — pick","text":"","code":"pick(...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/pick.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Select a subset of columns — pick","text":"... Columns pick. 
pick grouping columns already automatically handled verb (.e. summarise() mutate()).","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/pick.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Select a subset of columns — pick","text":"tibble containing selected columns current group.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/pick.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Select a subset of columns — pick","text":"Theoretically, pick() intended replaceable equivalent call tibble(). example, pick(, c) replaced tibble(= , c = c), pick(everything()) data frame cols , b, c replaced tibble(= , b = b, c = c). pick() specially handles case empty selection returning 1 row, 0 column tibble, exact replacement like:","code":"size <- vctrs::vec_size_common(..., .absent = 1L) out <- vctrs::vec_recycle_common(..., .size = size) tibble::new_tibble(out, nrow = size)"},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/pick.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Select a subset of columns — pick","text":"","code":"df <- tibble( x = c(3, 2, 2, 2, 1), y = c(0, 2, 1, 1, 4), z1 = c(\"a\", \"a\", \"a\", \"b\", \"a\"), z2 = c(\"c\", \"d\", \"d\", \"a\", \"c\") ) df #> # A tibble: 5 × 4 #> x y z1 z2 #> #> 1 3 0 a c #> 2 2 2 a d #> 3 2 1 a d #> 4 2 1 b a #> 5 1 4 a c # `pick()` provides a way to select a subset of your columns using # tidyselect. It returns a data frame. df %>% mutate(cols = pick(x, y)) #> # A tibble: 5 × 5 #> x y z1 z2 cols$x $y #> #> 1 3 0 a c 3 0 #> 2 2 2 a d 2 2 #> 3 2 1 a d 2 1 #> 4 2 1 b a 2 1 #> 5 1 4 a c 1 4 # This is useful for functions that take data frames as inputs. # For example, you can compute a joint rank between `x` and `y`. 
df %>% mutate(rank = dense_rank(pick(x, y))) #> # A tibble: 5 × 5 #> x y z1 z2 rank #> #> 1 3 0 a c 4 #> 2 2 2 a d 3 #> 3 2 1 a d 2 #> 4 2 1 b a 2 #> 5 1 4 a c 1 # `pick()` is also useful as a bridge between data-masking functions (like # `mutate()` or `group_by()`) and functions with tidy-select behavior (like # `select()`). For example, you can use `pick()` to create a wrapper around # `group_by()` that takes a tidy-selection of columns to group on. For more # bridge patterns, see # https://rlang.r-lib.org/reference/topic-data-mask-programming.html#bridge-patterns. my_group_by <- function(data, cols) { group_by(data, pick({{ cols }})) } df %>% my_group_by(c(x, starts_with(\"z\"))) #> # A tibble: 5 × 4 #> # Groups: x, z1, z2 [4] #> x y z1 z2 #> #> 1 3 0 a c #> 2 2 2 a d #> 3 2 1 a d #> 4 2 1 b a #> 5 1 4 a c # Or you can use it to dynamically select columns to `count()` by df %>% count(pick(starts_with(\"z\"))) #> # A tibble: 3 × 3 #> z1 z2 n #> #> 1 a c 2 #> 2 a d 2 #> 3 b a 1"},{"path":"https://dplyr.tidyverse.org/dev/reference/progress_estimated.html","id":null,"dir":"Reference","previous_headings":"","what":"Progress bar with estimated time. — progress_estimated","title":"Progress bar with estimated time. — progress_estimated","text":"progress bar deprecated since providing progress bars responsibility dplyr. Instead, might try powerful progress package. reference class represents text progress bar displayed estimated time remaining. finished, displays total duration. automatic progress bar can disabled setting option dplyr.show_progress FALSE.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/progress_estimated.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Progress bar with estimated time. 
— progress_estimated","text":"","code":"progress_estimated(n, min_time = 0)"},{"path":"https://dplyr.tidyverse.org/dev/reference/progress_estimated.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Progress bar with estimated time. — progress_estimated","text":"n Total number items min_time Progress bar wait least min_time seconds elapsed displaying results.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/progress_estimated.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Progress bar with estimated time. — progress_estimated","text":"ref class methods tick(), print(), pause(), stop().","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/progress_estimated.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Progress bar with estimated time. — progress_estimated","text":"","code":"p <- progress_estimated(3) #> Warning: `progress_estimated()` was deprecated in dplyr 1.0.0. p$tick() p$tick() p$tick() p <- progress_estimated(3) for (i in 1:3) p$pause(0.1)$tick()$print() p <- progress_estimated(3) p$tick()$print()$ pause(1)$stop() # If min_time is set, progress bar not shown until that many # seconds have elapsed p <- progress_estimated(3, min_time = 3) for (i in 1:3) p$pause(0.1)$tick()$print() if (FALSE) { p <- progress_estimated(10, min_time = 3) for (i in 1:10) p$pause(0.5)$tick()$print() }"},{"path":"https://dplyr.tidyverse.org/dev/reference/pull.html","id":null,"dir":"Reference","previous_headings":"","what":"Extract a single column — pull","title":"Extract a single column — pull","text":"pull() similar $. 
mostly useful looks little nicer pipes, also works remote data frames, can optionally name output.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/pull.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Extract a single column — pull","text":"","code":"pull(.data, var = -1, name = NULL, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/pull.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Extract a single column — pull","text":".data data frame, data frame extension (e.g. tibble), lazy data frame (e.g. dbplyr dtplyr). See Methods, , details. var variable specified : literal variable name positive integer, giving position counting left negative integer, giving position counting right. default returns last column (assumption column created recently). argument taken expression supports quasiquotation (can unquote column names column locations). name optional parameter specifies column used names named vector. Specified similar manner var. ... use methods.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/pull.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Extract a single column — pull","text":"vector size .data.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/pull.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Extract a single column — pull","text":"function generic, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. 
following methods currently available loaded packages: dbplyr (tbl_sql), dplyr (data.frame) .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/pull.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Extract a single column — pull","text":"","code":"mtcars %>% pull(-1) #> [1] 4 4 1 1 2 1 4 2 2 4 4 3 3 3 4 4 4 1 2 1 1 2 2 4 2 1 2 2 4 6 8 2 mtcars %>% pull(1) #> [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 #> [15] 10.4 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4 #> [29] 15.8 19.7 15.0 21.4 mtcars %>% pull(cyl) #> [1] 6 6 4 6 8 6 8 4 4 6 6 8 8 8 8 8 8 4 4 4 4 8 8 8 8 4 4 4 8 6 8 4 # Also works for remote sources df <- dbplyr::memdb_frame(x = 1:10, y = 10:1, .name = \"pull-ex\") df %>% mutate(z = x * y) %>% pull() #> [1] 10 18 24 28 30 30 28 24 18 10 # Pull a named vector starwars %>% pull(height, name) #> Luke Skywalker C-3PO R2-D2 #> 172 167 96 #> Darth Vader Leia Organa Owen Lars #> 202 150 178 #> Beru Whitesun Lars R5-D4 Biggs Darklighter #> 165 97 183 #> Obi-Wan Kenobi Anakin Skywalker Wilhuff Tarkin #> 182 188 180 #> Chewbacca Han Solo Greedo #> 228 180 173 #> Jabba Desilijic Tiure Wedge Antilles Jek Tono Porkins #> 175 170 180 #> Yoda Palpatine Boba Fett #> 66 170 183 #> IG-88 Bossk Lando Calrissian #> 200 190 177 #> Lobot Ackbar Mon Mothma #> 175 180 150 #> Arvel Crynyd Wicket Systri Warrick Nien Nunb #> NA 88 160 #> Qui-Gon Jinn Nute Gunray Finis Valorum #> 193 191 170 #> Padmé Amidala Jar Jar Binks Roos Tarpals #> 185 196 224 #> Rugor Nass Ric Olié Watto #> 206 183 137 #> Sebulba Quarsh Panaka Shmi Skywalker #> 112 183 163 #> Darth Maul Bib Fortuna Ayla Secura #> 175 180 178 #> Ratts Tyerel Dud Bolt Gasgano #> 79 94 122 #> Ben Quadinaros Mace Windu Ki-Adi-Mundi #> 163 188 198 #> Kit Fisto Eeth Koth Adi Gallia #> 196 171 184 #> Saesee Tiin Yarael Poof Plo Koon #> 188 264 188 #> Mas Amedda Gregar Typho Cordé #> 196 185 157 #> Cliegg Lars Poggle the Lesser 
Luminara Unduli #> 183 183 170 #> Barriss Offee Dormé Dooku #> 166 165 193 #> Bail Prestor Organa Jango Fett Zam Wesell #> 191 183 168 #> Dexter Jettster Lama Su Taun We #> 198 229 213 #> Jocasta Nu R4-P17 Wat Tambor #> 167 96 193 #> San Hill Shaak Ti Grievous #> 191 178 216 #> Tarfful Raymus Antilles Sly Moore #> 234 188 178 #> Tion Medon Finn Rey #> 206 NA NA #> Poe Dameron BB8 Captain Phasma #> NA NA NA"},{"path":"https://dplyr.tidyverse.org/dev/reference/recode.html","id":null,"dir":"Reference","previous_headings":"","what":"Recode values — recode","title":"Recode values — recode","text":"recode() superseded favor case_match(), handles important cases recode() elegant interface. recode_factor() also superseded, however, direct replacement currently available eventually live forcats. creating new variables based logical vectors, use if_else(). even complicated criteria, use case_when(). recode() vectorised version switch(): can replace numeric values based position name, character factor values name. S3 generic: dplyr provides methods numeric, character, factors. can use recode() directly factors; preserve existing order levels changing values. Alternatively, can use recode_factor(), change order levels match order replacements.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/recode.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Recode values — recode","text":"","code":"recode(.x, ..., .default = NULL, .missing = NULL) recode_factor(.x, ..., .default = NULL, .missing = NULL, .ordered = FALSE)"},{"path":"https://dplyr.tidyverse.org/dev/reference/recode.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Recode values — recode","text":".x vector modify ... Replacements. character factor .x, named replacement based name. numeric .x, can named . named, replacement done based position .e. .x represents positions look replacements. See examples. 
named, argument names current values replaced, argument values new (replacement) values. replacements must type, must either length one length .x. .default supplied, values otherwise matched given value. supplied replacements type original values .x, unmatched values changed. supplied replacements compatible, unmatched values replaced NA. .default must either length 1 length .x. .missing supplied, missing values .x replaced value. Must either length 1 length .x. .ordered TRUE, recode_factor() creates ordered factor.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/recode.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Recode values — recode","text":"vector length .x, type first ..., .default, .missing. recode_factor() returns factor whose levels order .... levels .default .missing come last.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/recode.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Recode values — recode","text":"","code":"char_vec <- sample(c(\"a\", \"b\", \"c\"), 10, replace = TRUE) # `recode()` is superseded by `case_match()` recode(char_vec, a = \"Apple\", b = \"Banana\") #> [1] \"Apple\" \"Apple\" \"Banana\" \"Banana\" \"Banana\" \"Banana\" \"Banana\" #> [8] \"Apple\" \"Apple\" \"c\" case_match(char_vec, \"a\" ~ \"Apple\", \"b\" ~ \"Banana\", .default = char_vec) #> [1] \"Apple\" \"Apple\" \"Banana\" \"Banana\" \"Banana\" \"Banana\" \"Banana\" #> [8] \"Apple\" \"Apple\" \"c\" # With `case_match()`, you don't need typed missings like `NA_character_` recode(char_vec, a = \"Apple\", b = \"Banana\", .default = NA_character_) #> [1] \"Apple\" \"Apple\" \"Banana\" \"Banana\" \"Banana\" \"Banana\" \"Banana\" #> [8] \"Apple\" \"Apple\" NA case_match(char_vec, \"a\" ~ \"Apple\", \"b\" ~ \"Banana\", .default = NA) #> [1] \"Apple\" \"Apple\" \"Banana\" \"Banana\" \"Banana\" \"Banana\" \"Banana\" #> [8] \"Apple\" \"Apple\" NA # Throws an 
error as `NA` is logical, not character. try(recode(char_vec, a = \"Apple\", b = \"Banana\", .default = NA)) #> Error in recode(char_vec, a = \"Apple\", b = \"Banana\", .default = NA) : #> `.default` must be a character vector, not `NA`. # `case_match()` is easier to use with numeric vectors, because you don't # need to turn the numeric values into names num_vec <- c(1:4, NA) recode(num_vec, `2` = 20L, `4` = 40L) #> [1] 1 20 3 40 NA case_match(num_vec, 2 ~ 20, 4 ~ 40, .default = num_vec) #> [1] 1 20 3 40 NA # `case_match()` doesn't have the ability to match by position like # `recode()` does with numeric vectors recode(num_vec, \"a\", \"b\", \"c\", \"d\") #> [1] \"a\" \"b\" \"c\" \"d\" NA recode(c(1,5,3), \"a\", \"b\", \"c\", \"d\", .default = \"nothing\") #> [1] \"a\" \"nothing\" \"c\" # For `case_match()`, incompatible types are an error rather than a warning recode(num_vec, `2` = \"b\", `4` = \"d\") #> Warning: Unreplaced values treated as NA as `.x` is not compatible. #> Please specify replacements exhaustively or supply `.default`. #> [1] NA \"b\" NA \"d\" NA try(case_match(num_vec, 2 ~ \"b\", 4 ~ \"d\", .default = num_vec)) #> Error in case_match(num_vec, 2 ~ \"b\", 4 ~ \"d\", .default = num_vec) : #> Can't combine `..1 (right)` and `.default` . # The factor method of `recode()` can generally be replaced with # `forcats::fct_recode()` factor_vec <- factor(c(\"a\", \"b\", \"c\")) recode(factor_vec, a = \"Apple\") #> [1] Apple b c #> Levels: Apple b c # `recode_factor()` does not currently have a direct replacement, but we # plan to add one to forcats. In the meantime, you can use the `.ptype` # argument to `case_match()`. 
recode_factor( num_vec, `1` = \"z\", `2` = \"y\", `3` = \"x\", .default = \"D\", .missing = \"M\" ) #> [1] z y x D M #> Levels: z y x D M case_match( num_vec, 1 ~ \"z\", 2 ~ \"y\", 3 ~ \"x\", NA ~ \"M\", .default = \"D\", .ptype = factor(levels = c(\"z\", \"y\", \"x\", \"D\", \"M\")) ) #> [1] z y x D M #> Levels: z y x D M"},{"path":"https://dplyr.tidyverse.org/dev/reference/reexports.html","id":null,"dir":"Reference","previous_headings":"","what":"Objects exported from other packages — reexports","title":"Objects exported from other packages — reexports","text":"objects imported packages. Follow links see documentation. magrittr %>% pillar type_sum tibble add_row, as_data_frame, as_tibble, data_frame, lst, tibble, tribble, view tidyselect all_of, any_of, contains, ends_with, everything, last_col, matches, num_range, one_of, starts_with, ","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/reframe.html","id":null,"dir":"Reference","previous_headings":"","what":"Transform each group to an arbitrary number of rows — reframe","title":"Transform each group to an arbitrary number of rows — reframe","text":"summarise() requires argument returns single value, mutate() requires argument returns number rows input, reframe() general workhorse requirements number rows returned per group. reframe() creates new data frame applying functions columns existing data frame. similar summarise(), two big differences: reframe() can return arbitrary number rows per group, summarise() reduces group single row. reframe() always returns ungrouped data frame, summarise() might return grouped rowwise data frame, depending scenario. 
expect use summarise() much often reframe(), reframe() can particularly helpful need apply complex function return single summary value.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/reframe.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Transform each group to an arbitrary number of rows — reframe","text":"","code":"reframe(.data, ..., .by = NULL)"},{"path":"https://dplyr.tidyverse.org/dev/reference/reframe.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Transform each group to an arbitrary number of rows — reframe","text":".data data frame, data frame extension (e.g. tibble), lazy data frame (e.g. dbplyr dtplyr). See Methods, , details. ... Name-value pairs functions. name name variable result. value can vector length. Unnamed data frame values add multiple columns single expression. . Optionally, selection columns group just operation, functioning alternative group_by(). details examples, see ?dplyr_by.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/reframe.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Transform each group to an arbitrary number of rows — reframe","text":".data tibble, tibble. Otherwise, data.frame. rows originate underlying grouping keys. columns combination grouping keys expressions provide. output always ungrouped. 
Data frame attributes preserved, reframe() fundamentally creates new data frame.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/reframe.html","id":"connection-to-tibble","dir":"Reference","previous_headings":"","what":"Connection to tibble","title":"Transform each group to an arbitrary number of rows — reframe","text":"reframe() theoretically connected two functions tibble, tibble::enframe() tibble::deframe(): enframe(): vector -> data frame deframe(): data frame -> vector reframe(): data frame -> data frame","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/reframe.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Transform each group to an arbitrary number of rows — reframe","text":"function generic, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. following methods currently available loaded packages: dbplyr (tbl_lazy), dplyr (data.frame) .","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/reframe.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Transform each group to an arbitrary number of rows — reframe","text":"","code":"table <- c(\"a\", \"b\", \"d\", \"f\") df <- tibble( g = c(1, 1, 1, 2, 2, 2, 2), x = c(\"e\", \"a\", \"b\", \"c\", \"f\", \"d\", \"a\") ) # `reframe()` allows you to apply functions that return # an arbitrary number of rows df %>% reframe(x = intersect(x, table)) #> # A tibble: 4 × 1 #> x #> #> 1 a #> 2 b #> 3 f #> 4 d # Functions are applied per group, and each group can return a # different number of rows. 
df %>% reframe(x = intersect(x, table), .by = g) #> # A tibble: 5 × 2 #> g x #> #> 1 1 a #> 2 1 b #> 3 2 f #> 4 2 d #> 5 2 a # The output is always ungrouped, even when using `group_by()` df %>% group_by(g) %>% reframe(x = intersect(x, table)) #> # A tibble: 5 × 2 #> g x #> #> 1 1 a #> 2 1 b #> 3 2 f #> 4 2 d #> 5 2 a # You can add multiple columns at once using a single expression by returning # a data frame. quantile_df <- function(x, probs = c(0.25, 0.5, 0.75)) { tibble( val = quantile(x, probs, na.rm = TRUE), quant = probs ) } x <- c(10, 15, 18, 12) quantile_df(x) #> # A tibble: 3 × 2 #> val quant #> #> 1 11.5 0.25 #> 2 13.5 0.5 #> 3 15.8 0.75 starwars %>% reframe(quantile_df(height)) #> # A tibble: 3 × 2 #> val quant #> #> 1 167 0.25 #> 2 180 0.5 #> 3 191 0.75 starwars %>% reframe(quantile_df(height), .by = homeworld) #> # A tibble: 147 × 3 #> homeworld val quant #> #> 1 Tatooine 166. 0.25 #> 2 Tatooine 175 0.5 #> 3 Tatooine 183 0.75 #> 4 Naboo 168. 0.25 #> 5 Naboo 183 0.5 #> 6 Naboo 190. 0.75 #> 7 Alderaan 169 0.25 #> 8 Alderaan 188 0.5 #> 9 Alderaan 190. 0.75 #> 10 Stewjon 182 0.25 #> # ℹ 137 more rows starwars %>% reframe( across(c(height, mass), quantile_df, .unpack = TRUE), .by = homeworld ) #> # A tibble: 147 × 5 #> homeworld height_val height_quant mass_val mass_quant #> #> 1 Tatooine 166. 0.25 75 0.25 #> 2 Tatooine 175 0.5 80.5 0.5 #> 3 Tatooine 183 0.75 93 0.75 #> 4 Naboo 168. 0.25 50.2 0.25 #> 5 Naboo 183 0.5 70.5 0.5 #> 6 Naboo 190. 0.75 80.2 0.75 #> 7 Alderaan 169 0.25 56.5 0.25 #> 8 Alderaan 188 0.5 64 0.5 #> 9 Alderaan 190. 
0.75 71.5 0.75 #> 10 Stewjon 182 0.25 77 0.25 #> # ℹ 137 more rows"},{"path":"https://dplyr.tidyverse.org/dev/reference/relocate.html","id":null,"dir":"Reference","previous_headings":"","what":"Change column order — relocate","title":"Change column order — relocate","text":"Use relocate() change column positions, using syntax select() make easy move blocks columns .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/relocate.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Change column order — relocate","text":"","code":"relocate(.data, ..., .before = NULL, .after = NULL)"},{"path":"https://dplyr.tidyverse.org/dev/reference/relocate.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Change column order — relocate","text":".data data frame, data frame extension (e.g. tibble), lazy data frame (e.g. dbplyr dtplyr). See Methods, , details. ... Columns move. ., . Destination columns selected .... Supplying neither move columns left-hand side; specifying error.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/relocate.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Change column order — relocate","text":"object type .data. output following properties: Rows affected. columns appear output, (usually) different place possibly renamed. Data frame attributes preserved. Groups affected.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/relocate.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Change column order — relocate","text":"function generic, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. 
following methods currently available loaded packages: dbplyr (tbl_lazy), dplyr (data.frame) .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/relocate.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Change column order — relocate","text":"","code":"df <- tibble(a = 1, b = 1, c = 1, d = \"a\", e = \"a\", f = \"a\") df %>% relocate(f) #> # A tibble: 1 × 6 #> f a b c d e #> #> 1 a 1 1 1 a a df %>% relocate(a, .after = c) #> # A tibble: 1 × 6 #> b c a d e f #> #> 1 1 1 1 a a a df %>% relocate(f, .before = b) #> # A tibble: 1 × 6 #> a f b c d e #> #> 1 1 a 1 1 a a df %>% relocate(a, .after = last_col()) #> # A tibble: 1 × 6 #> b c d e f a #> #> 1 1 1 a a a 1 # relocated columns can change name df %>% relocate(ff = f) #> # A tibble: 1 × 6 #> ff a b c d e #> #> 1 a 1 1 1 a a # Can also select variables based on their type df %>% relocate(where(is.character)) #> # A tibble: 1 × 6 #> d e f a b c #> #> 1 a a a 1 1 1 df %>% relocate(where(is.numeric), .after = last_col()) #> # A tibble: 1 × 6 #> d e f a b c #> #> 1 a a a 1 1 1 # Or with any other select helper df %>% relocate(any_of(c(\"a\", \"e\", \"i\", \"o\", \"u\"))) #> # A tibble: 1 × 6 #> a e b c d f #> #> 1 1 a 1 1 a a # When .before or .after refers to multiple variables they will be # moved to be immediately before/after the selected variables. 
df2 <- tibble(a = 1, b = \"a\", c = 1, d = \"a\") df2 %>% relocate(where(is.numeric), .after = where(is.character)) #> # A tibble: 1 × 4 #> b d a c #> #> 1 a a 1 1 df2 %>% relocate(where(is.numeric), .before = where(is.character)) #> # A tibble: 1 × 4 #> a c b d #> #> 1 1 1 a a"},{"path":"https://dplyr.tidyverse.org/dev/reference/rename.html","id":null,"dir":"Reference","previous_headings":"","what":"Rename columns — rename","title":"Rename columns — rename","text":"rename() changes names individual variables using new_name = old_name syntax; rename_with() renames columns using function.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/rename.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Rename columns — rename","text":"","code":"rename(.data, ...) rename_with(.data, .fn, .cols = everything(), ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/rename.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Rename columns — rename","text":".data data frame, data frame extension (e.g. tibble), lazy data frame (e.g. dbplyr dtplyr). See Methods, , details. ... rename(): Use new_name = old_name rename selected variables. rename_with(): additional arguments passed onto .fn. .fn function used transform selected .cols. return character vector length input. .cols Columns rename; defaults columns.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/rename.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Rename columns — rename","text":"object type .data. output following properties: Rows affected. Column names changed; column order preserved. Data frame attributes preserved. 
Groups updated reflect new names.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/rename.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Rename columns — rename","text":"function generic, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. following methods currently available loaded packages: dbplyr (tbl_lazy), dplyr (data.frame) .","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/rename.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Rename columns — rename","text":"","code":"iris <- as_tibble(iris) # so it prints a little nicer rename(iris, petal_length = Petal.Length) #> # A tibble: 150 × 5 #> Sepal.Length Sepal.Width petal_length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows # Rename using a named vector and `all_of()` lookup <- c(pl = \"Petal.Length\", sl = \"Sepal.Length\") rename(iris, all_of(lookup)) #> # A tibble: 150 × 5 #> sl Sepal.Width pl Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows # If your named vector might contain names that don't exist in the data, # use `any_of()` instead lookup <- c(lookup, new = \"unknown\") try(rename(iris, all_of(lookup))) #> Error in rename(iris, all_of(lookup)) : #> ℹ In argument: `all_of(lookup)`. #> Caused by error in `all_of()`: #> ! Can't subset elements that don't exist. 
#> ✖ Element `unknown` doesn't exist. rename(iris, any_of(lookup)) #> # A tibble: 150 × 5 #> sl Sepal.Width pl Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows rename_with(iris, toupper) #> # A tibble: 150 × 5 #> SEPAL.LENGTH SEPAL.WIDTH PETAL.LENGTH PETAL.WIDTH SPECIES #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows rename_with(iris, toupper, starts_with(\"Petal\")) #> # A tibble: 150 × 5 #> Sepal.Length Sepal.Width PETAL.LENGTH PETAL.WIDTH Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows rename_with(iris, ~ tolower(gsub(\".\", \"_\", .x, fixed = TRUE))) #> # A tibble: 150 × 5 #> sepal_length sepal_width petal_length petal_width species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows # If your renaming function uses `paste0()`, make sure to set # `recycle0 = TRUE` to ensure that empty selections are recycled correctly try(rename_with( iris, ~ paste0(\"prefix_\", .x), starts_with(\"nonexistent\") )) #> Error in rename_with(iris, ~paste0(\"prefix_\", .x), 
starts_with(\"nonexistent\")) : #> `.fn` must return a vector of length 0, not 1. rename_with( iris, ~ paste0(\"prefix_\", .x, recycle0 = TRUE), starts_with(\"nonexistent\") ) #> # A tibble: 150 × 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ℹ 140 more rows"},{"path":"https://dplyr.tidyverse.org/dev/reference/row_number.html","id":null,"dir":"Reference","previous_headings":"","what":"Integer ranking functions — row_number","title":"Integer ranking functions — row_number","text":"Three ranking functions inspired SQL2003. differ primarily handle ties: row_number() gives every input unique rank, c(10, 20, 20, 30) get ranks c(1, 2, 3, 4). equivalent rank(ties.method = \"first\"). min_rank() gives every tie (smallest) value c(10, 20, 20, 30) gets ranks c(1, 2, 2, 4). way ranks usually computed sports equivalent rank(ties.method = \"min\"). dense_rank() works like min_rank(), leave gaps, c(10, 20, 20, 30) gets ranks c(1, 2, 2, 3).","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/row_number.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Integer ranking functions — row_number","text":"","code":"row_number(x) min_rank(x) dense_rank(x)"},{"path":"https://dplyr.tidyverse.org/dev/reference/row_number.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Integer ranking functions — row_number","text":"x vector rank default, smallest values get smallest ranks. Use desc() reverse direction largest values get smallest ranks. Missing values given rank NA. Use coalesce(x, Inf) coalesce(x, -Inf) want treat largest smallest values respectively. 
rank multiple columns , supply data frame.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/row_number.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Integer ranking functions — row_number","text":"integer vector.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/row_number.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Integer ranking functions — row_number","text":"","code":"x <- c(5, 1, 3, 2, 2, NA) row_number(x) #> [1] 5 1 4 2 3 NA min_rank(x) #> [1] 5 1 4 2 2 NA dense_rank(x) #> [1] 4 1 3 2 2 NA # Ranking functions can be used in `filter()` to select top/bottom rows df <- data.frame( grp = c(1, 1, 1, 2, 2, 2, 3, 3, 3), x = c(3, 2, 1, 1, 2, 2, 1, 1, 1), y = c(1, 3, 2, 3, 2, 2, 4, 1, 2), id = 1:9 ) # Always gives exactly 1 row per group df %>% group_by(grp) %>% filter(row_number(x) == 1) #> # A tibble: 3 × 4 #> # Groups: grp [3] #> grp x y id #> #> 1 1 1 2 3 #> 2 2 1 3 4 #> 3 3 1 4 7 # May give more than 1 row if ties df %>% group_by(grp) %>% filter(min_rank(x) == 1) #> # A tibble: 5 × 4 #> # Groups: grp [3] #> grp x y id #> #> 1 1 1 2 3 #> 2 2 1 3 4 #> 3 3 1 4 7 #> 4 3 1 1 8 #> 5 3 1 2 9 # Rank by multiple columns (to break ties) by selecting them with `pick()` df %>% group_by(grp) %>% filter(min_rank(pick(x, y)) == 1) #> # A tibble: 3 × 4 #> # Groups: grp [3] #> grp x y id #> #> 1 1 1 2 3 #> 2 2 1 3 4 #> 3 3 1 1 8 # See slice_min() and slice_max() for another way to tackle the same problem # You can use row_number() without an argument to refer to the \"current\" # row number. 
df %>% group_by(grp) %>% filter(row_number() == 1) #> # A tibble: 3 × 4 #> # Groups: grp [3] #> grp x y id #> #> 1 1 3 1 1 #> 2 2 1 3 4 #> 3 3 1 4 7 # It's easiest to see what this does with mutate(): df %>% group_by(grp) %>% mutate(grp_id = row_number()) #> # A tibble: 9 × 5 #> # Groups: grp [3] #> grp x y id grp_id #> #> 1 1 3 1 1 1 #> 2 1 2 3 2 2 #> 3 1 1 2 3 3 #> 4 2 1 3 4 1 #> 5 2 2 2 5 2 #> 6 2 2 2 6 3 #> 7 3 1 4 7 1 #> 8 3 1 1 8 2 #> 9 3 1 2 9 3"},{"path":"https://dplyr.tidyverse.org/dev/reference/rows.html","id":null,"dir":"Reference","previous_headings":"","what":"Manipulate individual rows — rows","title":"Manipulate individual rows — rows","text":"functions provide framework modifying rows table using second table data. two tables matched set key variables whose values typically uniquely identify row. functions inspired SQL's INSERT, UPDATE, DELETE, can optionally modify in_place selected backends. rows_insert() adds new rows (like INSERT). default, key values y must exist x. rows_append() works like rows_insert() ignores keys. rows_update() modifies existing rows (like UPDATE). Key values y must unique, , default, key values y must exist x. rows_patch() works like rows_update() overwrites NA values. rows_upsert() inserts updates depending whether key value y already exists x. Key values y must unique. rows_delete() deletes rows (like DELETE). 
default, key values y must exist x.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/rows.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Manipulate individual rows — rows","text":"","code":"rows_insert( x, y, by = NULL, ..., conflict = c(\"error\", \"ignore\"), copy = FALSE, in_place = FALSE ) rows_append(x, y, ..., copy = FALSE, in_place = FALSE) rows_update( x, y, by = NULL, ..., unmatched = c(\"error\", \"ignore\"), copy = FALSE, in_place = FALSE ) rows_patch( x, y, by = NULL, ..., unmatched = c(\"error\", \"ignore\"), copy = FALSE, in_place = FALSE ) rows_upsert(x, y, by = NULL, ..., copy = FALSE, in_place = FALSE) rows_delete( x, y, by = NULL, ..., unmatched = c(\"error\", \"ignore\"), copy = FALSE, in_place = FALSE )"},{"path":"https://dplyr.tidyverse.org/dev/reference/rows.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Manipulate individual rows — rows","text":"x, y pair data frames data frame extensions (e.g. tibble). y must columns x subset. unnamed character vector giving key columns. key columns must exist x y. Keys typically uniquely identify row, enforced key values y rows_update(), rows_patch(), rows_upsert() used. default, use first column y, since first column reasonable place put identifier variable. ... parameters passed onto methods. conflict rows_insert(), keys y conflict keys x handled? conflict arises key y already exists x. One : \"error\", default, error keys y conflict keys x. \"ignore\" ignore rows y keys conflict keys x. copy x y data source, copy TRUE, y copied src x. allows join tables across srcs, potentially expensive operation must opt . in_place x modified place? argument relevant mutable backends (e.g. databases, data.tables). TRUE, modified version x returned invisibly; FALSE, new object representing resulting changes returned. unmatched rows_update(), rows_patch(), rows_delete(), keys y unmatched keys x handled? 
One : \"error\", default, error keys y unmatched keys x. \"ignore\" ignore rows y keys unmatched keys x.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/rows.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Manipulate individual rows — rows","text":"object type x. order rows columns x preserved much possible. output following properties: rows_update() rows_patch() preserve number rows; rows_insert(), rows_append(), rows_upsert() return existing rows potentially new rows; rows_delete() returns subset rows. Columns added, removed, relocated, though data may updated. Groups taken x. Data frame attributes taken x. in_place = TRUE, result returned invisibly.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/rows.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Manipulate individual rows — rows","text":"function generics, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. Methods available currently loaded packages: rows_insert(): dbplyr (tbl_lazy), dplyr (data.frame) . rows_append(): dbplyr (tbl_lazy), dplyr (data.frame) . rows_update(): dbplyr (tbl_lazy), dplyr (data.frame) . rows_patch(): dbplyr (tbl_lazy), dplyr (data.frame) . rows_upsert(): dbplyr (tbl_lazy), dplyr (data.frame) . 
rows_delete(): dbplyr (tbl_lazy), dplyr (data.frame) .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/rows.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Manipulate individual rows — rows","text":"","code":"data <- tibble(a = 1:3, b = letters[c(1:2, NA)], c = 0.5 + 0:2) data #> # A tibble: 3 × 3 #> a b c #> #> 1 1 a 0.5 #> 2 2 b 1.5 #> 3 3 NA 2.5 # Insert rows_insert(data, tibble(a = 4, b = \"z\")) #> Matching, by = \"a\" #> # A tibble: 4 × 3 #> a b c #> #> 1 1 a 0.5 #> 2 2 b 1.5 #> 3 3 NA 2.5 #> 4 4 z NA # By default, if a key in `y` matches a key in `x`, then it can't be inserted # and will throw an error. Alternatively, you can ignore rows in `y` # containing keys that conflict with keys in `x` with `conflict = \"ignore\"`, # or you can use `rows_append()` to ignore keys entirely. try(rows_insert(data, tibble(a = 3, b = \"z\"))) #> Matching, by = \"a\" #> Error in rows_insert(data, tibble(a = 3, b = \"z\")) : #> `y` can't contain keys that already exist in `x`. #> ℹ The following rows in `y` have keys that already exist in `x`: `c(1)`. #> ℹ Use `conflict = \"ignore\"` if you want to ignore these `y` rows. 
rows_insert(data, tibble(a = 3, b = \"z\"), conflict = \"ignore\") #> Matching, by = \"a\" #> # A tibble: 3 × 3 #> a b c #> #> 1 1 a 0.5 #> 2 2 b 1.5 #> 3 3 NA 2.5 rows_append(data, tibble(a = 3, b = \"z\")) #> # A tibble: 4 × 3 #> a b c #> #> 1 1 a 0.5 #> 2 2 b 1.5 #> 3 3 NA 2.5 #> 4 3 z NA # Update rows_update(data, tibble(a = 2:3, b = \"z\")) #> Matching, by = \"a\" #> # A tibble: 3 × 3 #> a b c #> #> 1 1 a 0.5 #> 2 2 z 1.5 #> 3 3 z 2.5 rows_update(data, tibble(b = \"z\", a = 2:3), by = \"a\") #> # A tibble: 3 × 3 #> a b c #> #> 1 1 a 0.5 #> 2 2 z 1.5 #> 3 3 z 2.5 # Variants: patch and upsert rows_patch(data, tibble(a = 2:3, b = \"z\")) #> Matching, by = \"a\" #> # A tibble: 3 × 3 #> a b c #> #> 1 1 a 0.5 #> 2 2 b 1.5 #> 3 3 z 2.5 rows_upsert(data, tibble(a = 2:4, b = \"z\")) #> Matching, by = \"a\" #> # A tibble: 4 × 3 #> a b c #> #> 1 1 a 0.5 #> 2 2 z 1.5 #> 3 3 z 2.5 #> 4 4 z NA # Delete and truncate rows_delete(data, tibble(a = 2:3)) #> Matching, by = \"a\" #> # A tibble: 1 × 3 #> a b c #> #> 1 1 a 0.5 rows_delete(data, tibble(a = 2:3, b = \"b\")) #> Matching, by = \"a\" #> Ignoring extra `y` columns: b #> # A tibble: 1 × 3 #> a b c #> #> 1 1 a 0.5 # By default, for update, patch, and delete it is an error if a key in `y` # doesn't exist in `x`. You can ignore rows in `y` that have unmatched keys # with `unmatched = \"ignore\"`. y <- tibble(a = 3:4, b = \"z\") try(rows_update(data, y, by = \"a\")) #> Error in rows_update(data, y, by = \"a\") : #> `y` must contain keys that already exist in `x`. #> ℹ The following rows in `y` have keys that don't exist in `x`: `c(2)`. #> ℹ Use `unmatched = \"ignore\"` if you want to ignore these `y` rows. 
rows_update(data, y, by = \"a\", unmatched = \"ignore\") #> # A tibble: 3 × 3 #> a b c #> #> 1 1 a 0.5 #> 2 2 b 1.5 #> 3 3 z 2.5 rows_patch(data, y, by = \"a\", unmatched = \"ignore\") #> # A tibble: 3 × 3 #> a b c #> #> 1 1 a 0.5 #> 2 2 b 1.5 #> 3 3 z 2.5 rows_delete(data, y, by = \"a\", unmatched = \"ignore\") #> Ignoring extra `y` columns: b #> # A tibble: 2 × 3 #> a b c #> #> 1 1 a 0.5 #> 2 2 b 1.5"},{"path":"https://dplyr.tidyverse.org/dev/reference/rowwise.html","id":null,"dir":"Reference","previous_headings":"","what":"Group input by rows — rowwise","title":"Group input by rows — rowwise","text":"rowwise() allows compute data frame row---time. useful vectorised function exist. dplyr verbs preserve row-wise grouping. exception summarise(), return grouped_df. can explicitly ungroup ungroup() as_tibble(), convert grouped_df group_by().","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/rowwise.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Group input by rows — rowwise","text":"","code":"rowwise(data, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/rowwise.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Group input by rows — rowwise","text":"data Input data frame. ... Variables preserved calling summarise(). typically set variables whose combination uniquely identify row. NB: unlike group_by() can create new variables instead can select multiple variables (e.g.) everything().","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/rowwise.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Group input by rows — rowwise","text":"row-wise data frame class rowwise_df. 
Note rowwise_df implicitly grouped row, grouped_df.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/rowwise.html","id":"list-columns","dir":"Reference","previous_headings":"","what":"List-columns","title":"Group input by rows — rowwise","text":"rowwise exactly one row per group offers small convenience working list-columns. Normally, summarise() mutate() extract groups worth data [. index list way, get back another list. working rowwise tibble, dplyr use [[ instead [ make life little easier.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/rowwise.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Group input by rows — rowwise","text":"","code":"df <- tibble(x = runif(6), y = runif(6), z = runif(6)) # Compute the mean of x, y, z in each row df %>% rowwise() %>% mutate(m = mean(c(x, y, z))) #> # A tibble: 6 × 4 #> # Rowwise: #> x y z m #> #> 1 0.386 0.929 0.723 0.679 #> 2 0.237 0.645 0.696 0.526 #> 3 0.520 0.783 0.871 0.724 #> 4 0.641 0.0587 0.709 0.470 #> 5 0.830 0.358 0.355 0.514 #> 6 0.919 0.479 0.807 0.735 # use c_across() to more easily select many variables df %>% rowwise() %>% mutate(m = mean(c_across(x:z))) #> # A tibble: 6 × 4 #> # Rowwise: #> x y z m #> #> 1 0.386 0.929 0.723 0.679 #> 2 0.237 0.645 0.696 0.526 #> 3 0.520 0.783 0.871 0.724 #> 4 0.641 0.0587 0.709 0.470 #> 5 0.830 0.358 0.355 0.514 #> 6 0.919 0.479 0.807 0.735 # Compute the minimum of x and y in each row df %>% rowwise() %>% mutate(m = min(c(x, y, z))) #> # A tibble: 6 × 4 #> # Rowwise: #> x y z m #> #> 1 0.386 0.929 0.723 0.386 #> 2 0.237 0.645 0.696 0.237 #> 3 0.520 0.783 0.871 0.520 #> 4 0.641 0.0587 0.709 0.0587 #> 5 0.830 0.358 0.355 0.355 #> 6 0.919 0.479 0.807 0.479 # In this case you can use an existing vectorised function: df %>% mutate(m = pmin(x, y, z)) #> # A tibble: 6 × 4 #> x y z m #> #> 1 0.386 0.929 0.723 0.386 #> 2 0.237 0.645 0.696 0.237 #> 3 0.520 0.783 0.871 0.520 #> 4 0.641 0.0587 0.709 
0.0587 #> 5 0.830 0.358 0.355 0.355 #> 6 0.919 0.479 0.807 0.479 # Where these functions exist they'll be much faster than rowwise # so be on the lookout for them. # rowwise() is also useful when doing simulations params <- tribble( ~sim, ~n, ~mean, ~sd, 1, 1, 1, 1, 2, 2, 2, 4, 3, 3, -1, 2 ) # Here I supply variables to preserve after the computation params %>% rowwise(sim) %>% reframe(z = rnorm(n, mean, sd)) #> # A tibble: 6 × 2 #> sim z #> #> 1 1 0.622 #> 2 2 7.32 #> 3 2 -1.09 #> 4 3 0.0376 #> 5 3 -0.787 #> 6 3 0.163 # If you want one row per simulation, put the results in a list() params %>% rowwise(sim) %>% summarise(z = list(rnorm(n, mean, sd)), .groups = \"keep\") #> # A tibble: 3 × 2 #> # Groups: sim [3] #> sim z #> #> 1 1 #> 2 2 #> 3 3 "},{"path":"https://dplyr.tidyverse.org/dev/reference/same_src.html","id":null,"dir":"Reference","previous_headings":"","what":"Figure out if two sources are the same (or two tbl have the same source) — same_src","title":"Figure out if two sources are the same (or two tbl have the same source) — same_src","text":"Figure two sources (two tbl source)","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/same_src.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Figure out if two sources are the same (or two tbl have the same source) — same_src","text":"","code":"same_src(x, y)"},{"path":"https://dplyr.tidyverse.org/dev/reference/same_src.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Figure out if two sources are the same (or two tbl have the same source) — same_src","text":"x, y src tbls test","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/same_src.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Figure out if two sources are the same (or two tbl have the same source) — same_src","text":"logical 
flag","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/sample_n.html","id":null,"dir":"Reference","previous_headings":"","what":"Sample n rows from a table — sample_n","title":"Sample n rows from a table — sample_n","text":"sample_n() sample_frac() superseded favour slice_sample(). deprecated near future, retirement means perform critical bug fixes, recommend moving newer alternative. functions superseded realised convenient two mutually exclusive arguments one function, rather two separate functions. also made clean smaller design issues sample_n()/sample_frac: connection slice() obvious. name first argument, tbl, inconsistent single table verbs use .data. size argument uses tidy evaluation, surprising undocumented. easier remove deprecated .env argument. ... suboptimal position.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/sample_n.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Sample n rows from a table — sample_n","text":"","code":"sample_n(tbl, size, replace = FALSE, weight = NULL, .env = NULL, ...) sample_frac(tbl, size = 1, replace = FALSE, weight = NULL, .env = NULL, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/sample_n.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Sample n rows from a table — sample_n","text":"tbl data.frame. size sample_n(), number rows select. sample_frac(), fraction rows select. tbl grouped, size applies group. replace Sample without replacement? weight Sampling weights. must evaluate vector non-negative numbers length input. Weights automatically standardised sum 1. .env DEPRECATED. ... 
ignored","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/sample_n.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Sample n rows from a table — sample_n","text":"","code":"df <- tibble(x = 1:5, w = c(0.1, 0.1, 0.1, 2, 2)) # sample_n() -> slice_sample() ---------------------------------------------- # Was: sample_n(df, 3) #> # A tibble: 3 × 2 #> x w #> #> 1 3 0.1 #> 2 4 2 #> 3 2 0.1 sample_n(df, 10, replace = TRUE) #> # A tibble: 10 × 2 #> x w #> #> 1 3 0.1 #> 2 2 0.1 #> 3 2 0.1 #> 4 5 2 #> 5 1 0.1 #> 6 2 0.1 #> 7 1 0.1 #> 8 2 0.1 #> 9 4 2 #> 10 4 2 sample_n(df, 3, weight = w) #> # A tibble: 3 × 2 #> x w #> #> 1 5 2 #> 2 4 2 #> 3 1 0.1 # Now: slice_sample(df, n = 3) #> # A tibble: 3 × 2 #> x w #> #> 1 5 2 #> 2 3 0.1 #> 3 1 0.1 slice_sample(df, n = 10, replace = TRUE) #> # A tibble: 10 × 2 #> x w #> #> 1 3 0.1 #> 2 4 2 #> 3 2 0.1 #> 4 1 0.1 #> 5 1 0.1 #> 6 3 0.1 #> 7 4 2 #> 8 1 0.1 #> 9 2 0.1 #> 10 5 2 slice_sample(df, n = 3, weight_by = w) #> # A tibble: 3 × 2 #> x w #> #> 1 4 2 #> 2 5 2 #> 3 2 0.1 # Note that sample_n() would error if n was bigger than the group size # slice_sample() will just use the available rows for consistency with # the other slice helpers like slice_head() try(sample_n(df, 10)) #> Error in sample_n(df, 10) : Can't compute indices. #> Caused by error: #> ! `size` must be less than or equal to 5 (size of data). #> ℹ set `replace = TRUE` to use sampling with replacement. 
slice_sample(df, n = 10) #> # A tibble: 5 × 2 #> x w #> #> 1 2 0.1 #> 2 3 0.1 #> 3 5 2 #> 4 1 0.1 #> 5 4 2 # sample_frac() -> slice_sample() ------------------------------------------- # Was: sample_frac(df, 0.25) #> # A tibble: 1 × 2 #> x w #> #> 1 1 0.1 sample_frac(df, 2, replace = TRUE) #> # A tibble: 10 × 2 #> x w #> #> 1 3 0.1 #> 2 1 0.1 #> 3 5 2 #> 4 4 2 #> 5 1 0.1 #> 6 5 2 #> 7 3 0.1 #> 8 1 0.1 #> 9 2 0.1 #> 10 3 0.1 # Now: slice_sample(df, prop = 0.25) #> # A tibble: 1 × 2 #> x w #> #> 1 3 0.1 slice_sample(df, prop = 2, replace = TRUE) #> # A tibble: 10 × 2 #> x w #> #> 1 2 0.1 #> 2 3 0.1 #> 3 5 2 #> 4 2 0.1 #> 5 2 0.1 #> 6 5 2 #> 7 1 0.1 #> 8 2 0.1 #> 9 1 0.1 #> 10 4 2"},{"path":"https://dplyr.tidyverse.org/dev/reference/scoped.html","id":null,"dir":"Reference","previous_headings":"","what":"Operate on a selection of variables — scoped","title":"Operate on a selection of variables — scoped","text":"Scoped verbs (_if, _at, _all) superseded use pick() across() existing verb. See vignette(\"colwise\") details. variants suffixed _if, _at _all apply expression (sometimes several) variables within specified subset. subset can contain variables (_all variants), vars() selection (_at variants), variables selected predicate (_if variants). verbs scoped variants : mutate(), transmute() summarise(). See summarise_all(). filter(). See filter_all(). group_by(). See group_by_all(). rename() select(). See select_all(). arrange(). See arrange_all() three kinds scoped variants. differ scope variable selection operations applied: Verbs suffixed _all() apply operation variables. Verbs suffixed _at() apply operation subset variables specified quoting function vars(). quoting function accepts tidyselect::vars_select() helpers like starts_with(). Instead vars() selection, can also supply integerish vector column positions character vector column names. Verbs suffixed _if() apply operation subset variables predicate function returns TRUE. 
Instead predicate function, can also supply logical vector.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/scoped.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Operate on a selection of variables — scoped","text":".tbl tbl object. .funs function fun, quosure style lambda ~ fun(.) list either form. .vars list columns generated vars(), character vector column names, numeric vector column positions, NULL. .predicate predicate function applied columns logical vector. variables .predicate returns TRUE selected. argument passed rlang::as_function() thus supports quosure-style lambda functions strings representing function names. ... Additional arguments function calls .funs. evaluated , tidy dots support.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/scoped.html","id":"grouping-variables","dir":"Reference","previous_headings":"","what":"Grouping variables","title":"Operate on a selection of variables — scoped","text":"operations also apply grouping variables part selection. includes: arrange_all(), arrange_at(), arrange_if() distinct_all(), distinct_at(), distinct_if() filter_all(), filter_at(), filter_if() group_by_all(), group_by_at(), group_by_if() select_all(), select_at(), select_if() case summarising mutating variants operations applied grouping variables. behaviour depends whether selection implicit (selections) explicit (selections). Grouping variables covered explicit selections (summarise_at(), mutate_at(), transmute_at()) always error. implicit selections, grouping variables always ignored. case, level verbosity depends kind operation: Summarising operations (summarise_all() summarise_if()) ignore grouping variables silently obvious operations applied grouping variables. hand obvious case mutating operations (mutate_all(), mutate_if(), transmute_all(), transmute_if()). 
reason, issue message indicating grouping variables ignored.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/se-deprecated.html","id":null,"dir":"Reference","previous_headings":"","what":"Deprecated SE versions of main verbs. — se-deprecated","title":"Deprecated SE versions of main verbs. — se-deprecated","text":"dplyr used offer twin versions verb suffixed underscore. versions standard evaluation (SE) semantics: rather taking arguments code, like NSE verbs, took arguments value. purpose make possible program dplyr. However, dplyr now uses tidy evaluation semantics. NSE verbs still capture arguments, can now unquote parts arguments. offers full programmability NSE verbs. Thus, underscored versions now superfluous. Unquoting triggers immediate evaluation operand inlines result within captured expression. result can value expression evaluated later rest argument. See vignette(\"programming\") information.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/se-deprecated.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Deprecated SE versions of main verbs. 
— se-deprecated","text":"","code":"add_count_(x, vars, wt = NULL, sort = FALSE) add_tally_(x, wt, sort = FALSE) arrange_(.data, ..., .dots = list()) count_(x, vars, wt = NULL, sort = FALSE, .drop = group_by_drop_default(x)) distinct_(.data, ..., .dots, .keep_all = FALSE) do_(.data, ..., .dots = list()) filter_(.data, ..., .dots = list()) funs_(dots, args = list(), env = base_env()) group_by_(.data, ..., .dots = list(), add = FALSE) group_indices_(.data, ..., .dots = list()) mutate_(.data, ..., .dots = list()) tally_(x, wt, sort = FALSE) transmute_(.data, ..., .dots = list()) rename_(.data, ..., .dots = list()) rename_vars_(vars, args) select_(.data, ..., .dots = list()) select_vars_(vars, args, include = chr(), exclude = chr()) slice_(.data, ..., .dots = list()) summarise_(.data, ..., .dots = list()) summarize_(.data, ..., .dots = list())"},{"path":"https://dplyr.tidyverse.org/dev/reference/se-deprecated.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Deprecated SE versions of main verbs. — se-deprecated","text":"x tbl() vars Various meanings depending verb. wt Frequency weights. Can NULL variable: NULL (default), counts number rows group. variable, computes sum(wt) group. sort TRUE, show largest groups top. .data data frame. .drop Drop groups formed factor levels appear data? default TRUE except .data previously grouped .drop = FALSE. See group_by_drop_default() details. .keep_all TRUE, keep variables .data. combination ... distinct, keeps first row values. dots, .dots, ... Pair/values expressions coercible lazy objects. args Various meanings depending verb. env environment functions evaluated. add FALSE, default, group_by() override existing groups. add existing groups, use .add = TRUE. argument previously called add, prevented creating new grouping variable called add, conflicts naming conventions. 
include, exclude Character vector column names always include/exclude.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/select.html","id":null,"dir":"Reference","previous_headings":"","what":"Keep or drop columns using their names and types — select","title":"Keep or drop columns using their names and types — select","text":"Select (optionally rename) variables data frame, using concise mini-language makes easy refer variables based name (e.g. :f selects columns left f right) type (e.g. (.numeric) selects numeric columns).","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/select.html","id":"overview-of-selection-features","dir":"Reference","previous_headings":"","what":"Overview of selection features","title":"Keep or drop columns using their names and types — select","text":"Tidyverse selections implement dialect R operators make easy select variables: : selecting range consecutive variables. ! taking complement set variables. & | selecting intersection union two sets variables. c() combining selections. addition, can use selection helpers. helpers select specific columns: everything(): Matches variables. last_col(): Select last variable, possibly offset. group_cols(): Select grouping columns. helpers select variables matching patterns names: starts_with(): Starts prefix. ends_with(): Ends suffix. contains(): Contains literal string. matches(): Matches regular expression. num_range(): Matches numerical range like x01, x02, x03. variables stored character vector: all_of(): Matches variable names character vector. names must present, otherwise --bounds error thrown. any_of(): all_of(), except error thrown names exist. 
using predicate function: (): Applies function variables selects function returns TRUE.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/select.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Keep or drop columns using their names and types — select","text":"","code":"select(.data, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/select.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Keep or drop columns using their names and types — select","text":".data data frame, data frame extension (e.g. tibble), lazy data frame (e.g. dbplyr dtplyr). See Methods, , details. ... One unquoted expressions separated commas. Variable names can used positions data frame, expressions like x:y can used select range variables.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/select.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Keep or drop columns using their names and types — select","text":"object type .data. output following properties: Rows affected. Output columns subset input columns, potentially different order. Columns renamed new_name = old_name form used. Data frame attributes preserved. Groups maintained; select grouping variables.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/select.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Keep or drop columns using their names and types — select","text":"function generic, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. 
following methods currently available loaded packages: dbplyr (tbl_lazy), dplyr (data.frame) .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/select.html","id":"examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Keep or drop columns using their names and types — select","text":"show usage basic selection operators. See specific help pages learn helpers like starts_with(). selection language can used functions like dplyr::select() tidyr::pivot_longer(). first attach tidyverse: Select variables name: Select multiple variables separating commas. Note order columns determined order inputs: Functions like tidyr::pivot_longer() take variables dots. case use c() select multiple variables:","code":"library(tidyverse) # For better printing iris <- as_tibble(iris) starwars %>% select(height) #> # A tibble: 87 x 1 #> height #> #> 1 172 #> 2 167 #> 3 96 #> 4 202 #> # i 83 more rows iris %>% pivot_longer(Sepal.Length) #> # A tibble: 150 x 6 #> Sepal.Width Petal.Length Petal.Width Species name value #> #> 1 3.5 1.4 0.2 setosa Sepal.Length 5.1 #> 2 3 1.4 0.2 setosa Sepal.Length 4.9 #> 3 3.2 1.3 0.2 setosa Sepal.Length 4.7 #> 4 3.1 1.5 0.2 setosa Sepal.Length 4.6 #> # i 146 more rows starwars %>% select(homeworld, height, mass) #> # A tibble: 87 x 3 #> homeworld height mass #> #> 1 Tatooine 172 77 #> 2 Tatooine 167 75 #> 3 Naboo 96 32 #> 4 Tatooine 202 136 #> # i 83 more rows iris %>% pivot_longer(c(Sepal.Length, Petal.Length)) #> # A tibble: 300 x 5 #> Sepal.Width Petal.Width Species name value #> #> 1 3.5 0.2 setosa Sepal.Length 5.1 #> 2 3.5 0.2 setosa Petal.Length 1.4 #> 3 3 0.2 setosa Sepal.Length 4.9 #> 4 3 0.2 setosa Petal.Length 1.4 #> # i 296 more rows"},{"path":"https://dplyr.tidyverse.org/dev/reference/select.html","id":"operators-","dir":"Reference","previous_headings":"","what":"Operators:","title":"Keep or drop columns using their names and types — select","text":": operator selects range consecutive variables: ! 
operator negates selection: & | take intersection union two selections: take difference two selections, combine & ! operators:","code":"starwars %>% select(name:mass) #> # A tibble: 87 x 3 #> name height mass #> #> 1 Luke Skywalker 172 77 #> 2 C-3PO 167 75 #> 3 R2-D2 96 32 #> 4 Darth Vader 202 136 #> # i 83 more rows starwars %>% select(!(name:mass)) #> # A tibble: 87 x 11 #> hair_color skin_color eye_color birth_year sex gender homeworld species #> #> 1 blond fair blue 19 male masculine Tatooine Human #> 2 gold yellow 112 none masculine Tatooine Droid #> 3 white, blue red 33 none masculine Naboo Droid #> 4 none white yellow 41.9 male masculine Tatooine Human #> # i 83 more rows #> # i 3 more variables: films , vehicles , starships iris %>% select(!c(Sepal.Length, Petal.Length)) #> # A tibble: 150 x 3 #> Sepal.Width Petal.Width Species #> #> 1 3.5 0.2 setosa #> 2 3 0.2 setosa #> 3 3.2 0.2 setosa #> 4 3.1 0.2 setosa #> # i 146 more rows iris %>% select(!ends_with(\"Width\")) #> # A tibble: 150 x 3 #> Sepal.Length Petal.Length Species #> #> 1 5.1 1.4 setosa #> 2 4.9 1.4 setosa #> 3 4.7 1.3 setosa #> 4 4.6 1.5 setosa #> # i 146 more rows iris %>% select(starts_with(\"Petal\") & ends_with(\"Width\")) #> # A tibble: 150 x 1 #> Petal.Width #> #> 1 0.2 #> 2 0.2 #> 3 0.2 #> 4 0.2 #> # i 146 more rows iris %>% select(starts_with(\"Petal\") | ends_with(\"Width\")) #> # A tibble: 150 x 3 #> Petal.Length Petal.Width Sepal.Width #> #> 1 1.4 0.2 3.5 #> 2 1.4 0.2 3 #> 3 1.3 0.2 3.2 #> 4 1.5 0.2 3.1 #> # i 146 more rows iris %>% select(starts_with(\"Petal\") & !ends_with(\"Width\")) #> # A tibble: 150 x 1 #> Petal.Length #> #> 1 1.4 #> 2 1.4 #> 3 1.3 #> 4 1.5 #> # i 146 more rows"},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/select_all.html","id":null,"dir":"Reference","previous_headings":"","what":"Select and rename a selection of variables — select_all","title":"Select and rename a selection of variables — select_all","text":"rename_if(), rename_at(), 
rename_all() superseded rename_with(). matching select statements superseded combination select() + rename_with(). predicate functions passed arguments select() rename_with() must wrapped (). functions superseded mutate_if() friends superseded across(). select_if() rename_if() already use tidy selection replaced across() instead need new function.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/select_all.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Select and rename a selection of variables — select_all","text":"","code":"select_all(.tbl, .funs = list(), ...) rename_all(.tbl, .funs = list(), ...) select_if(.tbl, .predicate, .funs = list(), ...) rename_if(.tbl, .predicate, .funs = list(), ...) select_at(.tbl, .vars, .funs = list(), ...) rename_at(.tbl, .vars, .funs = list(), ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/select_all.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Select and rename a selection of variables — select_all","text":".tbl tbl object. .funs function fun, purrr style lambda ~ fun(.) list either form. ... Additional arguments function calls .funs. evaluated , tidy dots support. .predicate predicate function applied columns logical vector. variables .predicate returns TRUE selected. argument passed rlang::as_function() thus supports quosure-style lambda functions strings representing function names. 
.vars list columns generated vars(), character vector column names, numeric vector column positions, NULL.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/select_all.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Select and rename a selection of variables — select_all","text":"","code":"mtcars <- as_tibble(mtcars) # for nicer printing mtcars %>% rename_all(toupper) #> # A tibble: 32 × 11 #> MPG CYL DISP HP DRAT WT QSEC VS AM GEAR CARB #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows # -> mtcars %>% rename_with(toupper) #> # A tibble: 32 × 11 #> MPG CYL DISP HP DRAT WT QSEC VS AM GEAR CARB #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 
123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows # NB: the transformation comes first in rename_with is_whole <- function(x) all(floor(x) == x) mtcars %>% rename_if(is_whole, toupper) #> # A tibble: 32 × 11 #> mpg CYL disp HP drat wt qsec VS AM GEAR CARB #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows # -> mtcars %>% rename_with(toupper, where(is_whole)) #> # A tibble: 32 × 11 #> mpg CYL disp HP drat wt qsec VS AM GEAR CARB #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows mtcars %>% rename_at(vars(mpg:hp), toupper) #> # A tibble: 32 × 11 #> MPG CYL DISP HP drat wt qsec vs am gear carb #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 
123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows # -> mtcars %>% rename_with(toupper, mpg:hp) #> # A tibble: 32 × 11 #> MPG CYL DISP HP drat wt qsec vs am gear carb #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows # You now must select() and then rename mtcars %>% select_all(toupper) #> # A tibble: 32 × 11 #> MPG CYL DISP HP DRAT WT QSEC VS AM GEAR CARB #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows # -> mtcars %>% rename_with(toupper) #> # A tibble: 32 × 11 #> MPG CYL DISP HP DRAT WT QSEC VS AM GEAR CARB #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 
123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows # Selection drops unselected variables: mtcars %>% select_if(is_whole, toupper) #> # A tibble: 32 × 6 #> CYL HP VS AM GEAR CARB #> #> 1 6 110 0 1 4 4 #> 2 6 110 0 1 4 4 #> 3 4 93 1 1 4 1 #> 4 6 110 1 0 3 1 #> 5 8 175 0 0 3 2 #> 6 6 105 1 0 3 1 #> 7 8 245 0 0 3 4 #> 8 4 62 1 0 4 2 #> 9 4 95 1 0 4 2 #> 10 6 123 1 0 4 4 #> # ℹ 22 more rows # -> mtcars %>% select(where(is_whole)) %>% rename_with(toupper) #> # A tibble: 32 × 6 #> CYL HP VS AM GEAR CARB #> #> 1 6 110 0 1 4 4 #> 2 6 110 0 1 4 4 #> 3 4 93 1 1 4 1 #> 4 6 110 1 0 3 1 #> 5 8 175 0 0 3 2 #> 6 6 105 1 0 3 1 #> 7 8 245 0 0 3 4 #> 8 4 62 1 0 4 2 #> 9 4 95 1 0 4 2 #> 10 6 123 1 0 4 4 #> # ℹ 22 more rows mtcars %>% select_at(vars(-contains(\"ar\"), starts_with(\"c\")), toupper) #> # A tibble: 32 × 10 #> MPG CYL DISP HP DRAT WT QSEC VS AM CARB #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 2 #> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 #> # ℹ 22 more rows # -> mtcars %>% select(!contains(\"ar\") | starts_with(\"c\")) %>% rename_with(toupper) #> # A tibble: 32 × 10 #> MPG CYL DISP HP DRAT WT QSEC VS AM CARB #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 2 #> 10 19.2 6 168. 
123 3.92 3.44 18.3 1 0 4 #> # ℹ 22 more rows"},{"path":"https://dplyr.tidyverse.org/dev/reference/setops.html","id":null,"dir":"Reference","previous_headings":"","what":"Set operations — setops","title":"Set operations — setops","text":"Perform set operations using rows data frame. intersect(x, y) finds rows x y. union(x, y) finds rows either x y, excluding duplicates. union_all(x, y) finds rows either x y, including duplicates. setdiff(x, y) finds rows x y. symdiff(x, y) computes symmetric difference, .e. rows x y rows y x. setequal(x, y) returns TRUE x y contain rows (ignoring order). Note intersect(), union(), setdiff(), symdiff() remove duplicates x y.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/setops.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Set operations — setops","text":"","code":"intersect(x, y, ...) union(x, y, ...) union_all(x, y, ...) setdiff(x, y, ...) setequal(x, y, ...) symdiff(x, y, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/setops.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Set operations — setops","text":"x, y Pair compatible data frames. pair data frames compatible column names (possibly different orders) compatible types. ... dots future extensions must empty.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/setops.html","id":"base-functions","dir":"Reference","previous_headings":"","what":"Base functions","title":"Set operations — setops","text":"intersect(), union(), setdiff(), setequal() override base functions name order make generic. 
existing behaviour vectors preserved providing default methods call base functions.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/setops.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Set operations — setops","text":"","code":"df1 <- tibble(x = 1:3) df2 <- tibble(x = 3:5) intersect(df1, df2) #> # A tibble: 1 × 1 #> x #> #> 1 3 union(df1, df2) #> # A tibble: 5 × 1 #> x #> #> 1 1 #> 2 2 #> 3 3 #> 4 4 #> 5 5 union_all(df1, df2) #> # A tibble: 6 × 1 #> x #> #> 1 1 #> 2 2 #> 3 3 #> 4 3 #> 5 4 #> 6 5 setdiff(df1, df2) #> # A tibble: 2 × 1 #> x #> #> 1 1 #> 2 2 setdiff(df2, df1) #> # A tibble: 2 × 1 #> x #> #> 1 4 #> 2 5 symdiff(df1, df2) #> # A tibble: 4 × 1 #> x #> #> 1 1 #> 2 2 #> 3 4 #> 4 5 setequal(df1, df2) #> [1] FALSE setequal(df1, df1[3:1, ]) #> [1] TRUE # Note that the following functions remove pre-existing duplicates: df1 <- tibble(x = c(1:3, 3, 3)) df2 <- tibble(x = c(3:5, 5)) intersect(df1, df2) #> # A tibble: 1 × 1 #> x #> #> 1 3 union(df1, df2) #> # A tibble: 5 × 1 #> x #> #> 1 1 #> 2 2 #> 3 3 #> 4 4 #> 5 5 setdiff(df1, df2) #> # A tibble: 2 × 1 #> x #> #> 1 1 #> 2 2 symdiff(df1, df2) #> # A tibble: 4 × 1 #> x #> #> 1 1 #> 2 2 #> 3 4 #> 4 5"},{"path":"https://dplyr.tidyverse.org/dev/reference/slice.html","id":null,"dir":"Reference","previous_headings":"","what":"Subset rows using their positions — slice","title":"Subset rows using their positions — slice","text":"slice() lets index rows (integer) locations. allows select, remove, duplicate rows. accompanied number helpers common use cases: slice_head() slice_tail() select first last rows. slice_sample() randomly selects rows. slice_min() slice_max() select rows smallest largest values variable. .data grouped_df, operation performed group, (e.g.) 
slice_head(df, n = 5) select first five rows group.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/slice.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Subset rows using their positions — slice","text":"","code":"slice(.data, ..., .by = NULL, .preserve = FALSE) slice_head(.data, ..., n, prop, by = NULL) slice_tail(.data, ..., n, prop, by = NULL) slice_min( .data, order_by, ..., n, prop, by = NULL, with_ties = TRUE, na_rm = FALSE ) slice_max( .data, order_by, ..., n, prop, by = NULL, with_ties = TRUE, na_rm = FALSE ) slice_sample(.data, ..., n, prop, by = NULL, weight_by = NULL, replace = FALSE)"},{"path":"https://dplyr.tidyverse.org/dev/reference/slice.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Subset rows using their positions — slice","text":".data data frame, data frame extension (e.g. tibble), lazy data frame (e.g. dbplyr dtplyr). See Methods, , details. ... slice(): Integer row values. Provide either positive values keep, negative values drop. values provided must either positive negative. Indices beyond number rows input silently ignored. slice_*(), arguments passed methods. ., Optionally, selection columns group just operation, functioning alternative group_by(). details examples, see ?dplyr_by. .preserve Relevant .data input grouped. .preserve = FALSE (default), grouping structure recalculated based resulting data, otherwise grouping kept . n, prop Provide either n, number rows, prop, proportion rows select. neither supplied, n = 1 used. n greater number rows group (prop > 1), result silently truncated group size. prop rounded towards zero generate integer number rows. negative value n prop subtracted group size. example, n = -2 group 5 rows select 5 - 2 = 3 rows; prop = -0.25 8 rows select 8 * (1 - 0.25) = 6 rows. order_by Variable function variables order . order multiple variables, wrap data frame tibble. with_ties ties kept together? 
default, TRUE, may return rows request. Use FALSE ignore ties, return first n rows. na_rm missing values order_by removed result? FALSE, NA values sorted end (like arrange()), included insufficient non-missing values reach n/prop. weight_by Sampling weights. must evaluate vector non-negative numbers length input. Weights automatically standardised sum 1. replace sampling performed (TRUE) without (FALSE, default) replacement.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/slice.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Subset rows using their positions — slice","text":"object type .data. output following properties: row may appear 0, 1, many times output. Columns modified. Groups modified. Data frame attributes preserved.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/slice.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Subset rows using their positions — slice","text":"Slice work relational databases intrinsic notion row order. want perform equivalent operation, use filter() row_number().","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/slice.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Subset rows using their positions — slice","text":"function generics, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. Methods available currently loaded packages: slice(): dbplyr (tbl_lazy), dplyr (data.frame) . slice_head(): dbplyr (tbl_lazy), dplyr (data.frame) . slice_tail(): dbplyr (tbl_lazy), dplyr (data.frame) . slice_min(): dbplyr (tbl_lazy), dplyr (data.frame) . slice_max(): dbplyr (tbl_lazy), dplyr (data.frame) . 
slice_sample(): dbplyr (tbl_lazy), dplyr (data.frame) .","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/slice.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Subset rows using their positions — slice","text":"","code":"# Similar to head(mtcars, 1): mtcars %>% slice(1L) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Mazda RX4 21 6 160 110 3.9 2.62 16.46 0 1 4 4 # Similar to tail(mtcars, 1): mtcars %>% slice(n()) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Volvo 142E 21.4 4 121 109 4.11 2.78 18.6 1 1 4 2 mtcars %>% slice(5:n()) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 #> Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 #> Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 #> Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 #> Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 #> Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 #> Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 #> Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 #> Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 #> Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 #> Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 #> Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 #> Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 #> Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 #> Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 #> Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 #> Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 #> AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 #> Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 #> Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 #> Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 #> Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 #> Lotus Europa 
30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 #> Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 #> Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 #> Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8 #> Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2 # Rows can be dropped with negative indices: slice(mtcars, -(1:4)) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 #> Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 #> Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 #> Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 #> Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 #> Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 #> Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 #> Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 #> Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 #> Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 #> Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 #> Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 #> Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 #> Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 #> Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 #> Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 #> Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 #> AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 #> Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 #> Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 #> Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 #> Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 #> Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 #> Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 #> Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 #> Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8 #> Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2 
# First and last rows based on existing order mtcars %>% slice_head(n = 5) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 #> Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 #> Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 #> Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 #> Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 mtcars %>% slice_tail(n = 5) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.9 1 1 5 2 #> Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.5 0 1 5 4 #> Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.5 0 1 5 6 #> Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.6 0 1 5 8 #> Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.6 1 1 4 2 # Rows with minimum and maximum values of a variable mtcars %>% slice_min(mpg, n = 5) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Cadillac Fleetwood 10.4 8 472 205 2.93 5.250 17.98 0 0 3 4 #> Lincoln Continental 10.4 8 460 215 3.00 5.424 17.82 0 0 3 4 #> Camaro Z28 13.3 8 350 245 3.73 3.840 15.41 0 0 3 4 #> Duster 360 14.3 8 360 245 3.21 3.570 15.84 0 0 3 4 #> Chrysler Imperial 14.7 8 440 230 3.23 5.345 17.42 0 0 3 4 mtcars %>% slice_max(mpg, n = 5) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 #> Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 #> Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 #> Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 # slice_min() and slice_max() may return more rows than requested # in the presence of ties. 
mtcars %>% slice_min(cyl, n = 1) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 #> Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 #> Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 #> Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 #> Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 #> Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 #> Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 #> Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 #> Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 #> Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2 # Use with_ties = FALSE to return exactly n matches mtcars %>% slice_min(cyl, n = 1, with_ties = FALSE) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Datsun 710 22.8 4 108 93 3.85 2.32 18.61 1 1 4 1 # Or use additional variables to break the tie: mtcars %>% slice_min(tibble(cyl, mpg), n = 1) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Volvo 142E 21.4 4 121 109 4.11 2.78 18.6 1 1 4 2 # slice_sample() allows you to random select with or without replacement mtcars %>% slice_sample(n = 5) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Maserati Bora 15.0 8 301 335 3.54 3.570 14.60 0 1 5 8 #> Duster 360 14.3 8 360 245 3.21 3.570 15.84 0 0 3 4 #> Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 #> Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 #> Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 mtcars %>% slice_sample(n = 5, replace = TRUE) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 #> Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 #> Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 #> Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 # you can optionally weight by a variable - this code weights by the # physical weight of the 
cars, so heavy cars are more likely to get # selected mtcars %>% slice_sample(weight_by = wt, n = 5) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Lincoln Continental 10.4 8 460 215 3.00 5.424 17.82 0 0 3 4 #> Dodge Challenger 15.5 8 318 150 2.76 3.520 16.87 0 0 3 2 #> Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 #> Chrysler Imperial 14.7 8 440 230 3.23 5.345 17.42 0 0 3 4 #> Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 # Group wise operation ---------------------------------------- df <- tibble( group = rep(c(\"a\", \"b\", \"c\"), c(1, 2, 4)), x = runif(7) ) # All slice helpers operate per group, silently truncating to the group # size, so the following code works without error df %>% group_by(group) %>% slice_head(n = 2) #> # A tibble: 5 × 2 #> # Groups: group [3] #> group x #> #> 1 a 0.634 #> 2 b 0.771 #> 3 b 0.502 #> 4 c 0.711 #> 5 c 0.0919 # When specifying the proportion of rows to include non-integer sizes # are rounded down, so group a gets 0 rows df %>% group_by(group) %>% slice_head(prop = 0.5) #> # A tibble: 3 × 2 #> # Groups: group [2] #> group x #> #> 1 b 0.771 #> 2 c 0.711 #> 3 c 0.0919 # Filter equivalents -------------------------------------------- # slice() expressions can often be written to use `filter()` and # `row_number()`, which can also be translated to SQL. For many databases, # you'll need to supply an explicit variable to use to compute the row number. 
filter(mtcars, row_number() == 1L) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Mazda RX4 21 6 160 110 3.9 2.62 16.46 0 1 4 4 filter(mtcars, row_number() == n()) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Volvo 142E 21.4 4 121 109 4.11 2.78 18.6 1 1 4 2 filter(mtcars, between(row_number(), 5, n())) #> mpg cyl disp hp drat wt qsec vs am gear carb #> Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 #> Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 #> Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 #> Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 #> Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 #> Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 #> Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 #> Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 #> Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 #> Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 #> Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 #> Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 #> Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 #> Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 #> Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 #> Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 #> Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 #> Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 #> AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 #> Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 #> Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 #> Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 #> Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 #> Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 #> Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 #> Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 #> Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8 #> Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 
2"},{"path":"https://dplyr.tidyverse.org/dev/reference/sql.html","id":null,"dir":"Reference","previous_headings":"","what":"SQL escaping. — sql","title":"SQL escaping. — sql","text":"functions critical writing functions translate R functions sql functions. Typically conversion function escape inputs return sql object.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/sql.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"SQL escaping. — sql","text":"","code":"sql(...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/sql.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"SQL escaping. — sql","text":"... Character vectors combined single SQL expression.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/src.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a ","title":"Create a ","text":"src() standard constructor srcs .src() tests.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/src.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a ","text":"","code":"src(subclass, ...) is.src(x)"},{"path":"https://dplyr.tidyverse.org/dev/reference/src.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a ","text":"subclass name subclass. \"src\" abstract base class, must supply value. src_ automatically prepended class name ... fields used object. dots evaluated explicit splicing. x object test \"src\"-ness.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/src_dbi.html","id":null,"dir":"Reference","previous_headings":"","what":"Source for database backends — src_dbi","title":"Source for database backends — src_dbi","text":"functions deprecated; instead please use tbl() directly DBIConnection. 
See https://dbplyr.tidyverse.org/ details.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/src_dbi.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Source for database backends — src_dbi","text":"","code":"src_mysql( dbname, host = NULL, port = 0L, username = \"root\", password = \"\", ... ) src_postgres( dbname = NULL, host = NULL, port = NULL, user = NULL, password = NULL, ... ) src_sqlite(path, create = FALSE)"},{"path":"https://dplyr.tidyverse.org/dev/reference/src_dbi.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Source for database backends — src_dbi","text":"dbname Database name host, port Host name port number database ... src, arguments passed underlying database connector, DBI::dbConnect(). tbl, included compatibility generic, otherwise ignored. user, username, password User name password. Generally, avoid saving username password scripts easy accidentally expose valuable credentials. Instead, retrieve environment variables, use database specific credential scores. example, MySQL can set .cnf described RMySQL::MySQL(). path Path SQLite database. can use special path \":memory:\" create temporary memory database. create FALSE, path must already exist. 
TRUE, create new SQLite3 database path path exist connect existing database path exist.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/src_dbi.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Source for database backends — src_dbi","text":"S3 object class src_dbi, src_sql, src.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/src_dbi.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Source for database backends — src_dbi","text":"","code":"con <- DBI::dbConnect(RSQLite::SQLite(), \":memory:\") copy_to(con, mtcars) # To retrieve a single table from a source, use `tbl()` mtcars <- con %>% tbl(\"mtcars\") mtcars #> # Source: table<`mtcars`> [?? x 11] #> # Database: sqlite 3.46.0 [:memory:] #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> # ℹ more rows # You can also use pass raw SQL if you want a more sophisticated query con %>% tbl(sql(\"SELECT * FROM mtcars WHERE cyl == 8\")) #> # Source: SQL [?? x 11] #> # Database: sqlite 3.46.0 [:memory:] #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 2 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 3 16.4 8 276. 180 3.07 4.07 17.4 0 0 3 3 #> 4 17.3 8 276. 180 3.07 3.73 17.6 0 0 3 3 #> 5 15.2 8 276. 
180 3.07 3.78 18 0 0 3 3 #> 6 10.4 8 472 205 2.93 5.25 18.0 0 0 3 4 #> 7 10.4 8 460 215 3 5.42 17.8 0 0 3 4 #> 8 14.7 8 440 230 3.23 5.34 17.4 0 0 3 4 #> 9 15.5 8 318 150 2.76 3.52 16.9 0 0 3 2 #> 10 15.2 8 304 150 3.15 3.44 17.3 0 0 3 2 #> # ℹ more rows"},{"path":"https://dplyr.tidyverse.org/dev/reference/src_local.html","id":null,"dir":"Reference","previous_headings":"","what":"A local source — src_local","title":"A local source — src_local","text":"function deprecated since existed support style testing dplyr backends turned useful.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/src_local.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"A local source — src_local","text":"","code":"src_local(tbl, pkg = NULL, env = NULL) src_df(pkg = NULL, env = NULL)"},{"path":"https://dplyr.tidyverse.org/dev/reference/src_local.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"A local source — src_local","text":"tbl name function used generate tbl objects pkg, env Either name package environment object look objects.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/src_tbls.html","id":null,"dir":"Reference","previous_headings":"","what":"List all tbls provided by a source. — src_tbls","title":"List all tbls provided by a source. — src_tbls","text":"generic method individual src's provide methods . methods documented usually pretty obvious possible results .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/src_tbls.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"List all tbls provided by a source. — src_tbls","text":"","code":"src_tbls(x, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/src_tbls.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"List all tbls provided by a source. — src_tbls","text":"x data src. ... 
arguments passed individual methods.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/starwars.html","id":null,"dir":"Reference","previous_headings":"","what":"Starwars characters — starwars","title":"Starwars characters — starwars","text":"original data, SWAPI, Star Wars API, https://swapi.py4e.com/, revised reflect additional research gender sex determinations characters.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/starwars.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Starwars characters — starwars","text":"","code":"starwars"},{"path":"https://dplyr.tidyverse.org/dev/reference/starwars.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Starwars characters — starwars","text":"tibble 87 rows 14 variables: name Name character height Height (cm) mass Weight (kg) hair_color,skin_color,eye_color Hair, skin, eye colors birth_year Year born (BBY = Battle Yavin) sex biological sex character, namely male, female, hermaphroditic, none (case Droids). gender gender role gender identity character determined personality way programmed (case Droids). 
homeworld Name homeworld species Name species films List films character appeared vehicles List vehicles character piloted starships List starships character piloted","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/starwars.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Starwars characters — starwars","text":"","code":"starwars #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex #> #> 1 Luke Sky… 172 77 blond fair blue 19 male #> 2 C-3PO 167 75 NA gold yellow 112 none #> 3 R2-D2 96 32 NA white, bl… red 33 none #> 4 Darth Va… 202 136 none white yellow 41.9 male #> 5 Leia Org… 150 49 brown light brown 19 fema… #> 6 Owen Lars 178 120 brown, gr… light blue 52 male #> 7 Beru Whi… 165 75 brown light blue 47 fema… #> 8 R5-D4 97 32 NA white, red red NA none #> 9 Biggs Da… 183 84 black light brown 24 male #> 10 Obi-Wan … 182 77 auburn, w… fair blue-gray 57 male #> # ℹ 77 more rows #> # ℹ 6 more variables: gender , homeworld , species , #> # films , vehicles , starships "},{"path":"https://dplyr.tidyverse.org/dev/reference/storms.html","id":null,"dir":"Reference","previous_headings":"","what":"Storm tracks data — storms","title":"Storm tracks data — storms","text":"dataset NOAA Atlantic hurricane database best track data, https://www.nhc.noaa.gov/data/#hurdat. data includes positions attributes storms 1975-2022. Storms 1979 onward measured every six hours lifetime storm. 
Storms earlier years missing data.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/storms.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Storm tracks data — storms","text":"","code":"storms"},{"path":"https://dplyr.tidyverse.org/dev/reference/storms.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Storm tracks data — storms","text":"tibble 19,537 observations 13 variables: name Storm Name year,month,day Date report hour Hour report (UTC) lat,long Location storm center status Storm classification (Tropical Depression, Tropical Storm, Hurricane) category Saffir-Simpson hurricane category calculated wind speed. NA: hurricane 1: 64+ knots 2: 83+ knots 3: 96+ knots 4: 113+ knots 5: 137+ knots wind storm's maximum sustained wind speed (knots) pressure Air pressure storm's center (millibars) tropicalstorm_force_diameter Diameter (nautical miles) area experiencing tropical storm strength winds (34 knots ). available starting 2004. hurricane_force_diameter Diameter (nautical miles) area experiencing hurricane strength winds (64 knots ). 
available starting 2004.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/storms.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Storm tracks data — storms","text":"","code":"storms #> # A tibble: 19,537 × 13 #> name year month day hour lat long status category wind #> #> 1 Amy 1975 6 27 0 27.5 -79 tropical depr… NA 25 #> 2 Amy 1975 6 27 6 28.5 -79 tropical depr… NA 25 #> 3 Amy 1975 6 27 12 29.5 -79 tropical depr… NA 25 #> 4 Amy 1975 6 27 18 30.5 -79 tropical depr… NA 25 #> 5 Amy 1975 6 28 0 31.5 -78.8 tropical depr… NA 25 #> 6 Amy 1975 6 28 6 32.4 -78.7 tropical depr… NA 25 #> 7 Amy 1975 6 28 12 33.3 -78 tropical depr… NA 25 #> 8 Amy 1975 6 28 18 34 -77 tropical depr… NA 30 #> 9 Amy 1975 6 29 0 34.4 -75.8 tropical storm NA 35 #> 10 Amy 1975 6 29 6 34 -74.8 tropical storm NA 40 #> # ℹ 19,527 more rows #> # ℹ 3 more variables: pressure , tropicalstorm_force_diameter , #> # hurricane_force_diameter # Show a few recent storm paths if (requireNamespace(\"ggplot2\", quietly = TRUE)) { library(ggplot2) storms %>% filter(year >= 2000) %>% ggplot(aes(long, lat, color = paste(year, name))) + geom_path(show.legend = FALSE) + facet_wrap(~year) } storms #> # A tibble: 19,537 × 13 #> name year month day hour lat long status category wind #> #> 1 Amy 1975 6 27 0 27.5 -79 tropical depr… NA 25 #> 2 Amy 1975 6 27 6 28.5 -79 tropical depr… NA 25 #> 3 Amy 1975 6 27 12 29.5 -79 tropical depr… NA 25 #> 4 Amy 1975 6 27 18 30.5 -79 tropical depr… NA 25 #> 5 Amy 1975 6 28 0 31.5 -78.8 tropical depr… NA 25 #> 6 Amy 1975 6 28 6 32.4 -78.7 tropical depr… NA 25 #> 7 Amy 1975 6 28 12 33.3 -78 tropical depr… NA 25 #> 8 Amy 1975 6 28 18 34 -77 tropical depr… NA 30 #> 9 Amy 1975 6 29 0 34.4 -75.8 tropical storm NA 35 #> 10 Amy 1975 6 29 6 34 -74.8 tropical storm NA 40 #> # ℹ 19,527 more rows #> # ℹ 3 more variables: pressure , tropicalstorm_force_diameter , #> # hurricane_force_diameter 
"},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise.html","id":null,"dir":"Reference","previous_headings":"","what":"Summarise each group down to one row — summarise","title":"Summarise each group down to one row — summarise","text":"summarise() creates new data frame. returns one row combination grouping variables; grouping variables, output single row summarising observations input. contain one column grouping variable one column summary statistics specified. summarise() summarize() synonyms.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Summarise each group down to one row — summarise","text":"","code":"summarise(.data, ..., .by = NULL, .groups = NULL) summarize(.data, ..., .by = NULL, .groups = NULL)"},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Summarise each group down to one row — summarise","text":".data data frame, data frame extension (e.g. tibble), lazy data frame (e.g. dbplyr dtplyr). See Methods, , details. ... Name-value pairs summary functions. name name variable result. value can : vector length 1, e.g. min(x), n(), sum(.na(y)). data frame, add multiple columns single expression. Returning values size 0 >1 deprecated 1.1.0. Please use reframe() instead. . Optionally, selection columns group just operation, functioning alternative group_by(). details examples, see ?dplyr_by. .groups Grouping structure result. \"drop_last\": dropping last level grouping. supported option version 1.0.0. \"drop\": levels grouping dropped. \"keep\": grouping structure .data. \"rowwise\": row group. .groups specified, chosen based number rows results: results 1 row, get \"drop_last\". number rows varies, get \"keep\" (note returning variable number rows deprecated favor reframe(), also unconditionally drops levels grouping). 
addition, message informs choice, unless result ungrouped, option \"dplyr.summarise.inform\" set FALSE, summarise() called function package.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Summarise each group down to one row — summarise","text":"object usually type .data. rows come underlying group_keys(). columns combination grouping keys summary expressions provide. grouping structure controlled .groups= argument, output may another grouped_df, tibble rowwise data frame. Data frame attributes preserved, summarise() fundamentally creates new data frame.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise.html","id":"useful-functions","dir":"Reference","previous_headings":"","what":"Useful functions","title":"Summarise each group down to one row — summarise","text":"Center: mean(), median() Spread: sd(), IQR(), mad() Range: min(), max(), Position: first(), last(), nth(), Count: n(), n_distinct() Logical: (), ()","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise.html","id":"backend-variations","dir":"Reference","previous_headings":"","what":"Backend variations","title":"Summarise each group down to one row — summarise","text":"data frame backend supports creating variable using summary. means previously created summary variables can transformed combined within summary, mutate(). However, also means summary variables names previous variables overwrite , making variables unavailable later summary variables. behaviour may supported backends. 
avoid unexpected results, consider using new names summary variables, especially creating multiple summaries.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Summarise each group down to one row — summarise","text":"function generic, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. following methods currently available loaded packages: dbplyr (tbl_lazy), dplyr (data.frame, grouped_df, rowwise_df) .","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Summarise each group down to one row — summarise","text":"","code":"# A summary applied to ungrouped tbl returns a single row mtcars %>% summarise(mean = mean(disp), n = n()) #> mean n #> 1 230.7219 32 # Usually, you'll want to group first mtcars %>% group_by(cyl) %>% summarise(mean = mean(disp), n = n()) #> # A tibble: 3 × 3 #> cyl mean n #> #> 1 4 105. 11 #> 2 6 183. 7 #> 3 8 353. 14 # Each summary call removes one grouping level (since that group # is now just a single row) mtcars %>% group_by(cyl, vs) %>% summarise(cyl_n = n()) %>% group_vars() #> `summarise()` has grouped output by 'cyl'. You can override using the #> `.groups` argument. #> [1] \"cyl\" # BEWARE: reusing variables may lead to unexpected results mtcars %>% group_by(cyl) %>% summarise(disp = mean(disp), sd = sd(disp)) #> # A tibble: 3 × 3 #> cyl disp sd #> #> 1 4 105. NA #> 2 6 183. NA #> 3 8 353. 
NA # Refer to column names stored as strings with the `.data` pronoun: var <- \"mass\" summarise(starwars, avg = mean(.data[[var]], na.rm = TRUE)) #> # A tibble: 1 × 1 #> avg #> #> 1 97.3 # Learn more in ?rlang::args_data_masking # In dplyr 1.1.0, returning multiple rows per group was deprecated in favor # of `reframe()`, which never messages and always returns an ungrouped # result: mtcars %>% group_by(cyl) %>% summarise(qs = quantile(disp, c(0.25, 0.75)), prob = c(0.25, 0.75)) #> Warning: Returning more (or less) than 1 row per `summarise()` group was #> deprecated in dplyr 1.1.0. #> ℹ Please use `reframe()` instead. #> ℹ When switching from `summarise()` to `reframe()`, remember that #> `reframe()` always returns an ungrouped data frame and adjust #> accordingly. #> `summarise()` has grouped output by 'cyl'. You can override using the #> `.groups` argument. #> # A tibble: 6 × 3 #> # Groups: cyl [3] #> cyl qs prob #> #> 1 4 78.8 0.25 #> 2 4 121. 0.75 #> 3 6 160 0.25 #> 4 6 196. 0.75 #> 5 8 302. 0.25 #> 6 8 390 0.75 # -> mtcars %>% group_by(cyl) %>% reframe(qs = quantile(disp, c(0.25, 0.75)), prob = c(0.25, 0.75)) #> # A tibble: 6 × 3 #> cyl qs prob #> #> 1 4 78.8 0.25 #> 2 4 121. 0.75 #> 3 6 160 0.25 #> 4 6 196. 0.75 #> 5 8 302. 0.25 #> 6 8 390 0.75"},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise_all.html","id":null,"dir":"Reference","previous_headings":"","what":"Summarise multiple columns — summarise_all","title":"Summarise multiple columns — summarise_all","text":"Scoped verbs (_if, _at, _all) superseded use pick() across() existing verb. See vignette(\"colwise\") details. scoped variants summarise() make easy apply transformation multiple variables. three variants. 
summarise_all() affects every variable summarise_at() affects variables selected character vector vars() summarise_if() affects variables selected predicate function","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise_all.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Summarise multiple columns — summarise_all","text":"","code":"summarise_all(.tbl, .funs, ...) summarise_if(.tbl, .predicate, .funs, ...) summarise_at(.tbl, .vars, .funs, ..., .cols = NULL) summarize_all(.tbl, .funs, ...) summarize_if(.tbl, .predicate, .funs, ...) summarize_at(.tbl, .vars, .funs, ..., .cols = NULL)"},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise_all.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Summarise multiple columns — summarise_all","text":".tbl tbl object. .funs function fun, quosure style lambda ~ fun(.) list either form. ... Additional arguments function calls .funs. evaluated , tidy dots support. .predicate predicate function applied columns logical vector. variables .predicate returns TRUE selected. argument passed rlang::as_function() thus supports quosure-style lambda functions strings representing function names. .vars list columns generated vars(), character vector column names, numeric vector column positions, NULL. .cols argument renamed .vars fit dplyr's terminology deprecated.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise_all.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Summarise multiple columns — summarise_all","text":"data frame. default, newly created columns shortest names needed uniquely identify output. 
force inclusion name, even needed, name input (see examples details).","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise_all.html","id":"grouping-variables","dir":"Reference","previous_headings":"","what":"Grouping variables","title":"Summarise multiple columns — summarise_all","text":"applied grouped tibble, operations applied grouping variables. behaviour depends whether selection implicit (selections) explicit (selections). Grouping variables covered explicit selections summarise_at() always error. Add -group_cols() vars() selection avoid : remove group_vars() character vector column names: Grouping variables covered implicit selections silently ignored summarise_all() summarise_if().","code":"data %>% summarise_at(vars(-group_cols(), ...), myoperation) nms <- setdiff(nms, group_vars(data)) data %>% summarise_at(nms, myoperation)"},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise_all.html","id":"naming","dir":"Reference","previous_headings":"","what":"Naming","title":"Summarise multiple columns — summarise_all","text":"names new columns derived names input variables names functions. one unnamed function (.e. .funs unnamed list length one), names input variables used name new columns; _at functions, one unnamed variable (.e., .vars form vars(a_single_column)) .funs length greater one, names functions used name new columns; otherwise, new names created concatenating names input variables names functions, separated underscore \"_\". .funs argument can named unnamed list. function unnamed name derived automatically, name form \"fn#\" used. Similarly, vars() accepts named unnamed arguments. variable .vars named, new column name created. 
Name collisions new columns disambiguated using unique suffix.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise_all.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Summarise multiple columns — summarise_all","text":"","code":"# The _at() variants directly support strings: starwars %>% summarise_at(c(\"height\", \"mass\"), mean, na.rm = TRUE) #> # A tibble: 1 × 2 #> height mass #> #> 1 175. 97.3 # -> starwars %>% summarise(across(c(\"height\", \"mass\"), ~ mean(.x, na.rm = TRUE))) #> # A tibble: 1 × 2 #> height mass #> #> 1 175. 97.3 # You can also supply selection helpers to _at() functions but you have # to quote them with vars(): starwars %>% summarise_at(vars(height:mass), mean, na.rm = TRUE) #> # A tibble: 1 × 2 #> height mass #> #> 1 175. 97.3 # -> starwars %>% summarise(across(height:mass, ~ mean(.x, na.rm = TRUE))) #> # A tibble: 1 × 2 #> height mass #> #> 1 175. 97.3 # The _if() variants apply a predicate function (a function that # returns TRUE or FALSE) to determine the relevant subset of # columns. Here we apply mean() to the numeric columns: starwars %>% summarise_if(is.numeric, mean, na.rm = TRUE) #> # A tibble: 1 × 3 #> height mass birth_year #> #> 1 175. 97.3 87.6 starwars %>% summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE))) #> # A tibble: 1 × 3 #> height mass birth_year #> #> 1 175. 97.3 87.6 by_species <- iris %>% group_by(Species) # If you want to apply multiple transformations, pass a list of # functions. 
When there are multiple functions, they create new # variables instead of modifying the variables in place: by_species %>% summarise_all(list(min, max)) #> # A tibble: 3 × 9 #> Species Sepal.Length_fn1 Sepal.Width_fn1 Petal.Length_fn1 #> #> 1 setosa 4.3 2.3 1 #> 2 versicolor 4.9 2 3 #> 3 virginica 4.9 2.2 4.5 #> # ℹ 5 more variables: Petal.Width_fn1 , Sepal.Length_fn2 , #> # Sepal.Width_fn2 , Petal.Length_fn2 , Petal.Width_fn2 # -> by_species %>% summarise(across(everything(), list(min = min, max = max))) #> # A tibble: 3 × 9 #> Species Sepal.Length_min Sepal.Length_max Sepal.Width_min #> #> 1 setosa 4.3 5.8 2.3 #> 2 versicolor 4.9 7 2 #> 3 virginica 4.9 7.9 2.2 #> # ℹ 5 more variables: Sepal.Width_max , Petal.Length_min , #> # Petal.Length_max , Petal.Width_min , Petal.Width_max "},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise_each.html","id":null,"dir":"Reference","previous_headings":"","what":"Summarise and mutate multiple columns. — summarise_each","title":"Summarise and mutate multiple columns. — summarise_each","text":"mutate_each() summarise_each() deprecated favour new across() function works within summarise() mutate().","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/summarise_each.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Summarise and mutate multiple columns. — summarise_each","text":"","code":"summarise_each(tbl, funs, ...) summarise_each_(tbl, funs, vars) mutate_each(tbl, funs, ...) mutate_each_(tbl, funs, vars) summarize_each(tbl, funs, ...) 
summarize_each_(tbl, funs, vars)"},{"path":"https://dplyr.tidyverse.org/dev/reference/tbl.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a table from a data source — tbl","title":"Create a table from a data source — tbl","text":"generic method dispatches based first argument.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/tbl.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a table from a data source — tbl","text":"","code":"tbl(src, ...) is.tbl(x)"},{"path":"https://dplyr.tidyverse.org/dev/reference/tbl.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a table from a data source — tbl","text":"src data source ... arguments passed individual methods x object","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/tbl_df.html","id":null,"dir":"Reference","previous_headings":"","what":"Coerce to a tibble — tbl_df","title":"Coerce to a tibble — tbl_df","text":"Please use tibble::as_tibble() instead.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/tbl_df.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Coerce to a tibble — tbl_df","text":"","code":"tbl_df(data) as.tbl(x, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/tbl_df.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Coerce to a tibble — tbl_df","text":"data, x Object coerce","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/tbl_ptype.html","id":null,"dir":"Reference","previous_headings":"","what":"Return a prototype of a tbl — tbl_ptype","title":"Return a prototype of a tbl — tbl_ptype","text":"Used _if functions enable type-based selection even data lazily generated. 
either return complete tibble, can computed quickly, 0-row tibble columns correct type.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/tbl_ptype.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Return a prototype of a tbl — tbl_ptype","text":"","code":"tbl_ptype(.data)"},{"path":"https://dplyr.tidyverse.org/dev/reference/tbl_vars.html","id":null,"dir":"Reference","previous_headings":"","what":"List variables provided by a tbl. — tbl_vars","title":"List variables provided by a tbl. — tbl_vars","text":"tbl_vars() returns variables tbl_nongroup_vars() returns non-grouping variables. groups attribute object returned tbl_vars() character vector grouping columns.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/tbl_vars.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"List variables provided by a tbl. — tbl_vars","text":"","code":"tbl_vars(x) tbl_nongroup_vars(x)"},{"path":"https://dplyr.tidyverse.org/dev/reference/tbl_vars.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"List variables provided by a tbl. — tbl_vars","text":"x tbl object","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/tidyeval-compat.html","id":null,"dir":"Reference","previous_headings":"","what":"Other tidy eval tools — tidyeval-compat","title":"Other tidy eval tools — tidyeval-compat","text":"tidy eval functions longer normal usage, still exported dplyr backward compatibility. See ?rlang::args_data_masking vignette(\"programming\") latest recommendations. 
expr() enquo() enquos() sym() syms() as_label() quo() quos() quo_name() ensym() ensyms() enexpr() enexprs()","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/top_n.html","id":null,"dir":"Reference","previous_headings":"","what":"Select top (or bottom) n rows (by value) — top_n","title":"Select top (or bottom) n rows (by value) — top_n","text":"top_n() superseded favour slice_min()/slice_max(). deprecated near future, retirement means perform critical bug fixes, recommend moving newer alternatives. top_n() superseded name fundamentally confusing returned might reasonably consider bottom rows. Additionally, wt variable confusing name, strange default (last column data frame). Unfortunately see easy way fix existing top_n() function without breaking existing code, created new alternative.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/top_n.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Select top (or bottom) n rows (by value) — top_n","text":"","code":"top_n(x, n, wt) top_frac(x, n, wt)"},{"path":"https://dplyr.tidyverse.org/dev/reference/top_n.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Select top (or bottom) n rows (by value) — top_n","text":"x data frame. n Number rows return top_n(), fraction rows return top_frac(). n positive, selects top rows. negative, selects bottom rows. x grouped, number (fraction) rows per group. include rows ties. wt (Optional). variable use ordering. 
specified, defaults last variable tbl.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/top_n.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Select top (or bottom) n rows (by value) — top_n","text":"","code":"df <- data.frame(x = c(6, 4, 1, 10, 3, 1, 1)) df %>% top_n(2) # highest values #> Selecting by x #> x #> 1 6 #> 2 10 df %>% top_n(-2) # lowest values #> Selecting by x #> x #> 1 1 #> 2 1 #> 3 1 # now use df %>% slice_max(x, n = 2) #> x #> 1 10 #> 2 6 df %>% slice_min(x, n = 2) #> x #> 1 1 #> 2 1 #> 3 1 # top_frac() -> prop argument of slice_min()/slice_max() df %>% top_frac(.5) #> Selecting by x #> x #> 1 6 #> 2 4 #> 3 10 # -> df %>% slice_max(x, prop = 0.5) #> x #> 1 10 #> 2 6 #> 3 4"},{"path":"https://dplyr.tidyverse.org/dev/reference/transmute.html","id":null,"dir":"Reference","previous_headings":"","what":"Create, modify, and delete columns — transmute","title":"Create, modify, and delete columns — transmute","text":"transmute() creates new data frame containing specified computations. superseded can perform job mutate(.keep = \"none\").","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/transmute.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create, modify, and delete columns — transmute","text":"","code":"transmute(.data, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/transmute.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create, modify, and delete columns — transmute","text":".data data frame, data frame extension (e.g. tibble), lazy data frame (e.g. dbplyr dtplyr). See Methods, , details. ... Name-value pairs. name gives name column output. value can : vector length 1, recycled correct length. vector length current group (whole data frame ungrouped). NULL, remove column. 
data frame tibble, create multiple columns output.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/transmute.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create, modify, and delete columns — transmute","text":"object type .data. output following properties: Columns created modified ... returned order specified .... Unmodified grouping columns placed front. number rows affected. Columns given value NULL removed. Groups recomputed grouping variable mutated. Data frame attributes preserved.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/transmute.html","id":"methods","dir":"Reference","previous_headings":"","what":"Methods","title":"Create, modify, and delete columns — transmute","text":"function generic, means packages can provide implementations (methods) classes. See documentation individual methods extra arguments differences behaviour. Methods available currently loaded packages: dbplyr (tbl_lazy), dplyr (data.frame) .","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/vars.html","id":null,"dir":"Reference","previous_headings":"","what":"Select variables — vars","title":"Select variables — vars","text":"vars() superseded needed scoped verbs (.e. mutate_at(), summarise_at(), friends), superseded favour across(). See vignette(\"colwise\") details. helper intended provide tidy-select semantics scoped verbs like mutate_at() summarise_at(). Note anywhere can supply vars() specification, can also supply numeric vector column positions character vector column names.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/vars.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Select variables — vars","text":"","code":"vars(...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/vars.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Select variables — vars","text":"... 
Variables operate .","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/reference/with_groups.html","id":null,"dir":"Reference","previous_headings":"","what":"Perform an operation with temporary groups — with_groups","title":"Perform an operation with temporary groups — with_groups","text":"experimental function allows modify grouping variables single operation; superseded favour using .argument individual verbs.","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/with_groups.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Perform an operation with temporary groups — with_groups","text":"","code":"with_groups(.data, .groups, .f, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/with_groups.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Perform an operation with temporary groups — with_groups","text":".data data frame .groups One variables group . Unlike group_by(), can group existing variables, can use tidy-select syntax like c(x, y, z) select multiple variables. Use NULL temporarily ungroup. .f Function apply regrouped data. Supports purrr-style ~ syntax ... 
Additional arguments passed ....","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/with_groups.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Perform an operation with temporary groups — with_groups","text":"","code":"df <- tibble(g = c(1, 1, 2, 2, 3), x = runif(5)) # Old df %>% with_groups(g, mutate, x_mean = mean(x)) #> # A tibble: 5 × 3 #> g x x_mean #> #> 1 1 0.764 0.791 #> 2 1 0.819 0.791 #> 3 2 0.761 0.795 #> 4 2 0.829 0.795 #> 5 3 0.00851 0.00851 # New df %>% mutate(x_mean = mean(x), .by = g) #> # A tibble: 5 × 3 #> g x x_mean #> #> 1 1 0.764 0.791 #> 2 1 0.819 0.791 #> 3 2 0.761 0.795 #> 4 2 0.829 0.795 #> 5 3 0.00851 0.00851"},{"path":"https://dplyr.tidyverse.org/dev/reference/with_order.html","id":null,"dir":"Reference","previous_headings":"","what":"Run a function with one order, translating result back to original order — with_order","title":"Run a function with one order, translating result back to original order — with_order","text":"used power ordering parameters dplyr's window functions","code":""},{"path":"https://dplyr.tidyverse.org/dev/reference/with_order.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Run a function with one order, translating result back to original order — with_order","text":"","code":"with_order(order_by, fun, x, ...)"},{"path":"https://dplyr.tidyverse.org/dev/reference/with_order.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Run a function with one order, translating result back to original order — with_order","text":"order_by vector order fun window function x, ... 
arguments f","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-development-version","dir":"Changelog","previous_headings":"","what":"dplyr (development version)","title":"dplyr (development version)","text":"R >=3.6.0 now explicitly required (#7026).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-114","dir":"Changelog","previous_headings":"","what":"dplyr 1.1.4","title":"dplyr 1.1.4","text":"CRAN release: 2023-11-17 join_by() now allows helper functions namespaced dplyr::, like join_by(dplyr::(x, lower, upper)) (#6838). left_join() friends now return specialized error message detect join return rows dplyr can handle (#6912). slice_*() now throw correct error forget name n also prefixing call dplyr:: (#6946). dplyr_reconstruct()’s default method rewritten avoid materializing duckplyr queries early (#6947). Updated storms data include 2022 data (#6937, @steveharoz). Updated starwars data use new API, old one defunct. minor changes data (#6938, @steveharoz).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-113","dir":"Changelog","previous_headings":"","what":"dplyr 1.1.3","title":"dplyr 1.1.3","text":"CRAN release: 2023-09-03 mutate_each() summarise_each() now throw correct deprecation messages (#6869). setequal() now requires input data frames compatible, similar set methods like setdiff() intersect() (#6786).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-112","dir":"Changelog","previous_headings":"","what":"dplyr 1.1.2","title":"dplyr 1.1.2","text":"CRAN release: 2023-04-20 count() better documents .drop argument (#6820). Fixed tests maintain compatibility next version waldo (#6823). 
Joins better handle key columns NAs (#6804).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-111","dir":"Changelog","previous_headings":"","what":"dplyr 1.1.1","title":"dplyr 1.1.1","text":"CRAN release: 2023-03-22 Mutating joins now warn multiple matches much less often. high level, warning previously thrown one--many many--many relationship detected keys x y, now thrown many--many relationship, much rarer much dangerous one--many can result Cartesian explosion number rows returned join (#6731, #6717). ’ve accomplished two steps: multiple now defaults \"\", options \"error\" \"warning\" now deprecated favor using relationship (see ). using accelerated deprecation process two options ’ve available weeks, relationship clearly superior alternative. mutating joins gain new relationship argument, allowing optionally enforce one following relationship constraints keys x y: \"one--one\", \"one--many\", \"many--one\", \"many--many\". example, \"many--one\" enforces row x can match 1 row y. row x matches >1 rows y, error thrown. option serves replacement multiple = \"error\". default behavior relationship doesn’t assume relationship x y. However, equality joins check presence many--many relationship, warn detects one. change unfortunately mean set multiple = \"\" avoid warning happened many--many style join, need replace multiple = \"\" relationship = \"many--many\" silence new warning, believe rare since many--many relationships fairly uncommon. Fixed major performance regression case_when(). still little slower dplyr 1.0.10, plan improve future (#6674). Fixed performance regression related nth(), first(), last() (#6682). Fixed issue expressions involving infix operators abnormally large amount overhead (#6681). group_data() ungrouped data frames faster (#6736). n() little faster many groups (#6727). pick() now returns 1 row, 0 column tibble ... evaluates empty selection. makes compatible tidyverse recycling rules edge cases (#6685). 
if_else() case_when() accept logical conditions attributes (#6678). arrange() can sort numeric_version type base R (#6680). slice_sample() now works input column named replace. slice_min() slice_max() now work input columns named na_rm with_ties (#6725). nth() now errors informatively n NA (#6682). Joins now throw informative error y doesn’t source x (#6798). major dplyr verbs now throw informative error message input data frame contains column named NA \"\" (#6758). Deprecation warnings thrown filter() now mention correct package problem originated (#6679). Fixed issue using <- within grouped mutate() summarise() cross contaminate groups (#6666). compatibility vignette replaced general vignette using dplyr packages, vignette(\"-packages\") (#6702). developer documentation ?dplyr_extending refreshed brought date changes made 1.1.0 (#6695). rename_with() now includes example using paste0(recycle0 = TRUE) correctly handle empty selections (#6688). R >=3.5.0 now explicitly required. line tidyverse policy supporting 5 recent versions R.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-110","dir":"Changelog","previous_headings":"","what":"dplyr 1.1.0","title":"dplyr 1.1.0","text":"CRAN release: 2023-01-29","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"new-features-1-1-0","dir":"Changelog","previous_headings":"","what":"New features","title":"dplyr 1.1.0","text":"./experimental alternative group_by() supports per-operation grouping mutate(), summarise(), filter(), slice() family (#6528). Rather : can now write: useful reason .affects single operation. example , ungrouped data frame went summarise() call, ungrouped data frame come ; ., never need remember ungroup() afterwards never need use .groups argument. Additionally, using summarise() .never sort results group key, unlike group_by(). Instead, results returned using existing ordering groups original data. 
feel predictable, better maintains ordering might already applied previous call arrange(), provides way maintain current ordering without resort factors. feature inspired data.table, equivalent syntax looks like: with_groups() superseded favor .(#6582). reframe() new experimental verb creates new data frame applying functions columns existing data frame. similar summarise(), two big differences: reframe() can return arbitrary number rows per group, summarise() reduces group single row. reframe() always returns ungrouped data frame, summarise() might return grouped rowwise data frame, depending scenario. reframe() added response valid concern community allowing summarise() return number rows per group increases chance accidental bugs. still feel powerful technique, principled replacement (), moved features reframe() (#6382). group_by() now uses new algorithm computing groups. often faster previous approach (especially many groups), cases changes. one exception character vectors, see C locale news bullet details (#4406, #6297). arrange() now uses faster algorithm sorting character vectors, heavily inspired data.table’s forder(). See C locale news bullet details (#4962). Joins completely overhauled enable flexible join operations provide tools quality control. Many changes inspired data.table’s join syntax (#5914, #5661, #5413, #2240). join specification can now created join_by(). allows specify left right hand side join using unquoted column names, join_by(sale_date == commercial_date). Join specifications can supplied *_join() function argument. Join specifications allow new types joins: Equality joins: common join, specified ==. example, join_by(sale_date == commercial_date). Inequality joins: joining inequalities, .e.>=, >, <, <=. example, use join_by(sale_date >= commercial_date) find every commercial aired particular sale. Rolling joins: “rolling” closest match forward backwards isn’t exact match, specified using rolling helper, closest(). 
example, join_by(closest(sale_date >= commercial_date)) find recent commercial aired particular sale. Overlap joins: detecting overlaps sets columns, specified using one overlap helpers: (), within(), overlaps(). example, use join_by((commercial_date, sale_date_lower, sale_date)) find commercials aired particular sale, long occurred lower bound, 40 days sale made. Note use arbitrary expressions join conditions, like join_by(sale_date - 40 >= commercial_date). Instead, use mutate() create new column containing result sale_date - 40 refer name join_by(). multiple new argument controlling happens row x matches multiple rows y. equality joins rolling joins, usually surprising, defaults signalling \"warning\", still returns matches. inequality joins, multiple matches usually expected, defaults returning \"\" matches. can also return \"first\" \"last\" match, \"\" matches, can \"error\". keep now defaults NULL rather FALSE. NULL implies keep = FALSE equality conditions, keep = TRUE inequality conditions, since generally want preserve sides inequality join. unmatched new argument controlling happens row dropped doesn’t match. backwards compatibility, default \"drop\", can also choose \"error\" dropped rows surprising. across() gains experimental .unpack argument optionally unpack (, tidyr::unpack()) data frames returned functions .fns (#6360). consecutive_id() creating groups based contiguous runs values, like data.table::rleid() (#1534). case_match() “vectorised switch” variant case_when() matches values rather logical expressions. like SQL “simple” CASE statement, whereas case_when() like SQL “searched” CASE statement (#6328). cross_join() explicit slightly correct replacement using = character() join (#6604). pick() makes easy access subset columns current group. pick() intended replacement across(.fns = NULL), cur_data(), cur_data_all(). feel pick() much evocative name just trying select subset columns data (#6204). 
symdiff() computes symmetric difference (#4811).","code":"starwars %>% group_by(species, homeworld) %>% summarise(mean_height = mean(height)) starwars %>% summarise( mean_height = mean(height), .by = c(species, homeworld) ) starwars[, .(mean_height = mean(height)), by = .(species, homeworld)]"},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"breaking-changes-1-1-0","dir":"Changelog","previous_headings":"Lifecycle changes","what":"Breaking changes","title":"dplyr 1.1.0","text":"arrange() group_by() now use C locale, system locale, ordering grouping character vectors. brings substantial performance improvements, increases reproducibility across R sessions, makes dplyr consistent data.table, believe affect little existing code. affect code, can use options(dplyr.legacy_locale = TRUE) quickly revert previous behavior. However, general, instead recommend use new .locale argument precisely specify desired locale. full explanation please read associated grouping ordering tidyups. bench_tbls(), compare_tbls(), compare_tbls2(), eval_tbls(), eval_tbls2(), location() changes(), deprecated 1.0.0, now defunct (#6387). frame_data(), data_frame_(), lst_() tbl_sum() longer re-exported tibble (#6276, #6277, #6278, #6284). select_vars(), rename_vars(), select_var() current_vars(), deprecated 0.8.4, now defunct (#6387).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"newly-deprecated-1-1-0","dir":"Changelog","previous_headings":"Lifecycle changes","what":"Newly deprecated","title":"dplyr 1.1.0","text":"across(), c_across(), if_any(), if_all() now require .cols .fns arguments. general, now recommend use pick() instead empty across() call across() .fns (e.g. across(c(x, y)). (#6523). Relying previous default .cols = everything() deprecated. skipped soft-deprecation stage case, indirect usage across() friends way rare. 
Relying previous default .fns = NULL yet formally soft-deprecated, good alternative now, discouraged soft-deprecated next minor release. Passing ... across() soft-deprecated ’s ambiguous arguments evaluated. Now, instead (e.g.) across(:b, mean, na.rm = TRUE) write across(:b, ~ mean(.x, na.rm = TRUE)) (#6073). all_equal() deprecated. ’ve advised time, explicitly recommend use .equal(), manually reordering rows columns needed (#6324). cur_data() cur_data_all() soft-deprecated favour pick() (#6204). Using = character() perform cross join now soft-deprecated favor cross_join() (#6604). filter()ing 1-column matrix deprecated (#6091). progress_estimate() deprecated uses (#6387). Using summarise() produce 0 >1 row “summary” deprecated favor new reframe(). See NEWS bullet reframe() details (#6382). functions deprecated 1.0.0 (released April 2020) earlier now warn every time use (#6387). includes combine(), src_local(), src_mysql(), src_postgres(), src_sqlite(), rename_vars_(), select_vars_(), summarise_each_(), mutate_each_(), .tbl(), tbl_df(), handful older arguments. likely made defunct next major version (mid 2024). slice()ing 1-column matrix deprecated.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"newly-superseded-1-1-0","dir":"Changelog","previous_headings":"Lifecycle changes","what":"Newly superseded","title":"dplyr 1.1.0","text":"recode() superseded favour case_match() (#6433). recode_factor() superseded. don’t direct replacement yet, plan add one forcats. meantime can often use case_match(.ptype = factor(levels = )) instead (#6433). transmute() superseded favour mutate(.keep = \"none\") (#6414).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"newly-stable-1-1-0","dir":"Changelog","previous_headings":"Lifecycle changes","what":"Newly stable","title":"dplyr 1.1.0","text":".keep, ., .arguments mutate() moved experimental stable. 
rows_*() family functions moved experimental stable.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"vctrs-1-1-0","dir":"Changelog","previous_headings":"","what":"vctrs","title":"dplyr 1.1.0","text":"Many dplyr’s vector functions rewritten make use vctrs package, bringing greater consistency improved performance. () can now work vector types, just numeric date-time. Additionally, left right can now also vectors (length x), x, left, right cast common type comparison made (#6183, #6260, #6478). case_when() (#5106): new .default argument intended replace usage TRUE ~ default_value explicit readable way specify default value. future, deprecate unsafe recycling LHS inputs allows TRUE ~ work, encourage switch using .default. longer requires exact matching types RHS values. example, following longer requires use NA_character_. Supports larger variety RHS value types. example, can use data frame create multiple columns . new .ptype .size arguments allow enforce particular output type size. better error types lengths incompatible (#6261, #6206). coalesce() (#6265): Discards NULL inputs front. longer iterates columns data frame input. Instead, row now coalesced entirely missing, consistent vctrs::vec_detect_missing() greatly simplifies implementation. new .ptype .size arguments allow enforce particular output type size. first(), last(), nth() (#6331): used data frame, functions now return single row rather single column. consistent vctrs principle data frame generally treated vector rows. default longer “guessed”, always automatically set missing value appropriate type x. Error n integer. nth(x, n = 2) fine, nth(x, n = 2.5) now error. longer support indexing scalar objects, like scalar S4 objects (#6670). Additionally, gained na_rm argument since summary functions (#6242, contributions @tnederlof). if_else() gains benefits case_when(). 
particular, if_else() now takes common type true, false, missing determine output type, meaning can now reliably use NA, rather NA_character_ friends (#6243). if_else() also longer allows supply NULL either true false, undocumented usage consider -label, true false intended (documented ) vector inputs (#6730). na_if() (#6329) now casts y type x comparison, makes clearer function type size stable x. particular, means can longer na_if(, 0), previously accidentally allowed replace instance 0 across every column tibble NA. na_if() never intended work way, considered -label usage. can also now replace NaN values x na_if(x, NaN). lag() lead() now cast default type x, rather taking common type. ensures functions type stable x (#6330). row_number(), min_rank(), dense_rank(), ntile(), cume_dist(), percent_rank() faster work types. can now rank multiple columns supplying data frame (#6428). with_order() now checks size order_by size x, now works correctly order_by data frame (#6334).","code":"x <- c(\"little\", \"unknown\", \"small\", \"missing\", \"large\") case_when( x %in% c(\"little\", \"small\") ~ \"one\", x %in% c(\"big\", \"large\") ~ \"two\", x %in% c(\"missing\", \"unknown\") ~ NA )"},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"minor-improvements-and-bug-fixes-1-1-0","dir":"Changelog","previous_headings":"","what":"Minor improvements and bug fixes","title":"dplyr 1.1.0","text":"Fixed issue latest rlang caused internal tools (mask$eval_all_summarise()) mentioned error messages (#6308). Warnings enriched contextualised information summarise() filter() just like mutate() arrange(). Joins now reference correct column y type error thrown joining two columns different names (#6465). Joins wide tables longer bottlenecked application suffix (#6642). *_join() now error supply additional arguments aren’t used (#6228). across() used without functions inside rowwise-data frame longer generates invalid data frame (#6264). 
Anonymous functions supplied function() \\() now inlined across() possible, slightly improves performance makes possible optimisations future. Functions supplied across() longer masked columns (#6545). instance, across(1:2, mean) now work expected even column called mean. across() now error supplied ... without .fns argument (#6638). arrange() now correctly ignores NULL inputs (#6193). arrange() now works correctly across() calls used 2nd () ordering expression (#6495). arrange(df, mydesc::desc(x)) works correctly mydesc re-exports dplyr::desc() (#6231). c_across() now evaluates all_of() correctly longer allows accidentally select grouping variables (#6522). c_across() now throws informative error try rename column selection (#6522). dplyr longer provides count() tally() methods tbl_sql. methods accidentally overriding tbl_lazy methods dbplyr provides, resulted issues grouping structure output (#6338, tidyverse/dbplyr#940). cur_group() now works correctly zero row grouped data frames (#6304). desc() gives useful error message give non-vector (#6028). distinct() now retains attributes bare data frames (#6318). distinct() returns columns ordered way request, input data (#6156). Error messages group_by(), distinct(), tally(), count() now relevant (#6139). group_by_prepare() loses caller_env argument. rarely used longer needed (#6444). group_walk() gains explicit .keep argument (#6530). Warnings emitted inside mutate() variants now collected stashed away. Run new last_dplyr_warnings() function see warnings emitted within dplyr verbs last top-level command. fixes performance issues thousands warnings emitted rowwise grouped data frames (#6005, #6236). mutate() behaves little better 0-row rowwise inputs (#6303). rowwise mutate() now automatically unlists list-columns containing length 1 vectors (#6302). nest_join() gained na_matches argument joins . nest_join() now preserves type y (#6295). n_distinct() now errors don’t give input (#6535). 
nth(), first(), last(), with_order() now sort character order_by vectors C locale. Using character vectors order_by rare, expect little practical impact (#6451). ntile() now requires n single positive integer. relocate() now works correctly empty data frames ..result empty selections (#6167). relocate() longer drops attributes bare data frames (#6341). relocate() now retains last name change single column renamed multiple times moved. better matches behavior rename() (#6209, help @eutwt). rename() now contains examples using all_of() any_of() rename using named character vector (#6644). rename_with() now disallows renaming .cols tidy-selection (#6561). rename_with() now checks result .fn right type size (#6561). rows_insert() now checks y contains columns (#6652). setequal() ignores differences freely coercible types (e.g. integer double) (#6114) ignores duplicated rows (#6057). slice() helpers produce output equivalent slice(.data, 0) n prop argument 0, fixing bug introduced previous version (@eutwt, #6184). slice() inputs now returns 0 rows. mostly theoretical consistency (#6573). slice() now errors expressions ... named. helps avoid accidentally misspelling optional argument, .(#6554). slice_*() now requires n integer. slice_*() generics now perform argument validation. make methods consistent simpler implement (#6361). slice_min() slice_max() can order_by multiple variables supply data.frame tibble (#6176). slice_min() slice_max() now consistently include missing values result necessary (.e. aren’t enough non-missing values reach n prop selected). don’t want missing values included , set na_rm = TRUE (#6177). slice_sample() now accepts negative n prop values (#6402). slice_sample() returns data frame group number rows input replace = FALSE n larger number rows prop larger 1. reverts change made 1.0.8, returning behavior 1.0.7 (#6185) slice_sample() now gives informative error replace = FALSE number rows requested sample exceeds number rows data (#6271). 
storms updated include 2021 data missing storms omitted due error (@steveharoz, #6320). summarise() now correctly recycles named 0-column data frames (#6509). union_all(), like union(), now requires data frames compatible: .e. columns, columns compatible types. () re-exported tidyselect (#6597).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-1010","dir":"Changelog","previous_headings":"","what":"dplyr 1.0.10","title":"dplyr 1.0.10","text":"CRAN release: 2022-09-01 Hot patch release resolve R CMD check failures.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-109","dir":"Changelog","previous_headings":"","what":"dplyr 1.0.9","title":"dplyr 1.0.9","text":"CRAN release: 2022-04-28 New rows_append() works like rows_insert() ignores keys allows insert arbitrary rows guarantee type x won’t change (#6249, thanks @krlmlr implementation @mgirlich idea). rows_*() functions longer require key values x uniquely identify row. Additionally, rows_insert() rows_delete() longer require key values y uniquely identify row. Relaxing restriction make functions practically useful data frames, alternative backends can enforce ways needed (.e. primary keys) (#5553). rows_insert() gained new conflict argument allowing greater control rows y keys conflict keys x. conflict arises key y already exists x. default, conflict results error, can now also \"ignore\" y rows. similar CONFLICT NOTHING command SQL (#5588, helpful additions @mgirlich @krlmlr). rows_update(), rows_patch(), rows_delete() gained new unmatched argument allowing greater control rows y keys unmatched keys x. default, unmatched key results error, can now also \"ignore\" y rows (#5984, #5699). rows_delete() longer requires columns y strict subset x. columns specified utilized y, others dropped message. rows_*() functions now always retain column types x. behavior documented, previously wasn’t applied correctly (#6240). 
rows_*() functions now fail elegantly y zero column data frame isn’t specified (#6179).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-108","dir":"Changelog","previous_headings":"","what":"dplyr 1.0.8","title":"dplyr 1.0.8","text":"CRAN release: 2022-02-08 Better display error messages thanks rlang 1.0.0. mutate(.keep = \"none\") longer identical transmute(). transmute() changed, completely ignores column ordering existing data, instead relying ordering expressions supplied .... mutate(.keep = \"none\") changed ensure pre-existing columns never moved, aligns closely .keep options (#6086). filter() forbids matrix results (#5973) warns data frame results, especially data frames created across() hint use if_any() if_all(). slice() helpers (slice_head(), slice_tail(), slice_min(), slice_max()) now accept negative values n prop (#5961). slice() now indicates group produces error (#5931). cur_data() cur_data_all() don’t simplify list columns rowwise data frames (#5901). dplyr now uses rlang::check_installed() prompt whether install required packages missing. storms data updated 2020 (@steveharoz, #5899). coalesce() accepts 1-D arrays (#5557). deprecated trunc_mat() longer reexported dplyr (#6141).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-107","dir":"Changelog","previous_headings":"","what":"dplyr 1.0.7","title":"dplyr 1.0.7","text":"CRAN release: 2021-06-18 across() uses formula environment inlining (#5886). summarise.rowwise_df() quiet result ungrouped (#5875). c_across() across() key deparsing confused long calls (#5883). across() handles named selections (#5207).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-106","dir":"Changelog","previous_headings":"","what":"dplyr 1.0.6","title":"dplyr 1.0.6","text":"CRAN release: 2021-05-05 add_count() now generic (#5837). if_any() if_all() abort predicate mistakingly used .cols= (#5732). 
Multiple calls if_any() /if_all() expression now properly disambiguated (#5782). filter() now inlines if_any() if_all() expressions. greatly improves performance grouped data frames. Fixed behaviour ... top-level across() calls (#5813, #5832). across() now inlines lambda-formulas. slightly performant allow optimisations future. Fixed issue bind_rows() causing lists incorrectly transformed data frames (#5417, #5749). select() longer creates duplicate variables renaming variable name grouping variable (#5841). dplyr_col_select() keeps attributes bare data frames (#5294, #5831). Fixed quosure handling dplyr::group_by() caused issues extra arguments (tidyverse/lubridate#959). Removed name argument compute() generic (@ianmcook, #5783). row-wise data frames 0 rows list columns supported (#5804).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-105","dir":"Changelog","previous_headings":"","what":"dplyr 1.0.5","title":"dplyr 1.0.5","text":"CRAN release: 2021-03-05 Fixed edge case slice_sample() weight_by= used 0 rows (#5729). across() can use columns functions defined inline (#5734). Using testthat 3rd edition. Fixed bugs introduced across() previous version (#5765). group_by() keeps attributes unrelated grouping (#5760). .cols= argument if_any() if_all() defaults everything().","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-104","dir":"Changelog","previous_headings":"","what":"dplyr 1.0.4","title":"dplyr 1.0.4","text":"CRAN release: 2021-02-02 Improved performance across(). makes summarise(across()) mutate(across()) perform well superseded colwise equivalents (#5697). New functions if_any() if_all() (#4770, #5713). summarise() silently ignores NULL results (#5708). Fixed performance regression mutate() warnings occur per group (#5675). 
longer instrument warnings debugging information mutate() called within suppressWarnings().","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-103","dir":"Changelog","previous_headings":"","what":"dplyr 1.0.3","title":"dplyr 1.0.3","text":"CRAN release: 2021-01-15 summarise() longer informs result ungrouped (#5633). group_by(.drop = FALSE) preserves ordered factors (@brianrice2, #5545). count() tally() now generic. Removed default fallbacks lazyeval methods; yield better error messages call dplyr function wrong input, part long term plan remove deprecated lazyeval interface. inner_join() gains keep parameter consistency mutating joins (@patrickbarks, #5581). Improved performance many columns, dynamic data mask using active bindings lazy chops (#5017). mutate() friends preserves row names data frames (#5418). group_by() uses ungrouped data implicit mutate step (#5598). might define ungroup() method custom classes. example, see https://github.com/hadley/cubelyr/pull/3. relocate() can rename columns relocates (#5569). distinct() group_by() better error messages mutate step fails (#5060). Clarify () vectorised (#5493). Fixed across() issue data frame columns referred all_of() nested case (mutate() within mutate()) (#5498). across() handles data frames 0 columns (#5523). mutate() always keeps grouping variables, unconditional .keep= (#5582). dplyr now depends R 3.3.0","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-102","dir":"Changelog","previous_headings":"","what":"dplyr 1.0.2","title":"dplyr 1.0.2","text":"CRAN release: 2020-08-18 Fixed across() issue data frame columns mask objects referred all_of() (#5460). 
bind_cols() gains .name_repair argument, passed vctrs::vec_cbind() (#5451) summarise(.groups = \"rowwise\") makes rowwise data frame even input data grouped (#5422).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-101","dir":"Changelog","previous_headings":"","what":"dplyr 1.0.1","title":"dplyr 1.0.1","text":"CRAN release: 2020-07-31 New function cur_data_all() similar cur_data() includes grouping variables (#5342). count() tally() longer automatically weights column n present (#5298). dplyr 1.0.0 introduced behaviour Hadley’s faulty memory. Historically tally() automatically weighted count() , behaviour accidentally changed 0.8.2 (#4408) neither automatically weighted n. Since 0.8.2 almost year old, automatically weighting behaviour little confusing anyway, ’ve removed count() tally(). Use wt = n() now deprecated; now just omit wt argument. coalesce() now supports data frames correctly (#5326). cummean() longer --one indexing problem (@cropgen, #5287). call stack preserved error. makes possible recover() problematic code called dplyr verbs (#5308).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-100","dir":"Changelog","previous_headings":"","what":"dplyr 1.0.0","title":"dplyr 1.0.0","text":"CRAN release: 2020-05-29","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"breaking-changes-1-0-0","dir":"Changelog","previous_headings":"","what":"Breaking changes","title":"dplyr 1.0.0","text":"bind_cols() longer converts tibble, returns data frame input data frame. bind_rows(), *_join(), summarise() mutate() use vctrs coercion rules. two main user facing changes: Combining factor character vectors silently creates character vector; previously created character vector warning. Combining multiple factors creates factor combined levels; previously created character vector warning. bind_rows() functions use vctrs name repair, see ?vctrs::vec_as_names. .equal.tbl_df() removed. 
Data frames, tibbles grouped data frames longer considered equal, even data . Equality checks data frames longer ignore row order groupings. expect_equal() uses .equal() internally. comparing data frames, tests used pass may now fail. distinct() keeps original column order. distinct() missing columns now raises error, compatibility warning long time. group_modify() puts grouping variable front. n() row_number() can longer called directly dplyr loaded, now generates error: dplyr::mutate(mtcars, x = n()). Fix prefixing dplyr:: dplyr::mutate(mtcars, x = dplyr::n()) old data format grouped_df longer supported. may affect serialized grouped data frames disk, e.g. saveRDS() using knitr caching. lead() lag() stricter inputs. Extending data frames requires extra class classes added first, last. extra class end causes vctrs operations fail message like: right_join() longer sorts rows resulting tibble according order RHS argument tibble y.","code":"Input must be a vector, not a `` object"},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"new-features-1-0-0","dir":"Changelog","previous_headings":"","what":"New features","title":"dplyr 1.0.0","text":"cur_ functions (cur_data(), cur_group(), cur_group_id(), cur_group_rows()) provide full set options access information “current” group dplyr verbs. inspired data.table’s .SD, .GRP, ., .. rows_ functions (rows_insert(), rows_update(), rows_upsert(), rows_patch(), rows_delete()) provide new API insert delete rows second data frame table. Support updating mutable backends planned (#4654). mutate() summarise() create multiple columns single expression return data frame (#2326). select() rename() use latest version tidyselect interface. Practically, means can now combine selections using Boolean logic (.e. !, & |), use predicate functions () (e.g. (.character)) select variables type (#4680). 
also makes possible use select() rename() repair data frames duplicated names (#4615) prevents accidentally introducing duplicate names (#4643). also means dplyr now re-exports any_of() all_of() (#5036). slice() gains new set helpers: slice_head() slice_tail() select first last rows, like head() tail(), return n rows per group. slice_sample() randomly selects rows, taking sample_frac() sample_n(). slice_min() slice_max() select rows minimum maximum values variable, taking confusing top_n(). summarise() can create summaries greater length 1 use summary function returns multiple values. summarise() gains .groups= argument control grouping structure. New relocate() verb makes easy move columns around within data frame (#4598). New rename_with() designed specifically purpose renaming selected columns function (#4771). ungroup() can now selectively remove grouping variables (#3760). pull() can now return named vectors specifying additional column name (@ilarischeinin, #4102).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"experimental-features-1-0-0","dir":"Changelog","previous_headings":"","what":"Experimental features","title":"dplyr 1.0.0","text":"mutate() (data frames ), gains experimental new arguments ..allow control new columns placed (#2047). mutate() (data frames ), gains experimental new argument called .keep allows control variables kept input .data. .keep = \"\" default; keeps variables. .keep = \"none\" retains input variables (except grouping keys), behaves like transmute(). .keep = \"unused\" keeps variables used make new columns. .keep = \"used\" keeps input variables used create new columns; ’s useful double checking work (#3721). 
New, experimental, with_groups() makes easy temporarily group ungroup (#4711).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"across-1-0-0","dir":"Changelog","previous_headings":"","what":"across()","title":"dplyr 1.0.0","text":"New function across() can used inside summarise(), mutate(), verbs apply function (set functions) selection columns. See vignette(\"colwise\") details. New function c_across() can used inside summarise() mutate() row-wise data frames easily (e.g.) compute row-wise mean numeric variables. See vignette(\"rowwise\") details.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"rowwise-1-0-0","dir":"Changelog","previous_headings":"","what":"rowwise()","title":"dplyr 1.0.0","text":"rowwise() longer questioning; now understand ’s important tool don’t vectorised code. now also allows specify additional variables preserved output summarising (#4723). rowwise-ness preserved operations; need explicit drop as_tibble() group_by(). New, experimental, nest_by(). interface group_by(), returns rowwise data frame grouping keys, supplemental list-column data frames containing rest data.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"vctrs-1-0-0","dir":"Changelog","previous_headings":"","what":"vctrs","title":"dplyr 1.0.0","text":"implementation dplyr verbs changed use primitives provided vctrs package. makes easier add support new types vector, radically simplifies implementation, makes dplyr verbs consistent. place mostly likely impacted coercion changes working factors joins grouped mutates: now combining factors different levels, dplyr creates new factor union levels. matches base R closely, perhaps strictly less correct, much convenient. dplyr dropped two heaviest dependencies: Rcpp BH. make considerably easier faster build source. implementation verbs carefully thought . 
mostly makes implementation simpler hopefully increase consistency, also makes easier adapt dplyr new data structures new future. Pragmatically, biggest difference people verb documents return value terms rows, columns, groups, data frame attributes. Row names now preserved working data frames.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"grouping-1-0-0","dir":"Changelog","previous_headings":"","what":"Grouping","title":"dplyr 1.0.0","text":"group_by() uses hashing vctrs package. Grouped data frames now names<-, [[<-, [<- $<- methods re-generate underlying grouping. Note modifying grouping variables multiple steps (.e. df$grp1 <- 1; df$grp2 <- 1) inefficient since data frame regrouped modification. [.grouped_df now regroups respect grouping columns removed (#4708). mutate() summarise() can now modify grouping variables (#4709). group_modify() works additional arguments (@billdenney @cderv, #4509) group_by() create arbitrary NA group grouping factors drop = TRUE (#4460).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"lifecycle-changes-1-0-0","dir":"Changelog","previous_headings":"","what":"Lifecycle changes","title":"dplyr 1.0.0","text":"deprecations now use lifecycle, means default ’ll see deprecation warning per session, can control options(lifecycle_verbosity = x) x one NULL, “quiet”, “warning”, “error”.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"removed-1-0-0","dir":"Changelog","previous_headings":"Lifecycle changes","what":"Removed","title":"dplyr 1.0.0","text":"id(), deprecated dplyr 0.5.0, now defunct. failwith(), deprecated dplyr 0.7.0, now defunct. tbl_cube() nasa pulled separate cubelyr package (#4429). rbind_all() rbind_list() removed (@bjungbogati, #4430). 
dr_dplyr() removed longer needed (#4433, @smwindecker).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"deprecated-1-0-0","dir":"Changelog","previous_headings":"Lifecycle changes","what":"Deprecated","title":"dplyr 1.0.0","text":"Use pkgconfig setting na_matches argument join functions now deprecated (#4914). rarely used, ’m now confident default correct R. add_count(), drop argument deprecated didn’t actually affect output. add_rownames(): please use tibble::rownames_to_column() instead. .tbl() tbl_df(): please use as_tibble() instead. bench_tbls(), compare_tbls(), compare_tbls2(), eval_tbls() eval_tbls2() now deprecated. used handful packages, now believe ’re better performing comparisons directly (#4675). combine(): please use vctrs::vec_c() instead. funs(): please use list() instead. group_by(add = ): please use .add instead. group_by(.dots = )/group_by_prepare(.dots = ): please use !!! instead (#4734). use zero-arg group_indices() retrieve group id “current” group deprecated; instead use cur_group_id(). Passing arguments group_keys() group_indices() change grouping deprecated, instead grouping first . location() changes(): please use lobstr::ref() instead. progress_estimated() soft deprecated; ’s responsibility dplyr provide progress bars (#4935). src_local() deprecated; part approach testing dplyr backends didn’t pan . src_mysql(), src_postgres(), src_sqlite() deprecated. ’ve recommended time. Instead please use approach described https://dbplyr.tidyverse.org/. select_vars(), rename_vars(), select_var(), current_vars() now deprecated (@perezp44, #4432)","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"superseded-1-0-0","dir":"Changelog","previous_headings":"Lifecycle changes","what":"Superseded","title":"dplyr 1.0.0","text":"scoped helpers (functions ending _if, _at, _all) superseded across(). dramatically reduces API surface dplyr, providing providing flexible less error-prone interface (#4769). 
rename_*() select_*() superseded rename_with(). () superseded favour summarise(). sample_n() sample_frac() superseded slice_sample(). See ?sample_n details , examples converting old new usage. top_n() superseded byslice_min()/slice_max(). See ?top_n details , convert old new usage (#4494).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"questioning-1-0-0","dir":"Changelog","previous_headings":"Lifecycle changes","what":"Questioning","title":"dplyr 1.0.0","text":"all_equal() questioning; solves problem longer seems important.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"stable-1-0-0","dir":"Changelog","previous_headings":"Lifecycle changes","what":"Stable","title":"dplyr 1.0.0","text":"rowwise() longer questioning.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"documentation-improvements-1-0-0","dir":"Changelog","previous_headings":"","what":"Documentation improvements","title":"dplyr 1.0.0","text":"New vignette(\"base\") describes dplyr verbs relate base R equivalents (@sastoudt, #4755) New vignette(\"grouping\") gives details dplyr verbs change applied grouped data frames (#4779, @MikeKSmith). vignette(\"programming\") completely rewritten reflect latest vocabulary, recent rlang features, current recommendations. now substantially easier program dplyr.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"minor-improvements-and-bug-fixes-1-0-0","dir":"Changelog","previous_headings":"","what":"Minor improvements and bug fixes","title":"dplyr 1.0.0","text":"dplyr now rudimentary, experimental, stop-gap, extension mechanism documented ?dplyr_extending dplyr longer provides .equal.tbl_df() method. never done first place owns neither generic class. also provided problematic implementation , default, ignored order rows columns usually important. 
likely cause new test failures downstream packages; whole believe failures either reflect unexpected behaviour tests need strengthened (#2751). coalesce() now uses vctrs recycling common type coercion rules (#5186). count() add_count() better job preserving input class attributes (#4086). distinct() errors request use variables don’t exist (previously warning) (#4656). filter(), mutate() summarise() get better error messages. filter() handles data frame results columns logical vectors reducing & (#4678). particular means across() can used filter(). left_join(), right_join(), full_join() gain keep argument can optionally choose keep sets join keys (#4589). useful want figure rows missing either side. Join functions can now perform cross-join specifying = character() (#4206.) groups() now returns list() ungrouped data; previously returned NULL type-unstable (groups returns list symbols). first argument group_map(), group_modify() group_walk() changed .data consistency generics. group_keys.rowwise_df() gives 0 column data frame n() rows. group_map() now generic (#4576). group_by(..., .add = TRUE) replaces group_by(..., add = TRUE), deprecation message. old argument name mistake prevents creating new grouping var called add violates naming conventions (#4137). intersect(), union(), setdiff() setequal() generics now imported generics package. reduces conflict lubridate. order_by() gives informative hint accidentally call instead arrange() #3357. tally() count() now message default output name (n), already exists data frame. quiet message, ’ll need supply explicit name (#4284). can override default weighting using constant setting wt = 1. starwars dataset now better job separating biological sex gender identity. previous gender column renamed sex, since actually describes individual’s biological sex. new gender column encodes actual gender identity using information Star Wars universe (@MeganBeckett, #4456). src_tbls() accepts ... arguments (#4485, @ianmcook). 
breaking change dplyr backend packages implement src_tbls(). Better performance extracting slices factors ordered factors (#4501). rename_at() rename_all() call function simple character vector, dplyr_sel_vars (#4459). ntile() now consistent database implementations buckets irregular size (#4495).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-085-2020-03-07","dir":"Changelog","previous_headings":"","what":"dplyr 0.8.5 (2020-03-07)","title":"dplyr 0.8.5 (2020-03-07)","text":"CRAN release: 2020-03-07 Maintenance release compatibility R-devel.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-084-2020-01-30","dir":"Changelog","previous_headings":"","what":"dplyr 0.8.4 (2020-01-30)","title":"dplyr 0.8.4 (2020-01-30)","text":"CRAN release: 2020-01-31 Adapt tests changes dependent packages.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-083-2019-07-04","dir":"Changelog","previous_headings":"","what":"dplyr 0.8.3 (2019-07-04)","title":"dplyr 0.8.3 (2019-07-04)","text":"CRAN release: 2019-07-04 Fixed performance regression introduced version 0.8.2 (#4458).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-082-2019-06-28","dir":"Changelog","previous_headings":"","what":"dplyr 0.8.2 (2019-06-28)","title":"dplyr 0.8.2 (2019-06-28)","text":"CRAN release: 2019-06-29","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"new-functions-0-8-2","dir":"Changelog","previous_headings":"","what":"New functions","title":"dplyr 0.8.2 (2019-06-28)","text":"top_frac(data, proportion) shorthand top_n(data, proportion * n()) (#4017).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"colwise-changes-0-8-2","dir":"Changelog","previous_headings":"","what":"colwise changes","title":"dplyr 0.8.2 (2019-06-28)","text":"Using quosures colwise verbs deprecated (#4330). 
Updated distinct_if(), distinct_at() distinct_all() include .keep_all argument (@beansrowning, #4343). rename_at() handles empty selection (#4324). *_if() functions correctly handle columns special names (#4380). colwise functions support constants formulas (#4374).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"hybrid-evaluation-changes-0-8-2","dir":"Changelog","previous_headings":"","what":"Hybrid evaluation changes","title":"dplyr 0.8.2 (2019-06-28)","text":"hybrid rank functions correctly handle NA (#4427). first(), last() nth() hybrid version handles factors (#4295).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"minor-changes-0-8-2","dir":"Changelog","previous_headings":"","what":"Minor changes","title":"dplyr 0.8.2 (2019-06-28)","text":"top_n() quotes n argument, n longer needs constant groups (#4017). tbl_vars() keeps information grouping columns returning dplyr_sel_vars object (#4106). group_split() always sets ptype attribute, make robust case 0 groups. group_map() group_modify() work 0 group edge case (#4421) select.list() method added select() dispatch lists (#4279). view() reexported tibble (#4423). group_by() puts NA groups last character vectors (#4227). arrange() handles integer64 objects (#4366). 
summarise() correctly resolves summarised list columns (#4349).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-081-2019-05-14","dir":"Changelog","previous_headings":"","what":"dplyr 0.8.1 (2019-05-14)","title":"dplyr 0.8.1 (2019-05-14)","text":"CRAN release: 2019-05-14","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"breaking-changes-0-8-1","dir":"Changelog","previous_headings":"","what":"Breaking changes","title":"dplyr 0.8.1 (2019-05-14)","text":"group_modify() new name function previously known group_map()","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"new-functions-0-8-1","dir":"Changelog","previous_headings":"","what":"New functions","title":"dplyr 0.8.1 (2019-05-14)","text":"group_map() now calls function group return list. group_by_drop_default(), previously known dplyr:::group_drops() exported (#4245).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"minor-changes-0-8-1","dir":"Changelog","previous_headings":"","what":"Minor changes","title":"dplyr 0.8.1 (2019-05-14)","text":"Lists formulas passed colwise verbs now automatically named. group_by() shallow copy even groups case (#4221). Fixed mutate() rowwise data frames 0 rows (#4224). Fixed handling bare formulas colwise verbs (#4183). Fixed performance n_distinct() (#4202). group_indices() now ignores empty groups default data.frame, consistent default group_by() (@yutannihilation, #4208). Fixed integer overflow hybrid ntile() (#4186). colwise functions summarise_at() … can rename vars case multiple functions (#4180). select_if() rename_if() handle logical vector predicate (#4213). hybrid min() max() cast integer possible (#4258). bind_rows() correctly handles cases multiple consecutive NULL (#4296). Support R 3.1.* dropped. minimal R version supported now 3.2.0. 
https://www.tidyverse.org/articles/2019/04/r-version-support/ rename_at() handles empty selection (#4324).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-0801-2019-02-15","dir":"Changelog","previous_headings":"","what":"dplyr 0.8.0.1 (2019-02-15)","title":"dplyr 0.8.0.1 (2019-02-15)","text":"CRAN release: 2019-02-15 Fixed integer C/C++ division, forced released CRAN (#4185).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-080-2019-02-14","dir":"Changelog","previous_headings":"","what":"dplyr 0.8.0 (2019-02-14)","title":"dplyr 0.8.0 (2019-02-14)","text":"CRAN release: 2019-02-14","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"breaking-changes-0-8-0","dir":"Changelog","previous_headings":"","what":"Breaking changes","title":"dplyr 0.8.0 (2019-02-14)","text":"error find function \"n\" warning Calling `n()` without importing prefixing deprecated, use `dplyr::n()` indicates functions like n(), row_number(), … imported prefixed. easiest fix import dplyr import(dplyr) NAMESPACE #' @import dplyr roxygen comment, alternatively functions can imported selectively function importFrom(dplyr, n) NAMESPACE #' @importFrom dplyr n roxygen comment. third option prefix , .e. use dplyr::n() see checking S3 generic/method consistency R CMD check package, note : sample_n() sample_frac() gained ... filter() slice() gained .preserve group_by() gained .drop Error: `.data` corrupt grouped_df, ... signals code makes wrong assumptions internals grouped data frame.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"new-functions-0-8-0","dir":"Changelog","previous_headings":"","what":"New functions","title":"dplyr 0.8.0 (2019-02-14)","text":"New selection helpers group_cols(). can called selection contexts select() matches grouping variables grouped tibbles. last_col() re-exported tidyselect (#3584). group_trim() drops unused levels factors used grouping variables. 
nest_join() creates list column matching rows. nest_join() + tidyr::unnest() equivalent inner_join (#3570). group_nest() similar tidyr::nest() focusing variables nest instead nested columns. group_split() similar base::split() operating existing groups applied grouped data frame, subject data mask ungrouped data frames group_map() group_walk() purrr-like functions iterate groups grouped data frame, jointly identified data subset (exposed .x) data key (one row tibble, exposed .y). group_map() returns grouped data frame combines results function, group_walk() used side effects returns input invisibly. distinct_prepare(), previously known distinct_vars() exported. mostly useful alternative backends (e.g. dbplyr).","code":"band_members %>% nest_join(band_instruments) starwars %>% group_by(species, homeworld) %>% group_nest() starwars %>% group_nest(species, homeworld) starwars %>% group_by(species, homeworld) %>% group_split() starwars %>% group_split(species, homeworld) mtcars %>% group_by(cyl) %>% group_map(~ head(.x, 2L))"},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"major-changes-0-8-0","dir":"Changelog","previous_headings":"","what":"Major changes","title":"dplyr 0.8.0 (2019-02-14)","text":"group_by() gains .drop argument. set FALSE groups generated based factor levels, hence groups may empty (#341). default behaviour drops empty groups previous versions. filter() slice() gain .preserve argument control groups keep. default filter(.preserve = FALSE) recalculates grouping structure based resulting data, otherwise kept . notion lazily grouped data frames disappeared. dplyr verbs now recalculate immediately grouping structure, respect levels factors. Subsets columns now properly dispatch [ [[ method column object (vector class) instead making assumptions column handled. [ method must handle integer indices, including NA_integer_, .e. 
x[NA_integer_] produce vector class x whatever represents missing value.","code":"# 3 groups tibble( x = 1:2, f = factor(c(\"a\", \"b\"), levels = c(\"a\", \"b\", \"c\")) ) %>% group_by(f, .drop = FALSE) # the order of the grouping variables matter df <- tibble( x = c(1,2,1,2), f = factor(c(\"a\", \"b\", \"a\", \"b\"), levels = c(\"a\", \"b\", \"c\")) ) df %>% group_by(f, x, .drop = FALSE) df %>% group_by(x, f, .drop = FALSE) tibble( x = 1:2, f = factor(c(\"a\", \"b\"), levels = c(\"a\", \"b\", \"c\")) ) %>% group_by(f) df <- tibble( x = c(1,2,1,2), f = factor(c(\"a\", \"b\", \"a\", \"b\"), levels = c(\"a\", \"b\", \"c\")) ) %>% group_by(x, f, .drop = FALSE) df %>% filter(x == 1) df %>% filter(x == 1, .preserve = TRUE)"},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"minor-changes-0-8-0","dir":"Changelog","previous_headings":"","what":"Minor changes","title":"dplyr 0.8.0 (2019-02-14)","text":"tally() works correctly non-data frame table sources tbl_sql (#3075). sample_n() sample_frac() can use n() (#3527) distinct() respects order variables provided (#3195, @foo-bar-baz-qux) handles 0 rows 0 columns special case (#2954). combine() uses tidy dots (#3407). group_indices() can used without argument expressions verbs (#1185). Using mutate_all(), transmute_all(), mutate_if() transmute_if() grouped tibbles now informs grouping variables ignored. case _all() verbs, message invites use mutate_at(df, vars(-group_cols())) (equivalent transmute_at() call) instead ’d like make explicit code operation applied grouping variables. Scoped variants arrange() respect .by_group argument (#3504). first() last() hybrid functions fall back R evaluation given arguments (#3589). mutate() removes column expression evaluates NULL groups (#2945). grouped data frames support [, drop = TRUE] (#3714). New low-level constructor new_grouped_df() validator validate_grouped_df (#3837). glimpse() prints group information grouped tibbles (#3384). sample_n() sample_frac() gain ... 
(#2888). Scoped filter variants now support functions purrr-like lambdas:","code":"mtcars %>% filter_at(vars(hp, vs), ~ . %% 2 == 0)"},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"lifecycle-0-8-0","dir":"Changelog","previous_headings":"","what":"Lifecycle","title":"dplyr 0.8.0 (2019-02-14)","text":"(), rowwise() combine() questioning (#3494). funs() soft-deprecated start issuing warnings future version.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"changes-to-column-wise-functions-0-8-0","dir":"Changelog","previous_headings":"","what":"Changes to column wise functions","title":"dplyr 0.8.0 (2019-02-14)","text":"Scoped variants distinct(): distinct_at(), distinct_if(), distinct_all() (#2948). summarise_at() excludes grouping variables (#3613). mutate_all(), mutate_at(), summarise_all() summarise_at() handle utf-8 names (#2967).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"performance-0-8-0","dir":"Changelog","previous_headings":"","what":"Performance","title":"dplyr 0.8.0 (2019-02-14)","text":"R expressions handled native code now evaluated unwind-protection available (R 3.5 later). improves performance dplyr data frames many groups (hence many expressions evaluate). benchmarked computing grouped average consistently twice fast unwind-protection enabled. Unwind-protection also makes dplyr robust corner cases ensures C++ destructors correctly called circumstances (debugger exit, captured condition, restart invocation). sample_n() sample_frac() gain ... (#2888). Improved performance wide tibbles (#3335). Faster hybrid sum(), mean(), var() sd() logical vectors (#3189). Hybrid version sum(na.rm = FALSE) exits early missing values. considerably improves performance missing values early vector (#3288). 
group_by() trigger additional mutate() simple uses .data pronoun (#3533).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"internal-0-8-0","dir":"Changelog","previous_headings":"","what":"Internal","title":"dplyr 0.8.0 (2019-02-14)","text":"grouping metadata grouped data frame reorganized single tidy tibble, can accessed new group_data() function. grouping tibble consists one column per grouping variable, followed list column (1-based) indices groups. new group_rows() function retrieves list indices (#3489). Hybrid evaluation completely redesigned better performance stability.","code":"# the grouping metadata, as a tibble group_by(starwars, homeworld) %>% group_data() # the indices group_by(starwars, homeworld) %>% group_data() %>% pull(.rows) group_by(starwars, homeworld) %>% group_rows()"},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"documentation-0-8-0","dir":"Changelog","previous_headings":"","what":"Documentation","title":"dplyr 0.8.0 (2019-02-14)","text":"Add documentation example moving variable back ?select (#3051). column wise functions better documented, particular explaining grouping variables included part selection.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"deprecated-and-defunct-functions-0-8-0","dir":"Changelog","previous_headings":"Documentation","what":"Deprecated and defunct functions","title":"dplyr 0.8.0 (2019-02-14)","text":"mutate_each() summarise_each() deprecated.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-076","dir":"Changelog","previous_headings":"","what":"dplyr 0.7.6","title":"dplyr 0.7.6","text":"CRAN release: 2018-06-29 exprs() longer exported avoid conflicts Biobase::exprs() (#3638). MASS package explicitly suggested fix CRAN warnings R-devel (#3657). Set operations like intersect() setdiff() reconstruct groups metadata (#3587) keep order rows (#3839). 
Using namespaced calls base::sort() base::unique() C++ code avoid ambiguities functions overridden (#3644). Fix rchk errors (#3693).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-075-2018-04-14","dir":"Changelog","previous_headings":"","what":"dplyr 0.7.5 (2018-04-14)","title":"dplyr 0.7.5 (2018-04-14)","text":"CRAN release: 2018-05-19","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"breaking-changes-for-package-developers-0-7-5","dir":"Changelog","previous_headings":"","what":"Breaking changes for package developers","title":"dplyr 0.7.5 (2018-04-14)","text":"major change version dplyr now depends selecting backend tidyselect package. linking dplyr::select_helpers documentation topic, update link point tidyselect::select_helpers. Another change causes warnings packages dplyr now exports exprs() function. causes collision Biobase::exprs(). Either import functions dplyr selectively rather bulk, import Biobase::exprs() refer namespace qualifier.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"bug-fixes-0-7-5","dir":"Changelog","previous_headings":"","what":"Bug fixes","title":"dplyr 0.7.5 (2018-04-14)","text":"distinct(data, \"string\") now returns one-row data frame . (previous behavior return data unchanged.) () operations one named argument can access . (#2998). Reindexing grouped data frames (e.g. filter() ..._join()) never updates \"class\" attribute. also avoids unintended updates original object (#3438). Fixed rare column name clash ..._join() non-join columns name tables (#3266). Fix ntile() row_number() ordering use locale-dependent ordering functions R dealing character vectors, rather always using C-locale ordering function C (#2792, @foo-bar-baz-qux). Summaries summaries (summarise(b = sum(), c = sum(b))) now computed using standard evaluation simplicity correctness, slightly slower (#3233). 
Fixed summarise() empty data frames zero columns (#3071).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"major-changes-0-7-5","dir":"Changelog","previous_headings":"","what":"Major changes","title":"dplyr 0.7.5 (2018-04-14)","text":"enexpr(), expr(), exprs(), sym() syms() now exported. sym() syms() construct symbols strings character vectors. expr() variants equivalent quo(), quos() enquo() return simple expressions rather quosures. support quasiquotation. dplyr now depends new tidyselect package power select(), rename(), pull() variants (#2896). Consequently select_vars(), select_var() rename_vars() soft-deprecated start issuing warnings future version. Following switch tidyselect, select() rename() fully support character vectors. can now unquote variables like : Note works selecting functions contexts strings character vectors ambiguous. instance strings valid input mutating operations mutate(df, \"foo\") creates new column recycling “foo” number rows.","code":"vars <- c(\"disp\", \"cyl\") select(mtcars, !! vars) select(mtcars, -(!! vars))"},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"minor-changes-0-7-5","dir":"Changelog","previous_headings":"","what":"Minor changes","title":"dplyr 0.7.5 (2018-04-14)","text":"Support raw vector columns arrange(), group_by(), mutate(), summarise() ..._join() (minimal raw x raw support initially) (#1803). bind_cols() handles unnamed list (#3402). bind_rows() works around corrupt columns object bit set class attribute (#3349). combine() returns logical() inputs NULL (inputs) (#3365, @zeehio). distinct() now supports renaming columns (#3234). Hybrid evaluation simplifies dplyr::foo() foo() (#3309). Hybrid functions can now masked regular R functions turn hybrid evaluation (#3255). hybrid evaluator finds functions dplyr even dplyr attached (#3456). mutate() now illegal use data.frame rhs (#3298). Support !!! recode_factor() (#3390). row_number() works empty subsets (#3454). 
select() vars() now treat NULL empty inputs (#3023). Scoped select rename functions (select_all(), rename_if() etc.) now work grouped data frames, adapting grouping necessary (#2947, #3410). group_by_at() can group existing grouping variable (#3351). arrange_at() can use grouping variables (#3332). slice() longer enforce tibble classes input simple data.frame, ignores 0 (#3297, #3313). transmute() longer prints message including group variable.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"documentation-0-7-5","dir":"Changelog","previous_headings":"","what":"Documentation","title":"dplyr 0.7.5 (2018-04-14)","text":"Improved documentation funs() (#3094) set operations (e.g. union()) (#3238, @edublancas).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"error-messages-0-7-5","dir":"Changelog","previous_headings":"","what":"Error messages","title":"dplyr 0.7.5 (2018-04-14)","text":"Better error message dbplyr installed accessing database backends (#3225). arrange() fails gracefully data.frame columns (#3153). Corrected error message calling cbind() object wrong length (#3085). Add warning explanation distinct() selected columns type list (#3088, @foo-bar-baz-qux), used unknown columns (#2867, @foo-bar-baz-qux). Show clear error message bad arguments funs() (#3368). Better error message ..._join() joining data frames duplicate NA column names. Joining data frames semi- anti-join now gives warning, may converted error future versions (#3243, #3417). Dedicated error message trying use columns Interval Period classes (#2568). 
Added .onDetach() hook allows plyr loaded attached without warning message says functions dplyr masked, since dplyr longer attached (#3359, @jwnorman).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"performance-0-7-5","dir":"Changelog","previous_headings":"","what":"Performance","title":"dplyr 0.7.5 (2018-04-14)","text":"sample_n() sample_frac() grouped data frame now faster especially large number groups (#3193, @saurfang).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"internal-0-7-5","dir":"Changelog","previous_headings":"","what":"Internal","title":"dplyr 0.7.5 (2018-04-14)","text":"Compute variable names joins R (#3430). Bumped Rcpp dependency 0.12.15 avoid imperfect detection NA values hybrid evaluation fixed RcppCore/Rcpp#790 (#2919). Avoid cleaning data mask, temporary environment used evaluate expressions. environment, e.g. mutate() expression evaluated, preserved operation, accessing variables environment now gives warning still returns NULL (#3318).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-074","dir":"Changelog","previous_headings":"","what":"dplyr 0.7.4","title":"dplyr 0.7.4","text":"CRAN release: 2017-09-28 Fix recent Fedora ASAN check errors (#3098). Avoid dependency Rcpp 0.12.10 (#3106).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-073","dir":"Changelog","previous_headings":"","what":"dplyr 0.7.3","title":"dplyr 0.7.3","text":"CRAN release: 2017-09-09 Fixed protection error occurred creating character column using grouped mutate() (#2971). Fixed rare problem accessing variable values summarise() groups size one (#3050). distinct() now throws error used unknown columns (#2867, @foo-bar-baz-qux). Fixed rare --bounds memory write slice() negative indices beyond number rows involved (#3073). select(), rename() summarise() longer change grouped vars original data (#3038). 
nth(default = var), first(default = var) last(default = var) fall back standard evaluation grouped operation instead triggering error (#3045). case_when() now works LHS atomic (#2909), LHS RHS values zero-length vectors (#3048). case_when() accepts NA LHS (#2927). Semi- anti-joins now preserve order left-hand-side data frame (#3089). Improved error message invalid list arguments bind_rows() (#3068). Grouping character vectors now faster (#2204). Fixed crash occurred unexpected input supplied call argument order_by() (#3065).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-072","dir":"Changelog","previous_headings":"","what":"dplyr 0.7.2","title":"dplyr 0.7.2","text":"CRAN release: 2017-07-20 Move build-time vs. run-time checks .onLoad() dr_dplyr().","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-071","dir":"Changelog","previous_headings":"","what":"dplyr 0.7.1","title":"dplyr 0.7.1","text":"CRAN release: 2017-06-22 Use new versions bindrcpp glue avoid protection problems. Avoid wrapping arguments internal error functions (#2877). Fix two protection mistakes found rchk (#2868). Fix C++ error caused compilation fail mac cran (#2862) Fix undefined behaviour (), NA_REAL assigned instead NA_LOGICAL. (#2855, @zeehio) top_n() now executes operations lazily compatibility database backends (#2848). Reuse new variables created ungrouped mutate() possible , regression introduced dplyr 0.7.0 (#2869). Quosured symbols prevent hybrid handling anymore. 
fix many performance issues introduced tidyeval (#2822).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-070","dir":"Changelog","previous_headings":"","what":"dplyr 0.7.0","title":"dplyr 0.7.0","text":"CRAN release: 2017-06-09","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"new-data-functions-and-features-0-7-0","dir":"Changelog","previous_headings":"","what":"New data, functions, and features","title":"dplyr 0.7.0","text":"Five new datasets provide interesting built-datasets demonstrate dplyr verbs (#2094): starwars dataset starwars characters; list columns storms trajectories ~200 tropical storms band_members, band_instruments band_instruments2 simple data demonstrate joins. New add_count() add_tally() adding n column within groups (#2078, @dgrtwo). arrange() grouped data frames gains .by_group argument can choose sort groups want (defaults FALSE) (#2318) New pull() generic extracting single column either name position (either left right). Thanks @paulponcet idea (#2054). verb powered new select_var() internal helper, exported well. like select_vars() returns single variable. as_tibble() re-exported tibble. recommend way create tibbles existing data frames. tbl_df() softly deprecated. tribble() now imported tibble (#2336, @chrMongeau); now preferred frame_data().","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"deprecated-and-defunct-0-7-0","dir":"Changelog","previous_headings":"","what":"Deprecated and defunct","title":"dplyr 0.7.0","text":"dplyr longer messages need dtplyr work data.table (#2489). Long deprecated regroup(), mutate_each_q() summarise_each_q() functions removed. Deprecated failwith(). ’m even sure . Soft-deprecated mutate_each() summarise_each(), functions print message changed warning next release. 
.env argument sample_n() sample_frac() defunct, passing value argument print message changed warning next release.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"databases-0-7-0","dir":"Changelog","previous_headings":"","what":"Databases","title":"dplyr 0.7.0","text":"version dplyr includes major changes database connections work. large, able continue using existing dplyr database code without modification, two big changes aware : Almost database related code moved dplyr new package, dbplyr. makes dplyr simpler, make easier release fixes bugs affect databases. src_mysql(), src_postgres(), src_sqlite() still live dplyr existing code continues work. longer necessary create remote “src”. Instead can work directly database connection returned DBI. reflects maturity DBI ecosystem. Thanks largely work Kirill Muller (funded R Consortium) DBI backends now much consistent, comprehensive, easier use. means ’s longer need layer DBI. can continue use src_mysql(), src_postgres(), src_sqlite(), recommend new style makes connection DBI clear: particularly useful want perform non-SELECT queries can whatever want DBI::dbGetQuery() DBI::dbExecute(). ’ve implemented database backend dplyr, please read backend news see ’s changed perspective (much). want ensure package works current previous version dplyr, see wrap_dbplyr_obj() helpers.","code":"library(dplyr) con <- DBI::dbConnect(RSQLite::SQLite(), \":memory:\") DBI::dbWriteTable(con, \"mtcars\", mtcars) mtcars2 <- tbl(con, \"mtcars\") mtcars2"},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"utf-0-7-0","dir":"Changelog","previous_headings":"","what":"UTF-8","title":"dplyr 0.7.0","text":"Internally, column names always represented character vectors, language symbols, avoid encoding problems Windows (#1950, #2387, #2388). Error messages explanations data frame inequality now encoded UTF-8, also Windows (#2441). Joins now always reencode character columns UTF-8 necessary. 
gives nice speedup, now pointer comparison can used instead string comparison, relies proper encoding tag strings (#2514). Fixed problems joining factor character encodings mix native UTF-8 encoded values (#1885, #2118, #2271, #2451). Fix group_by() data frames UTF-8 encoded names (#2284, #2382). New group_vars() generic returns grouping character vector, avoid potentially lossy conversion language symbols. list returned group_by_prepare() now new group_names component (#1950, #2384).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"colwise-functions-0-7-0","dir":"Changelog","previous_headings":"","what":"Colwise functions","title":"dplyr 0.7.0","text":"rename(), select(), group_by(), filter(), arrange() transmute() now scoped variants (verbs suffixed _if(), _at() _all()). Like mutate_all(), summarise_if(), etc, variants apply operation selection variables. scoped verbs taking predicates (mutate_if(), summarise_if(), etc) now support S3 objects lazy tables. S3 objects implement methods length(), [[ tbl_vars(). lazy tables, first 100 rows collected predicate applied subset data. robust common case checking type column (#2129). Summarise mutate colwise functions pass ... manipulation functions. performance colwise verbs like mutate_all() now back mutate_each(). funs() better handling namespaced functions (#2089). Fix issue mutate_if() summarise_if() predicate function returns vector FALSE (#1989, #2009, #2011).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"tidyeval-0-7-0","dir":"Changelog","previous_headings":"","what":"Tidyeval","title":"dplyr 0.7.0","text":"dplyr new approach non-standard evaluation (NSE) called tidyeval. 
described detail vignette(\"programming\") , brief, gives ability interpolate values contexts dplyr usually works expressions: ```{r} my_var <- quo(homeworld) starwars %>% group_by(!!my_var) %>% summarise_at(vars(height:mass), mean, na.rm = TRUE) ``` means underscored version main verb longer needed, functions deprecated (remain around backward compatibility). order_by(), top_n(), sample_n() sample_frac() now use tidyeval capture arguments expression. makes possible use unquoting idioms (see vignette(\"programming\")) fixes scoping issues (#2297). verbs taking dots now ignore last argument empty. makes easier copy lines code without worry deleting trailing commas (#1039). [API] new .data .env environments can used inside verbs operate data: .data$column_name accesses column column_name, whereas .env$var accesses external variable var. Columns external variables named .data .env shadowed, use .data$... /.env$... access . (.data implements strict matching also $ operator (#2591).) column() global() functions removed. never documented officially. Use new .data .env environments instead. Expressions verbs now interpreted correctly many cases failed (e.g., use $, case_when(), nonstandard evaluation, …). expressions now evaluated specially constructed temporary environment retrieves column data demand help bindrcpp package (#2190). temporary environment poses restrictions assignments using <- inside verbs. prevent leaking broken bindings, temporary environment cleared evaluation (#2435).","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"joins-0-7-0","dir":"Changelog","previous_headings":"Verbs","what":"Joins","title":"dplyr 0.7.0","text":"[API] xxx_join.tbl_df(na_matches = \"never\") treats NA values different (value), never match. corresponds behavior joins database sources, database joins general. match NA values, pass na_matches = \"na\" join verbs; supported data frames. 
default na_matches = \"na\", kept sake compatibility v0.5.0. can tweaked calling pkgconfig::set_config(\"dplyr::na_matches\", \"na\") (#2033). common_by() gets better error message unexpected inputs (#2091) Fix groups joining grouped data frames duplicate columns (#2330, #2334, @davidkretch). One two join suffixes can now empty string, dplyr longer hangs (#2228, #2445). Anti- semi-joins warn factor levels inconsistent (#2741). Warnings join column inconsistencies now contain column names (#2728).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"select-0-7-0","dir":"Changelog","previous_headings":"Verbs","what":"Select","title":"dplyr 0.7.0","text":"selecting variables, first selector decides ’s inclusive selection (.e., initial column list empty), exclusive selection (.e., initial column list contains columns). means select(mtcars, contains(\"\"), contains(\"FOO\"), contains(\"vs\")) now returns vs columns like dplyr 0.4.3 (#2275, #2289, @r2evans). Select helpers now throw error called variables set (#2452) Helper functions select() (related verbs) now evaluated context column names exist (#2184). select() (internal function select_vars()) now support column names addition column positions. result, expressions like select(mtcars, \"cyl\") now allowed.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"other-0-7-0","dir":"Changelog","previous_headings":"Verbs","what":"Other","title":"dplyr 0.7.0","text":"recode(), case_when() coalesce() now support splicing arguments rlang’s !!! operator. count() now preserves grouping input (#2021). distinct() longer duplicates variables (#2001). Empty distinct() grouped data frame works way empty distinct() ungrouped data frame, namely uses variables (#2476). copy_to() now returns output invisibly (since ’re often just calling side-effect). filter() lag() throw informative error used ts objects (#2219) mutate() recycles list columns length 1 (#2171). 
mutate() gives better error message attempting add non-vector column (#2319), attempting remove column NULL (#2187, #2439). summarise() now correctly evaluates newly created factors (#2217), can create ordered factors (#2200). Ungrouped summarise() uses summary variables correctly (#2404, #2453). Grouped summarise() longer converts character NA empty strings (#1839).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"combining-and-comparing-0-7-0","dir":"Changelog","previous_headings":"","what":"Combining and comparing","title":"dplyr 0.7.0","text":"all_equal() now reports multiple problems character vector (#1819, #2442). all_equal() checks factor levels equal (#2440, #2442). bind_rows() bind_cols() give error database tables (#2373). bind_rows() works correctly NULL arguments .id argument (#2056), also zero-column data frames (#2175). Breaking change: bind_rows() combine() strict coercing. Logical values longer coerced integer numeric. Date, POSIXct integer double-based classes longer coerced integer double chance attributes information lost (#2209, @zeehio). bind_cols() now calls tibble::repair_names() ensure names unique (#2248). bind_cols() handles empty argument list (#2048). bind_cols() better handles NULL inputs (#2303, #2443). bind_rows() explicitly rejects columns containing data frames (#2015, #2446). bind_rows() bind_cols() now accept vectors. treated rows former columns latter. Rows require inner names like c(col1 = 1, col2 = 2), columns require outer names: col1 = c(1, 2). Lists still treated data frames can spliced explicitly !!!, e.g. bind_rows(!!! x) (#1676). rbind_list() rbind_all() now call .Deprecated(), removed next CRAN release. Please use bind_rows() instead. combine() accepts NA values (#2203, @zeehio) combine() bind_rows() character factor types now always warn coercion character (#2317, @zeehio) combine() bind_rows() accept difftime objects. 
mutate coerces results grouped dataframes accepting combinable data types (integer numeric). (#1892, @zeehio)","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"vector-functions-0-7-0","dir":"Changelog","previous_headings":"","what":"Vector functions","title":"dplyr 0.7.0","text":"%% gets new hybrid handler (#126). () returns NA left right NA (fixes #2562). case_when() supports NA values (#2000, @tjmahr). first(), last(), nth() better default values factor, Dates, POSIXct, data frame inputs (#2029). Fixed segmentation faults hybrid evaluation first(), last(), nth(), lead(), lag(). functions now always fall back R implementation called arguments hybrid evaluator handle (#948, #1980). n_distinct() gets larger hash tables given slightly better performance (#977). nth() ntile() careful proper data types return values (#2306). ntile() ignores NA computing group membership (#2564). lag() enforces integer n (#2162, @kevinushey). hybrid min() max() now always return numeric work correctly edge cases (empty input, NA, …) (#2305, #2436). min_rank(\"string\") longer segfaults hybrid evaluation (#2279, #2444). recode() can now recode factor types (#2268) recode() gains .dots argument support passing replacements list (#2110, @jlegewie).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"other-minor-changes-and-bug-fixes-0-7-0","dir":"Changelog","previous_headings":"","what":"Other minor changes and bug fixes","title":"dplyr 0.7.0","text":"Many error messages helpful referring column name position argument list (#2448). New is_grouped_df() alias .grouped_df(). tbl_vars() now group_vars argument set TRUE default. FALSE, group variables returned. Fixed segmentation fault calling rename() invalid grouped data frame (#2031). rename_vars() gains strict argument control error thrown try rename variable doesn’t exist. Fixed undefined behavior slice() zero-column data frame (#2490). Fixed rare case false match join (#2515). 
Restricted workaround match() R 3.3.0. (#1858). dplyr now warns load version R Rcpp installation different currently installed version (#2514). Fixed improper reuse attributes creating list column summarise() perhaps mutate() (#2231). mutate() summarise() always strip names attribute new updated columns, even ungrouped operations (#1689). Fixed rare error lead segmentation fault all_equal(ignore_col_order = FALSE) (#2502). “dim” “dimnames” attributes always stripped copying vector (#1918, #2049). grouped_df rowwise registered officially S3 classes. makes easier use S4 (#2276, @joranE, #2789). operations return tibbles now include \"tbl\" class. important correct printing tibble 1.3.1 (#2789). Makeflags uses PKG_CPPFLAGS defining preprocessor macros. astyle formatting C++ code, tested changed part tests (#2086, #2103). Update RStudio project settings install tests (#1952). Using Rcpp::interfaces() register C callable interfaces, registering native exported functions via R_registerRoutines() useDynLib(.registration = TRUE) (#2146). Formatting grouped data frames now works overriding tbl_sum() generic instead print(). means output consistent tibble, format() now supported also SQL sources (#2781).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-050","dir":"Changelog","previous_headings":"","what":"dplyr 0.5.0","title":"dplyr 0.5.0","text":"CRAN release: 2016-06-24","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"existing-functions-0-5-0","dir":"Changelog","previous_headings":"Breaking changes","what":"Existing functions","title":"dplyr 0.5.0","text":"arrange() ignores grouping (#1206). distinct() now keeps distinct variables. want return variables (using first row non-distinct values) use .keep_all = TRUE (#1110). SQL sources, .keep_all = FALSE implemented using GROUP , .keep_all = TRUE raises error (#1937, #1942, @krlmlr). 
(default behaviour using variables none specified remains - note applies select variables). select helper functions starts_with(), ends_with() etc now real exported functions. means ’ll need import functions ’re using package dplyr attached. .e. dplyr::select(mtcars, starts_with(\"m\")) used work, now ’ll need dplyr::select(mtcars, dplyr::starts_with(\"m\")).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"deprecated-and-defunct-functions-0-5-0","dir":"Changelog","previous_headings":"Breaking changes","what":"Deprecated and defunct functions","title":"dplyr 0.5.0","text":"long deprecated chain(), chain_q() %.% removed. Please use %>% instead. id() deprecated. Please use group_indices() instead (#808). rbind_all() rbind_list() formally deprecated. Please use bind_rows() instead (#803). Outdated benchmarking demos removed (#1487). Code related starting signalling clusters moved multidplyr.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"new-functions-0-5-0","dir":"Changelog","previous_headings":"","what":"New functions","title":"dplyr 0.5.0","text":"coalesce() finds first non-missing value set vectors. (#1666, thanks @krlmlr initial implementation). case_when() general vectorised + else (#631). if_else() vectorised statement: ’s stricter (type-safe), faster, predictable version ifelse(). SQL translated CASE statement. na_if() makes easy replace certain value NA (#1707). SQL translated NULL_IF. near(x, y) helper abs(x - y) < tol (#1607). recode() vectorised equivalent switch() (#1710). union_all() method. Maps UNION SQL sources, bind_rows() data frames/tbl_dfs, combine() vectors (#1045). new family functions replace summarise_each() mutate_each() (thus deprecated future release). summarise_all() mutate_all() apply function columns summarise_at() mutate_at() operate subset columns. 
columns selected either character vector columns names, numeric vector column positions, column specification select() semantics generated new columns() helper. addition, summarise_if() mutate_if() take predicate function logical vector (verbs currently require local sources). functions can now take ordinary functions instead list functions generated funs() (though useful local sources). (#1845, @lionel-) select_if() lets select columns predicate function. compatible local sources. (#497, #1569, @lionel-)","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dtplyr-0-5-0","dir":"Changelog","previous_headings":"Local backends","what":"dtplyr","title":"dplyr 0.5.0","text":"data table related code separated new dtplyr package. decouples development data.table interface development dplyr package. data.table dplyr loaded, ’ll get message reminding load dtplyr.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"tibble-0-5-0","dir":"Changelog","previous_headings":"Local backends","what":"Tibble","title":"dplyr 0.5.0","text":"Functions related creation coercion tbl_dfs, now live package: tibble. See vignette(\"tibble\") details. $ [[ methods never partial matching (#1504), throw error variable exist. all_equal() allows compare data frames ignoring row column order, optionally ignoring minor differences type (e.g. int vs. double) (#821). test handles case df 0 columns (#1506). test fails fails convert FALSE types don’t match (#1484). all_equal() shows better error message comparing raw values types incompatible convert = TRUE (#1820, @krlmlr). add_row() makes easy add new row data frame (#1021) as_data_frame() now S3 generic methods lists (old as_data_frame()), data frames (trivial), matrices (efficient C++ implementation) (#876). longer strips subclasses. internals data_frame() as_data_frame() aligned, as_data_frame() now automatically recycle length-1 vectors. 
functions give informative error messages attempting create invalid data frame. can longer create data frame duplicated names (#820). check POSIXlt columns, tell use POSIXct instead (#813). frame_data() properly constructs rectangular tables (#1377, @kevinushey), supports list-cols. glimpse() now generic. default method dispatches str() (#1325). now (invisibly) returns first argument (#1570). lst() lst_() create lists way data_frame() data_frame_() create data frames (#1290). print.tbl_df() considerably faster wide data frames. now also list first 100 additional variables already screen - control new n_extra parameter print() (#1161). printing grouped data frame number groups now printed thousands separators (#1398). type list columns correctly printed (#1379) Package includes setOldClass(c(\"tbl_df\", \"tbl\", \"data.frame\")) help S4 dispatch (#969). tbl_df automatically generates column names (#1606).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"tbl_cube-0-5-0","dir":"Changelog","previous_headings":"Local backends","what":"tbl_cube","title":"dplyr 0.5.0","text":"new as_data_frame.tbl_cube() (#1563, @krlmlr). tbl_cubes now constructed correctly data frames, duplicate dimension values detected, missing dimension values filled NA. construction data frames now guesses measure variables default, allows specification dimension /measure variables (#1568, @krlmlr). Swap order dim_names met_name arguments .tbl_cube (array, table matrix) consistency tbl_cube .tbl_cube.data.frame. Also, met_name argument .tbl_cube.table now defaults \"Freq\" consistency .data.frame.table (@krlmlr, #1374).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"remote-backends-0-5-0","dir":"Changelog","previous_headings":"","what":"Remote backends","title":"dplyr 0.5.0","text":"as_data_frame() SQL sources now returns rows (#1752, #1821, @krlmlr). compute() gets new parameters indexes unique_indexes make easier add indexes (#1499, @krlmlr). 
db_explain() gains default method DBIConnections (#1177). backend testing system improved. lead removal temp_srcs(). unlikely event using function, can instead use test_register_src(), test_load(), test_frame(). can now use right_join() full_join() remote tables (#1172).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"sqlite-0-5-0","dir":"Changelog","previous_headings":"Remote backends","what":"SQLite","title":"dplyr 0.5.0","text":"src_memdb() session-local -memory SQLite database. memdb_frame() works like data_frame(), creates new table database. src_sqlite() now uses stricter quoting character, `, instead \". SQLite “helpfully” convert \"x\" string identifier called x current scope (#1426). src_sqlite() throws errors try use window functions (#907).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"sql-translation-0-5-0","dir":"Changelog","previous_headings":"Remote backends","what":"SQL translation","title":"dplyr 0.5.0","text":"filter.tbl_sql() now puts parens around argument (#934). Unary - better translated (#1002). escape.POSIXt() method makes easier use date times. date rendered ISO 8601 format UTC, work databases (#857). .na() gets missing space (#1695). , .na(), .null() get extra parens make precedence clear (#1695). pmin() pmax() translated MIN() MAX() (#1711). Window functions: Work ungrouped data (#1061). Warning order set cumulative window functions. Multiple partitions ordering variables windowed functions longer generate extra parentheses, work databases (#1060)","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"internals-0-5-0","dir":"Changelog","previous_headings":"Remote backends","what":"Internals","title":"dplyr 0.5.0","text":"version includes almost total rewrite dplyr verbs translated SQL. Previously, used rather ad-hoc approach, tried guess new subquery needed. Unfortunately approach fraught bugs, version ’ve implemented much richer internal data model. 
Now three step process: applied tbl_lazy, dplyr verb captures inputs stores op (short operation) object. sql_build() iterates operations building build object represents SQL query. objects convenient testing lists, backend agnostics. sql_render() iterates queries generates SQL, using generics (like sql_select()) can vary based backend. short-term, increased abstraction likely lead minor performance decreases, chance dplyr generating correct SQL much much higher. long-term, abstractions make possible write query optimiser/compiler dplyr, make possible generate much succinct queries. written dplyr backend, ’ll need make minor changes package: sql_join() considerably simplified - now responsible generating join query, generating intermediate selects rename variable. Similarly sql_semi_join(). ’ve provided new methods backend, ’ll need rewrite. select_query() gains distinct argument used generating queries distinct(). loses offset argument never used (hence never tested). src_translate_env() replaced sql_translate_env() methods connection object. two tweaks exported API, less likely affect anyone. translate_sql() partial_eval() got new API: now use connection + variable names, rather tbl. makes testing considerably easier. translate_sql_q() renamed translate_sql_(). Also note sql generation generics now default method, instead methods DBIConnection NULL.","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"single-table-verbs-0-5-0","dir":"Changelog","previous_headings":"Minor improvements and bug fixes","what":"Single table verbs","title":"dplyr 0.5.0","text":"Avoiding segfaults presence raw columns (#1803, #1817, @krlmlr). arrange() fails gracefully list columns (#1489) matrices (#1870, #1945, @krlmlr). count() now adds additional grouping variables, rather overriding existing (#1703). tally() count() can now count variable called n (#1633). Weighted count()/tally() ignore NAs (#1145). 
progress bar () now updated 20 times per second, avoiding unnecessary redraws (#1734, @mkuhn) distinct() doesn’t crash given 0-column data frame (#1437). filter() throws error supply named arguments. usually type: filter(df, x = 1) instead filter(df, x == 1) (#1529). summarise() correctly coerces factors different levels (#1678), handles min/max already summarised variable (#1622), supports data frames columns (#1425). select() now informs adds missing grouping variables (#1511). works even grouping variable non-syntactic name (#1138). Negating failed match (e.g. select(mtcars, -contains(\"x\"))) returns columns, instead columns (#1176) select() helpers now exported documentation (#1410). one_of() gives useful error message variables names found data frame (#1407). naming behaviour summarise_each() mutate_each() tweaked can force inclusion function variable name: summarise_each(mtcars, funs(mean = mean), everything()) (#442). mutate() handles factors NA (#1645), different levels different groups (#1414). disambiguates NA NaN (#1448), silently promotes groups contain NA (#1463). deep copies data list columns (#1643), correctly fails incompatible columns (#1641). mutate() grouped data longer groups grouping attributes (#1120). rowwise() mutate gives expected results (#1381). one_of() tolerates unknown variables vars, warns (#1848, @jennybc). print.grouped_df() passes ... print() (#1893). slice() correctly handles grouped attributes (#1405). ungroup() generic gains ... (#922).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dual-table-verbs-0-5-0","dir":"Changelog","previous_headings":"Minor improvements and bug fixes","what":"Dual table verbs","title":"dplyr 0.5.0","text":"bind_cols() matches behaviour bind_rows() ignores NULL inputs (#1148). also handles POSIXcts integer base type (#1402). bind_rows() handles 0-length named lists (#1515), promotes factors characters (#1538), warns binding factor character (#1485). 
bind_rows()` flexible way can accept data frames, lists, list data frames, list lists (#1389). bind_rows() rejects POSIXlt columns (#1875, @krlmlr). bind_cols() bind_rows() infer classes grouping information first data frame (#1692). rbind() cbind() get grouped_df() methods make harder create corrupt data frames (#1385). still prefer bind_rows() bind_cols(). Joins now use correct class joining POSIXct columns (#1582, @joel23888), consider time zones (#819). Joins handle empty (#1496), duplicates (#1192). Suffixes grow progressively avoid creating repeated column names (#1460). Joins string columns substantially faster (#1386). Extra attributes ok identical (#1636). Joins work correct factor levels equal (#1712, #1559). Anti- semi-joins give correct result variable factor (#1571), warn factor levels inconsistent (#2741). clear error message given joins explicit contains unavailable columns (#1928, #1932). Warnings join column inconsistencies now contain column names (#2728). inner_join(), left_join(), right_join(), full_join() gain suffix argument allows control suffix duplicated variable names receive (#1296). Set operations (intersect(), union() etc) respect coercion rules (#799). setdiff() handles factors NA levels (#1526). number fixes enable joining data frames don’t encoding column names (#1513), including working around bug 16885 regarding match() R 3.3.0 (#1806, #1810, @krlmlr).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"vector-functions-0-5-0","dir":"Changelog","previous_headings":"Minor improvements and bug fixes","what":"Vector functions","title":"dplyr 0.5.0","text":"combine() silently drops NULL inputs (#1596). Hybrid cummean() stable floating point errors (#1387). Hybrid lead() lag() received considerable overhaul. careful complicated expressions (#1588), falls back readily pure R evaluation (#1411). behave correctly summarise() (#1434). handle default values string columns. Hybrid min() max() handle empty sets (#1481). 
n_distinct() uses multiple arguments data frames (#1084), falls back R evaluation needed (#1657), reverting decision made (#567). Passing arguments gives error (#1957, #1959, @krlmlr). nth() now supports negative indices select end, e.g. nth(x, -2) selects 2nd value end x (#1584). top_n() can now also select bottom n values passing negative value n (#1008, #1352). Hybrid evaluation leaves formulas untouched (#1447).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-043","dir":"Changelog","previous_headings":"","what":"dplyr 0.4.3","title":"dplyr 0.4.3","text":"CRAN release: 2015-09-01","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"improved-encoding-support-0-4-3","dir":"Changelog","previous_headings":"","what":"Improved encoding support","title":"dplyr 0.4.3","text":"now, dplyr’s support non-UTF8 encodings rather shaky. release brings number improvement fix problems: ’s probably perfect, lot better previously version. includes fixes arrange() (#1280), bind_rows() (#1265), distinct() (#1179), joins (#1315). print.tbl_df() also received fix strings invalid encodings (#851).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"other-minor-improvements-and-bug-fixes-0-4-3","dir":"Changelog","previous_headings":"","what":"Other minor improvements and bug fixes","title":"dplyr 0.4.3","text":"frame_data() provides means constructing data_frames using simple row-wise language. (#1358, @kevinushey) .equal() longer runs outputs together (#1130). as_data_frame() gives better error message NA column names (#1101). [.tbl_df careful subsetting column names (#1245). arrange() mutate() work empty data frames (#1142). arrange(), filter(), slice(), summarise() preserve data frame meta attributes (#1064). bind_rows() bind_cols() accept lists (#1104): initial data cleaning longer need convert lists data frames, can instead feed bind_rows() directly. bind_rows() gains .id argument. 
supplied, creates new column gives name data frame (#1337, @lionel-). bind_rows() respects ordered attribute factors (#1112), better comparing POSIXcts (#1125). tz attribute ignored determining two POSIXct vectors comparable. tz inputs , ’s used, otherwise set UTC. data_frame() always produces tbl_df (#1151, @kevinushey) filter(x, TRUE, TRUE) now just returns x (#1210), doesn’t internally modify first argument (#971), now works rowwise data (#1099). works data tables (#906). glimpse() also prints number variables addition number observations (@ilarischeinin, #988). Joins handles matrix columns better (#1230), can join Date objects heterogeneous representations (Dates integers, numeric). also improves .equal() (#1204). Fixed percent_rank() cume_dist() missing values longer affect denominator (#1132). print.tbl_df() now displays class variables, just don’t fit screen (#1276). also displays duplicated column names correctly (#1159). print.grouped_df() now tells many groups . mutate() can set NULL first column (used segfault, #1329) better protects intermediary results (avoiding random segfaults, #1231). mutate() grouped data handles special case first groups, result consists logical vector NA. can happen condition ifelse NA logical vector (#958). mutate.rowwise_df() handles factors (#886) correctly handles 0-row inputs (#1300). n_distinct() gains na_rm argument (#1052). Progress bar used () now respects global option dplyr.show_progress (default TRUE) can turn globally (@jimhester #1264, #1226). summarise() handles expressions returning heterogenous outputs, e.g. median(), sometimes returns integer, times numeric (#893). slice() silently drops columns corresponding NA (#1235). ungroup.rowwise_df() gives tbl_df (#936). explicit duplicated column name error message (#996). 
“,” already used decimal point (getOption(\"OutDec\")), use “.” thousands separator printing formatted numbers (@ilarischeinin, #988).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"databases-0-4-3","dir":"Changelog","previous_headings":"","what":"Databases","title":"dplyr 0.4.3","text":"db_query_fields.SQLiteConnection uses build_sql rather paste0 (#926, @NikNakk) Improved handling log() (#1330). n_distinct(x) translated COUNT(DISTINCT(x)) (@skparkes, #873). print(n = Inf) now works remote sources (#1310).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"hybrid-evaluation-0-4-3","dir":"Changelog","previous_headings":"","what":"Hybrid evaluation","title":"dplyr 0.4.3","text":"Hybrid evaluation take place objects class (#1237). Improved $ handling (#1134). Simplified code lead() lag() make sure work properly factors (#955). respect default argument (#915). mutate can set NULL first column (used segfault, #1329). filter grouped data handles indices correctly (#880). sum() issues warning integer overflow (#1108).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-042","dir":"Changelog","previous_headings":"","what":"dplyr 0.4.2","title":"dplyr 0.4.2","text":"CRAN release: 2015-06-16 minor release containing fixes number crashes issues identified R CMD CHECK. one new “feature”: dplyr longer complains unrecognised attributes, instead just copies output. lag() lead() grouped data confused indices therefore produced wrong results (#925, #937). lag() overrides lag() instead just default method lag.default(). necessary due changes R CMD check. use lag function provided another package, use pkg::lag. Fixed number memory issues identified valgrind. Improved performance working large number columns (#879). Lists-cols contain data frames now print slightly nicer summary (#1147) Set operations give useful error message incompatible data frames (#903). 
.equal() gives correct result ignore_row_order TRUE (#1065) .equal() correctly handles character missing values (#1095). bind_cols() always produces tbl_df (#779). bind_rows() gains test form data frame corruption (#1074). bind_rows() summarise() now handles complex columns (#933). Workaround using constructor DataFrame unprotected object (#998) Improved performance working large number columns (#879).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-041","dir":"Changelog","previous_headings":"","what":"dplyr 0.4.1","title":"dplyr 0.4.1","text":"CRAN release: 2015-01-14 Don’t assume RPostgreSQL available.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-040","dir":"Changelog","previous_headings":"","what":"dplyr 0.4.0","title":"dplyr 0.4.0","text":"CRAN release: 2015-01-08","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"new-features-0-4-0","dir":"Changelog","previous_headings":"","what":"New features","title":"dplyr 0.4.0","text":"add_rownames() turns row names explicit variable (#639). as_data_frame() efficiently coerces list data frame (#749). bind_rows() bind_cols() efficiently bind list data frames row column. combine() applies coercion rules vectors (works like c() unlist() consistent bind_rows() rules). right_join() (include rows y, matching rows x) full_join() (include rows x y) complete family mutating joins (#96). group_indices() computes unique integer id group (#771). can called grouped_df without arguments data frame arguments group_by().","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"new-vignettes-0-4-0","dir":"Changelog","previous_headings":"","what":"New vignettes","title":"dplyr 0.4.0","text":"vignette(\"data_frames\") describes dplyr functions make easier faster create coerce data frames. subsumes old memory vignette. 
vignette(\"two-table\") describes two-table verbs work dplyr.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"minor-improvements-0-4-0","dir":"Changelog","previous_headings":"","what":"Minor improvements","title":"dplyr 0.4.0","text":"data_frame() (as_data_frame() & tbl_df()) now explicitly forbid columns data frames matrices (#775). columns must either 1d atomic vector 1d list. () uses lazyeval correctly evaluate arguments correct environment (#744), new do_() SE equivalent () (#718). can modify grouped data place: probably bad idea ’s sometimes convenient (#737). () grouped data tables now passes columns (columns except grouping vars) (#735, thanks @kismsu). () database tables longer potentially includes grouping variables twice (#673). Finally, () gives consistent outputs rows groups (#625). first() last() preserve factors, dates times (#509). Overhaul single table verbs data.table backend. now use consistent (simpler) code base. ensures (e.g.) n() now works verbs (#579). *_join(), can now name variables different two tables, e.g. inner_join(x, y, c(\"\", \"b\", \"c\" = \"d\")) (#682). non-join columns , dplyr add .x .y suffixes distinguish source (#655). mutate() handles complex vectors (#436) forbids POSIXlt results (instead crashing) (#670). select() now implements sophisticated algorithm ’re multiples includes excludes without names, ’re likely get expect (#644). ’ll also get better error message supply input doesn’t resolve integer column position (#643). Printing received number small tweaks. print() methods invisibly return input can interleave print() statements pipeline see interim results. print() column names 0 row data frames (#652), never print 20 rows (.e. options(dplyr.print_max) now 20), 100 (#710). Row names never printed since dplyr method guaranteed preserve (#669). glimpse() prints number observations (#692) type_sum() gains data frame method. 
summarise() handles list output columns (#832) slice() works data tables (#717). Documentation clarifies slice can’t work relational databases, examples show achieve results using filter() (#720). dplyr now requires RSQLite >= 1.0. shouldn’t affect code way (except RSQLite now doesn’t need attached) simplify internals (#622). Functions need combine multiple results single column (e.g. join(), bind_rows() summarise()) careful coercion. Joining factors levels order preserves original levels (#675). Joining factors non-identical levels generates warning coerces character (#684). Joining character factor (vice versa) generates warning coerces character. Avoid warnings ensuring data compatible joining. rbind_list() throw error attempt combine integer factor (#751). rbind()ing column full NAs allowed just collects appropriate missing value column type collected (#493). summarise() careful NA, e.g. decision result type delayed first non NA value returned (#599). complain loss precision coercions, can happen expressions return integers groups doubles others (#599). number functions gained new improved hybrid handlers: first(), last(), nth() (#626), lead() & lag() (#683), %% (#126). means use functions dplyr verb, handle C++, rather calling back R, hence improving performance. Hybrid min_rank() correctly handles NaN values (#726). Hybrid implementation nth() falls back R evaluation n length one integer numeric, e.g. ’s expression (#734). Hybrid dense_rank(), min_rank(), cume_dist(), ntile(), row_number() percent_rank() now preserve NAs (#774) filter returns input rows columns (#782). Join functions keep attributes (e.g. 
time zone information) left argument POSIXct Date objects (#819), warn incompatibility (#798).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"bug-fixes-0-4-0","dir":"Changelog","previous_headings":"","what":"Bug fixes","title":"dplyr 0.4.0","text":"[.tbl_df correctly computes row names 0-column data frames, avoiding problems xtable (#656). [.grouped_df silently drop grouping don’t include grouping columns (#733). data_frame() now acts correctly first argument vector recycled. (#680 thanks @jimhester) filter.data.table() works table variable called “V1” (#615). *_join() keeps columns original order (#684). Joining factor character vector doesn’t segfault (#688). *_join functions can now deal multiple encodings (#769), correctly name results (#855). *_join.data.table() works data.table isn’t attached (#786). group_by() data table preserves original order rows (#623). group_by() supports variables 39 characters thanks fix lazyeval (#705). gives meaningful error message variable found data frame (#716). grouped_df() requires vars list symbols (#665). min(.,na.rm = TRUE) works Dates built numeric vectors (#755). rename_() generic gets missing .dots argument (#708). row_number(), min_rank(), percent_rank(), dense_rank(), ntile() cume_dist() handle data frames 0 rows (#762). preserve missing values (#774). row_number() doesn’t segfault giving external variable wrong number variables (#781). group_indices handles edge case variables (#867). 
Removed bogus NAs introduced coercion integer range 32-bit Windows (#2708).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-0301","dir":"Changelog","previous_headings":"","what":"dplyr 0.3.0.1","title":"dplyr 0.3.0.1","text":"CRAN release: 2014-10-08 Fixed problem test script Windows.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-03","dir":"Changelog","previous_headings":"","what":"dplyr 0.3","title":"dplyr 0.3","text":"CRAN release: 2014-10-04","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"new-functions-0-3","dir":"Changelog","previous_headings":"","what":"New functions","title":"dplyr 0.3","text":"() vector function efficiently determines numeric values fall range, translated special form SQL (#503). count() makes even easier (weighted) counts (#358). data_frame() @kevinushey nicer way creating data frames. never coerces column types (stringsAsFactors = FALSE!), never munges column names, never adds row names. can use previously defined columns compute new columns (#376). distinct() returns distinct (unique) rows tbl (#97). Supply additional variables return first row unique combination variables. Set operations, intersect(), union() setdiff() now methods data frames, data tables SQL database tables (#93). pass arguments base functions, ensure raise errors pass two many arguments. Joins (e.g. left_join(), inner_join(), semi_join(), anti_join()) now allow join different variables x y tables supplying named vector . example, = c(\"\" = \"b\") joins x.y.b. n_groups() function tells many groups tbl. returns 1 ungrouped data. (#477) transmute() works like mutate() drops variables didn’t explicitly refer (#302). rename() makes easy rename variables - works similarly select() preserves columns didn’t otherwise touch. slice() allows selecting rows position (#226). 
includes positive integers, drops negative integers can use expression like n().","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"programming-with-dplyr-non-standard-evaluation-0-3","dir":"Changelog","previous_headings":"","what":"Programming with dplyr (non-standard evaluation)","title":"dplyr 0.3","text":"can now program dplyr - every function non-standard evaluation (NSE) standard evaluation (SE) version ending _. powered new lazyeval package provides tools needed implement NSE consistently correctly. See vignette(\"nse\") full details. regroup() deprecated. Please use flexible group_by_() instead. summarise_each_q() mutate_each_q() deprecated. Please use summarise_each_() mutate_each_() instead. funs_q replaced funs_.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"removed-and-deprecated-features-0-3","dir":"Changelog","previous_headings":"","what":"Removed and deprecated features","title":"dplyr 0.3","text":"%.% deprecated: please use %>% instead. chain() defunct. (#518) filter.numeric() removed. Need figure reimplement new lazy eval system. Progress refclass longer exported avoid conflicts shiny. Instead use progress_estimated() (#535). src_monetdb() now implemented MonetDB.R, dplyr. show_sql() explain_sql() matching global options dplyr.show_sql dplyr.explain_sql removed. Instead use show_query() explain().","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"minor-improvements-and-bug-fixes-0-3","dir":"Changelog","previous_headings":"","what":"Minor improvements and bug fixes","title":"dplyr 0.3","text":"Main verbs now individual documentation pages (#519). %>% simply re-exported magrittr, instead creating local copy (#496, thanks @jimhester) Examples now use nycflights13 instead hflights variables better names interlinked tables (#562). Lahman nycflights13 () suggested packages. 
means many examples work unless explicitly install install.packages(c(\"Lahman\", \"nycflights13\")) (#508). dplyr now depends Lahman 3.0.1. number examples updated reflect modified field names (#586). () now displays progress bar used interactive prompts knitting (#428, @jimhester). glimpse() now prints trailing new line (#590). group_by() consistent behaviour grouping constants: creates new column value (#410). renames grouping variables (#410). first argument now .data can create new groups name x (#534). Now instead overriding lag(), dplyr overrides lag.default(), avoid clobbering lag methods added packages. (#277). mutate(data, = NULL) removes variable returned dataset (#462). trunc_mat() hence print.tbl_df() friends gets width argument control default output width. Set options(dplyr.width = Inf) always show columns (#589). select() gains one_of() selector: allows select variables provided character vector (#396). fails immediately give empty pattern starts_with(), ends_with(), contains() matches() (#481, @leondutoit). Fixed buglet select() can now create variables called val (#564). Switched RC R6. tally() top_n() work consistently: neither accidentally evaluates wt param. (#426, @mnel) rename handles grouped data (#640).","code":""},{"path":[]},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"databases-0-3","dir":"Changelog","previous_headings":"Minor improvements and bug fixes by backend","what":"Databases","title":"dplyr 0.3","text":"Correct SQL generation paste() used collapse parameter targeting Postgres database. (@rbdixon, #1357) db backend system completely overhauled order make possible add backends packages, support much wider range databases. See vignette(\"new-sql-backend\") instruction create (#568). src_mysql() gains method explain(). mutate() creates new variable uses window function, automatically wrap result subquery (#484). Correct SQL generation first() last() (#531). 
order_by() now works conjunction window functions databases support .","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"data-framestbl_df-0-3","dir":"Changelog","previous_headings":"Minor improvements and bug fixes by backend","what":"Data frames/tbl_df","title":"dplyr 0.3","text":"verbs now understand work difftime() (#390) AsIs (#453) objects. check colnames unique (#483), robust columns present (#348, #569, #600). Hybrid evaluation bugs fixed: Call substitution stopped early sub expression contained $ (#502). Handle :: ::: (#412). cumany() cumall() properly handle NA (#408). nth() now correctly preserve class using dates, times factors (#509). longer substitutes within order_by() order_by() needs NSE (#169). [.tbl_df always returns tbl_df (.e. drop = FALSE default) (#587, #610). [.grouped_df preserves important output attributes (#398). arrange() keeps grouping structure grouped data (#491, #605), preserves input classes (#563). contains() accidentally matched regular expressions, now passes fixed = TRUE grep() (#608). filter() asserts variables white listed (#566). mutate() makes rowwise_df given rowwise_df (#463). rbind_all() creates tbl_df objects instead raw data.frames. select() doesn’t match variables, returns 0-column data frame, instead original (#498). longer fails columns named (#492) sample_n() sample_frac() methods data.frames exported. (#405, @alyst) grouped data frame may 0 groups (#486). Grouped df objects gain basic validity checking, prevent crashes related corrupt grouped_df objects made rbind() (#606). coherence joining columns compatible different types, e.g. joining character vector factor (#455), numeric integer (#450) mutate() works zero-row grouped data frame, list columns (#555). LazySubset confused input data size (#452). Internal n_distinct() stricter inputs: requires one symbol must data frame (#567). rbind_*() handle data frames 0 rows (#597). fill character vector columns NA instead blanks (#595). 
work list columns (#463). Improved handling encoding column names (#636). Improved handling hybrid evaluation re $ @ (#645).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"data-tables-0-3","dir":"Changelog","previous_headings":"Minor improvements and bug fixes by backend","what":"Data tables","title":"dplyr 0.3","text":"Fix major omission tbl_dt() grouped_dt() methods - accidentally deep copy every result :( summarise() group_by() now retain -allocation working data.tables (#475, @arunsrinivasan). joining two data.tables now correctly dispatches data table methods, result data table (#470)","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"cubes-0-3","dir":"Changelog","previous_headings":"Minor improvements and bug fixes by backend","what":"Cubes","title":"dplyr 0.3","text":"summarise.tbl_cube() works single grouping variable (#480).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-02","dir":"Changelog","previous_headings":"","what":"dplyr 0.2","title":"dplyr 0.2","text":"CRAN release: 2014-05-21","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"piping-0-2","dir":"Changelog","previous_headings":"","what":"Piping","title":"dplyr 0.2","text":"dplyr now imports %>% magrittr (#330). recommend use instead %.% easier type (since can hold shift key) flexible. %>%, can control argument RHS receives LHS using pronoun .. makes %>% useful base R functions don’t always take data frame first argument. example pipe mtcars xtabs() : Thanks @smbache excellent magrittr package. dplyr provides %>% magrittr, contains many useful functions. use , load magrittr explicitly: library(magrittr). details, see vignette(\"magrittr\"). %.% deprecated future version dplyr, won’t happen . 
’ve also deprecated chain() encourage single style dplyr usage: please use %>% instead.","code":"mtcars %>% xtabs( ~ cyl + vs, data = .)"},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"do-0-2","dir":"Changelog","previous_headings":"","what":"Do","title":"dplyr 0.2","text":"() completely overhauled. now two ways use , either multiple named arguments single unnamed arguments. group_by() + () equivalent plyr::dlply, except always returns data frame. use named arguments, argument becomes list-variable output. list-variable can contain arbitrary R object ’s particularly well suited storing models. use unnamed argument, result data frame. allows apply arbitrary functions group. Note use . pronoun refer data current group. () also automatic progress bar. appears computation takes longer 5 seconds lets know (approximately) much longer job take complete.","code":"library(dplyr) models <- mtcars %>% group_by(cyl) %>% do(lm = lm(mpg ~ wt, data = .)) models %>% summarise(rsq = summary(lm)$r.squared) mtcars %>% group_by(cyl) %>% do(head(., 1))"},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"new-verbs-0-2","dir":"Changelog","previous_headings":"","what":"New verbs","title":"dplyr 0.2","text":"dplyr 0.2 adds three new verbs: glimpse() makes possible see columns tbl, displaying much data variable can fit single line. sample_n() randomly samples fixed number rows tbl; sample_frac() randomly samples fixed fraction rows. works local data frames data tables (#202). summarise_each() mutate_each() make easy apply one functions multiple columns tbl (#178).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"minor-improvements-0-2","dir":"Changelog","previous_headings":"","what":"Minor improvements","title":"dplyr 0.2","text":"load plyr dplyr, ’ll get message suggesting load plyr first (#347). 
.tbl_cube() gains method matrices (#359, @paulstaab) compute() gains temporary argument can control whether results temporary permanent (#382, @cpsievert) group_by() now defaults add = FALSE sets grouping variables rather adding existing list. think people expected group_by work anyway, ’s unlikely cause problems (#385). Support MonetDB tables src_monetdb() (#8, thanks @hannesmuehleisen). New vignettes: memory vignette discusses dplyr minimises memory usage local data frames (#198). new-sql-backend vignette discusses add new SQL backend/source dplyr. changes() output clearly distinguishes columns added deleted. explain() now generic. dplyr careful setting keys data tables, never accidentally modifies object doesn’t . also avoids unnecessary key setting negatively affected performance. (#193, #255). print() methods tbl_df, tbl_dt tbl_sql gain n argument control number rows printed (#362). also works better columns containing lists complex objects. row_number() can called without arguments, case returns 1:n() (#303). \"comment\" attribute allowed (white listed) well names (#346). hybrid versions min, max, mean, var, sd sum handle na.rm argument (#168). yield substantial performance improvements functions. Special case call arrange() grouped data frame arguments. (#369)","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"bug-fixes-0-2","dir":"Changelog","previous_headings":"","what":"Bug fixes","title":"dplyr 0.2","text":"Code adapted Rcpp > 0.11.1 internal DataDots class protects missing variables verbs (#314), including case ... missing. (#338) .equal.data.frame base longer bypassed. now .equal.tbl_df .equal.tbl_dt methods (#332). arrange() correctly handles NA numeric vectors (#331) 0 row data frames (#289). copy_to.src_mysql() now works windows (#323) *_join() doesn’t reorder column names (#324). rbind_all() stricter accepts list data frames (#288) rbind_* propagates time zone information POSIXct columns (#298). 
rbind_* less strict type promotion. numeric Collecter allows collection integer logical vectors. integer Collecter also collects logical values (#321). internal sum correctly handles integer (/)flow (#308). summarise() checks consistency outputs (#300) drops names attribute output columns (#357). join functions throw error instead crashing common variables data frames, also give better error message one data frame variable (#371). top_n() returns n rows instead n - 1 (@leondutoit, #367). SQL translation always evaluates subsetting operators ($, [, [[) locally. (#318). select() now renames variables remote sql tbls (#317) implicitly adds grouping variables (#170). internal grouped_df_impl function errors variables group (#398). n_distinct treat NA correctly numeric case #384. compiler warnings triggered -Wall -pedantic eliminated. group_by creates one group NA (#401). Hybrid evaluator evaluate expression correct environment (#403).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-013","dir":"Changelog","previous_headings":"","what":"dplyr 0.1.3","title":"dplyr 0.1.3","text":"CRAN release: 2014-03-15","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"bug-fixes-0-1-3","dir":"Changelog","previous_headings":"","what":"Bug fixes","title":"dplyr 0.1.3","text":"select() actually renames columns data table (#284). rbind_all() rbind_list() now handle missing values factors (#279). SQL joins now work better names duplicated x y tables (#310). Builds Rcpp 0.11.1 select() correctly works vars attribute (#309). Internal code stricter deciding data frame grouped (#308): avoids number situations previously caused problems. 
data frame joins work missing values keys (#306).","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-012","dir":"Changelog","previous_headings":"","what":"dplyr 0.1.2","title":"dplyr 0.1.2","text":"CRAN release: 2014-02-24","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"new-features-0-1-2","dir":"Changelog","previous_headings":"","what":"New features","title":"dplyr 0.1.2","text":"select() substantially powerful. can use named arguments rename existing variables, new functions starts_with(), ends_with(), contains(), matches() num_range() select variables based names. now also makes shallow copy, substantially reducing memory impact (#158, #172, #192, #232). summarize() added alias summarise() people countries don’t don’t spell things correctly ;) (#245)","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"bug-fixes-0-1-2","dir":"Changelog","previous_headings":"","what":"Bug fixes","title":"dplyr 0.1.2","text":"filter() now fails given anything logical vector, correctly handles missing values (#249). filter.numeric() proxies stats::filter() can continue use filter() function numeric inputs (#264). summarise() correctly uses newly created variables (#259). mutate() correctly propagates attributes (#265) mutate.data.frame() correctly mutates variable repeatedly (#243). lead() lag() preserve attributes, now work dates, times factors (#166). n() never accepts arguments (#223). row_number() gives correct results (#227). rbind_all() silently ignores data frames 0 rows 0 columns (#274). group_by() orders result (#242). also checks columns supported types (#233, #276). hybrid evaluator handle expressions correctly, example (n() > 5) 1 else 2 subexpression n() substituted correctly. also correctly processes $ (#278). arrange() checks columns supported types (#266). also handles list columns (#282). Working towards Solaris compatibility. 
Benchmarking vignette temporarily disabled due microbenchmark problems reported BDR.","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"dplyr-011","dir":"Changelog","previous_headings":"","what":"dplyr 0.1.1","title":"dplyr 0.1.1","text":"CRAN release: 2014-01-29","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"improvements-0-1-1","dir":"Changelog","previous_headings":"","what":"Improvements","title":"dplyr 0.1.1","text":"new location() changes() functions provide information data frames stored memory can see gets copied. renamed explain_tbl() explain() (#182). tally() gains sort argument sort output highest counts come first (#173). ungroup.grouped_df(), tbl_df(), .data.frame.tbl_df() now make shallow copies inputs (#191). benchmark-baseball vignette now contains fairer (including grouping times) comparisons data.table. (#222)","code":""},{"path":"https://dplyr.tidyverse.org/dev/news/index.html","id":"bug-fixes-0-1-1","dir":"Changelog","previous_headings":"","what":"Bug fixes","title":"dplyr 0.1.1","text":"filter() (#221) summarise() (#194) correctly propagate attributes. summarise() throws error asked summarise unknown variable instead crashing (#208). group_by() handles factors missing values (#183). filter() handles scalar results (#217) better handles scoping, e.g. filter(., variable) variable defined function calls filter. also handles T F aliases TRUE FALSE T F variables data scope. select.grouped_df fails grouping variables included selected variables (#170) .equal.data.frame() handles corner case data frame NULL names (#217) mutate() gives informative error message unsupported types (#179) dplyr source package longer includes pandas benchmark, reducing download size 2.8 MB 0.5 MB.","code":""}]