ddply multiple quantiles by group

With base R you could use tapply and do.call:

library(plyr)
# Default quantiles (0%, 25%, 50%, 75%, 100%) of `ab` for each team;
# the per-team vectors are stacked into a matrix with one row per team.
do.call(rbind, with(baseball, tapply(ab, team, quantile)))

# Same, with custom probabilities forwarded to quantile() through tapply's `...`.
do.call(rbind, with(baseball, tapply(ab, team, quantile, probs = c(0.05, 0.1, 0.2))))

Or, with ddply:

# One row per team; the columns are the default quantiles of `ab`.
ddply(
  baseball,
  .(team),
  function(team_rows) quantile(team_rows[["ab"]])
)

Define the calculation for each quantile as a separate column inside summarise, and pass the grouping variable as .(team):

library(plyr)
data(baseball)
# One summarise column per requested quantile of `ab`, computed per team.
ddply(
  baseball,
  .(team),
  summarise,
  X0   = quantile(ab, probs = 0),
  X25  = quantile(ab, probs = 0.25),
  X50  = quantile(ab, probs = 0.5),
  X75  = quantile(ab, probs = 0.75),
  X100 = quantile(ab, probs = 1)
)

A slightly different approach using the tidyverse (dplyr, tidyr and purrr):

library(tidyverse)

# Per-team quantiles of `ab` via a nested data frame.
baseball %>%
  group_by(team) %>%
  nest() %>%
  mutate(
    # quantile() returns a named numeric vector, not a data frame, so each
    # result is converted to a one-row tibble before unnesting. This replaces
    # the former invoke_map(tibble, ret) step; invoke_map() is deprecated as
    # of purrr 1.0.0.
    ret = map(data, ~ as_tibble_row(quantile(.x$ab, probs = c(0.25, 0.75))))
  ) %>%
  unnest(ret)

Here you can specify the needed quantiles in the probs argument.

The invoke_map call seems to be necessary, as quantile does not return a data frame; see this answer.

You can also put that all into a function:

#' Compute quantiles of one column, optionally per group.
#'
#' @param .data A data frame.
#' @param .var Bare (unquoted) name of the column whose quantiles are wanted.
#' @param .probs Numeric vector of probabilities passed to `quantile()`.
#' @param .group_vars Grouping columns as created by `vars()`. The default,
#'   `vars()`, selects no columns, so the whole data frame is one group.
#' @return A tibble with the grouping columns followed by one column per
#'   requested quantile (named by `quantile()`, e.g. `25%`), one row per group.
get_quantiles <- function(.data, .var, .probs = c(0.25, 0.75), .group_vars = vars()) {
  # Capture the unquoted column name as a string so it can be used with `[[`.
  .var <- deparse(substitute(.var))

  # `[[` on a missing column yields NULL, which quantile() silently turns into
  # NA results; fail loudly instead.
  if (!.var %in% names(.data)) {
    stop("Column `", .var, "` not found in `.data`.", call. = FALSE)
  }

  .data %>%
    group_by_at(.group_vars) %>%
    nest() %>%
    mutate(
      # quantile() returns a named vector; as_tibble_row() turns it into a
      # one-row tibble (invoke_map() is deprecated as of purrr 1.0.0).
      ret = map(data, ~ as_tibble_row(quantile(.x[[.var]], probs = .probs))),
      # Drop the nested input explicitly: unnest()'s `.drop` argument is
      # deprecated since tidyr 1.0.0 and no longer removes list-columns.
      data = NULL
    ) %>%
    unnest(ret)
}

# Example: 25% and 75% quantiles of `wt` for each value of `cyl`.
get_quantiles(mtcars, wt, .group_vars = vars(cyl))

A new approach would be to use group_modify() from dplyr. Then you'd call:

# Per-team quantiles of `ab`: group_modify() builds a long name/value tibble
# for each team, which is then reshaped to one column per quantile.
baseball %>%
  group_by(team) %>%
  group_modify(~ {
    quantile(.x$ab, probs = c(0.25, 0.75)) %>%
      tibble::enframe()
  }) %>%
  # pivot_wider() is the current replacement for the superseded spread().
  pivot_wider(names_from = name, values_from = value)

You can do this with non-standard quantiles, in dplyr:

library(plyr)
data(baseball)
library(dplyr)
# Probabilities for the two non-standard quantiles.
prob <- c(0.2, 0.8)

# One row per team with the two requested quantiles of `ab`.
baseball %>%
  group_by(team) %>%
  summarise(
    p1 = quantile(ab, probs = prob[1]),
    p2 = quantile(ab, probs = prob[2])
  )

NB: this is dplyr::summarise, not plyr::summarise. Because dplyr is loaded after plyr above, its summarise masks the plyr version.

Tags:

R

Plyr