Summarise to return the length by group

Using dplyr, we can start a new group at every occurrence of 0 with cumsum() and then sum spell1 within each group to get the length of each spell.

library(dplyr)

# Spell lengths per month/year.
# A new run id starts at every 0 in `spell1`, so each group holds at most one
# 0 followed by the non-zero rows that come after it.
# Assumes `spell1` is coded 0/1, so that its sum equals the number of spell rows.
df %>%
  group_by(month, year, group = cumsum(spell1 == 0)) %>%
  summarise(spell_length = sum(spell1)) %>%
  ungroup() %>%
  # Back-to-back zeros create groups that contain only a 0; drop those
  # zero-length "spells" instead of reporting them as rows.
  filter(spell_length > 0) %>%
  select(-group)

#    month  year spell_length
#   <int> <int>        <int>
#1     1  1981            3
#2     1  1981            4
#3     1  1981            1

Using the basic idea from @akrun but without data.table::rleid():

# Label every element with the index of the run of identical values it
# belongs to (base-R stand-in for data.table::rleid()).
run_index <- function(x) {
  runs <- rle(x)
  rep(seq_along(runs$lengths), runs$lengths)
}

# Keep the non-zero rows and count how many of them fall in each run.
df %>%
  group_by(year, month, rleid = run_index(spell1)) %>%
  filter(spell1 > 0) %>%
  ungroup() %>%
  count(month, year, rleid, name = "spell_length") %>%
  select(-rleid)

  month  year spell_length
  <int> <int>        <int>
1     1  1981            3
2     1  1981            4
3     1  1981            1

Or:

# Tag each row with the id of its run of equal `spell1` values, discard the
# zero rows, and report the number of rows left in every run.
df %>%
  group_by(
    year,
    month,
    rleid = with(rle(spell1), rep.int(seq_along(lengths), lengths))
  ) %>%
  filter(spell1 > 0) %>%
  summarise(spell_length = n()) %>%
  ungroup() %>%
  select(-rleid)

Here's an option using dplyr::count :

library(dplyr)
# Tally rows per (month, year, run, is-zero) combination. `grp` increments at
# every 0, so within a run the zero row and the non-zero rows are counted
# separately; keeping only the non-zero tallies leaves one row per spell.
df %>%
  count(month, year, grp = cumsum(spell1 == 0), zero = spell1 == 0) %>%
  filter(!zero) %>%
  select(-c(grp, zero))

# # A tibble: 3 x 3
#   month  year     n
#   <int> <int> <int>
# 1     1  1981     3
# 2     1  1981     4
# 3     1  1981     1

Or in base R :

# Count rows (length of `day`) for every month/year/run combination, with
# zero and non-zero rows kept apart by the fourth grouping term.
res <- aggregate(
  day ~ month + year + cumsum(spell1 == 0) + (spell1 == 0),
  data = df,
  FUN = length
)
# Column 4 is the `spell1 == 0` flag: keep the non-zero groups and drop the
# two helper grouping columns (3 and 4).
res[!res[[4]], -c(3, 4)]
#   month year day
# 1     1 1981   3
# 2     1 1981   4
# 3     1 1981   1

One option is to group by the run-length id of 'spell1' (rleid from data.table creates a new grouping id each time the value in that column changes), filter out the rows where 'spell1' is 0, and get the number of rows in each remaining group with n().

library(dplyr)
library(data.table)
# Group consecutive identical `spell1` values with data.table's rleid(), keep
# only the rows where `spell1` is 1, and count the rows in each run.
df1 %>%
  group_by(year, month, grp = data.table::rleid(spell1)) %>%
  filter(spell1 == 1) %>%
  summarise(spell_length = n()) %>%
  ungroup() %>%
  select(-grp)
# A tibble: 3 x 3
#   year month spell_length
#  <int> <int>        <int>
#1  1981     1            3
#2  1981     1            4
#3  1981     1            1

Or use rle from base R

# Run-length encode the column, then keep the lengths of the runs whose value
# is greater than 0.
rl1 <- rle(df1$spell1)
with(rl1, lengths[values > 0])
#[1] 3 4 1

NOTE: This solution also works when the 'spell1' values are different

Tags:

R

Dplyr