Fill a column with a vector if condition is met

Here is a basic approach using data.table::set(), which fills the `days` column in place, one window at a time:

library(data.table)
# Walk the rows once. Whenever a signal of 1 is found, number that row and the
# next three rows (clipped at the last row) 1, 2, 3, 4, then jump past the
# filled window so that signals inside it do not restart the count.
i <- 1L
n <- nrow(df)
# `<=` (not `<`) so that a signal sitting on the very last row is still filled.
while (i <= n) {
  # isTRUE() treats a missing signal as "no signal" instead of aborting the
  # loop with "missing value where TRUE/FALSE needed".
  if (isTRUE(df$signal[i] == 1)) {
    k <- min(i + 3L, n)
    set(df, i = i:k, j = "days", value = seq_len(k - i + 1L))
    i <- i + 4L
  } else {
    i <- i + 1L
  }
}

#    signal days
# 1       0    0
# 2       1    1
# 3       0    2
# 4       0    3
# 5       1    4
# 6       0    0
# 7       0    0
# 8       1    1
# 9       1    2
# 10      1    3
# 11      1    4
# 12      1    1
# 13      1    2
# 14      0    3

Here's an Rcpp solution. Although it contains a loop, it has very low overhead compared to R-based loops, and is likely about as quick as you are going to get:

 # Compile fill_column(): takes the signal vector and returns a new integer
 # vector in which every triggering 1 and the three elements after it are
 # numbered 1..4; elements outside such a window keep their input value.
 Rcpp::cppFunction("IntegerVector fill_column(IntegerVector v) {
  // Work on a deep copy. An IntegerVector shares memory with an R integer
  // vector, so writing into v directly would overwrite the caller's data.
  IntegerVector out = clone(v);
  bool flag = false;   // true while inside a 4-element window
  int counter = 1;     // next value to write inside the window
  for(int i = 0; i < out.size(); ++i) {
    if(flag){
      out[i] = counter++;
      if(counter == 5) {
        // Window complete: go back to scanning for the next signal.
        flag = false;
        counter = 1;
      }
    } else {
      if(out[i] == 1) {
        // A signal opens a new window; this element is its first entry.
        out[i] = counter++;
        flag = true;
      }
    }
  }
  return out;
  }")

This allows you to use the function inside dplyr:

# Build the `days` column by running fill_column() over the `signal` column.
mutate(df, days = fill_column(signal))

#> # A tibble: 14 x 2
#>    signal  days
#>     <dbl> <int>
#>  1      0     0
#>  2      1     1
#>  3      0     2
#>  4      0     3
#>  5      1     4
#>  6      0     0
#>  7      0     0
#>  8      1     1
#>  9      1     2
#> 10      1     3
#> 11      1     4
#> 12      1     1
#> 13      1     2
#> 14      0     3

Here is a base R solution. It defines a helper function f that, given the signals, returns the row positions at which a new 1-to-4 sequence should start:

#' Find the positions where a new fill window starts
#'
#' Scans the positions at which `signals` equals 1 from left to right and
#' keeps a position only if it lies at least `window` elements after the
#' previously kept one. Signals that fall inside an already open window are
#' dropped. `NA` entries are ignored.
#'
#' @param signals A vector of signals; an element equal to 1 marks a signal.
#' @param window Minimum distance between two kept positions (default 4,
#'   i.e. each kept signal claims itself and the next three elements).
#' @return An integer vector of start positions, in increasing order
#'   (`integer(0)` when there is no signal).
f <- function(signals, window = 4L) {
  stopifnot(is.numeric(window), length(window) == 1L, !is.na(window))
  hits <- which(signals == 1)
  keep <- logical(length(hits))
  # Position of the most recently kept signal; -Inf makes the first hit pass.
  last_start <- -Inf
  # One greedy pass: each hit is compared only with the last kept start, so
  # there is no need to rescan from the beginning after dropping a hit.
  for (j in seq_along(hits)) {
    if (hits[j] - last_start >= window) {
      keep[j] <- TRUE
      last_start <- hits[j]
    }
  }
  hits[keep]
}

Then, if we run the following for loop

# For every start position reported by f(), number that row and up to three
# following rows (never past the last row of df) as 1, 2, 3, 4 in `days`.
for (start in f(df$signal)) {
  rows <- seq(start, min(start + 3, nrow(df)))
  df$days[rows] <- seq_along(rows)
}

we will see that

> df
   signal days
1       0    0
2       1    1
3       0    2
4       0    3
5       1    4
6       0    0
7       0    0
8       1    1
9       1    2
10      1    3
11      1    4
12      1    1
13      1    2
14      0    3

Tags:

R

Dplyr