Calculate part of duration that occur in each hour of day

A data.table / lubridate alternative.

library(data.table)
library(lubridate)

setDT(df) 

df[ , ceil_start := ceiling_date(start_time, "hour")]

d = df[ , {
  if(ceil_start > end_time){
    .SD[ , .(start_time, dur = as.double(end_time - start_time, units = "mins"))]
  } else {
    time <- c(start_time,
              seq(from = ceil_start, to = floor_date(end_time, "hour"), by = "hour"),
              end_time)
    .(start = head(time, -1), dur = `units<-`(diff(time), "mins"))
  }
},
by = id]

setorder(d, start_time)
d[ , .(n_min = sum(dur)), by = .(date = as.Date(start_time), hour(start_time))]

#          date hour n_min
# 1: 2018-09-02    3    34
# 2: 2018-09-02    6    69
# 3: 2018-09-02    7   124
# 4: 2018-09-02    8    93
# 5: 2018-09-02   11    41
# 6: 2018-09-02   14     3

Explanation

Convert data.frame to data.table (setDT). Round up start times to nearest hour (ceiling_date(start, "hour")).

if the up-rounded time is larger than end time (if(ceil_start > end_time)), select start time and calculate duration for that hour (as.double(end_time - start_time, units = "mins")).

else, create a sequence from the up-rounded start time, to the down-rounded end time, with an hourly increment (seq(from = ceil_start, to = floor_date(end, "hour"), by = "hour")). Concatenate with start and end times. Return all times except the last (head(time, -1)) and calculate difference between time each step in minutes (`units<-`(diff(time), "mins")).

Order data by start time (setorder(d, start_time)). Sum duration by date and hour d[ , .(n_min = sum(dur)), by = .(date = as.Date(start_time), hour(start_time))].


Not the best solution since it expands the data but I think it works :

library(dplyr)
library(lubridate)

df %>%
  mutate_at(-1, ymd_hms) %>%
  mutate(time = purrr::map2(start_time, end_time, seq, by = 'min')) %>%
  tidyr::unnest(time) %>%
  mutate(hour = hour(time), date = as.Date(time)) %>%
  count(date, hour)

# A tibble: 6 x 3
#  date        hour     n
#  <date>     <int> <int>
#1 2018-09-02     3    36
#2 2018-09-02     6    70
#3 2018-09-02     7   124
#4 2018-09-02     8    97
#5 2018-09-02    11    42
#6 2018-09-02    14     4

We create a sequence from start_time to end_time with 1 minute intervals, extract hours and count occurrence of for each date and hour.


Here is an alternate solution, similar to Ronak's but without creating a minute-by-minute data-frame.

library(dplyr)
library(lubridate)

    df %>%
      mutate(hour = (purrr::map2(hour(start_time), hour(end_time), seq, by = 1))) %>%
      tidyr::unnest(hour)  %>% mutate(minu=case_when(hour(start_time)!=hour & hour(end_time)==hour ~ 1*minute(end_time),
                                 hour(start_time)==hour & hour(end_time)!=hour ~ 60-minute(start_time),
                                 hour(start_time)==hour & hour(end_time)==hour ~ 1*minute(end_time)-1*minute(start_time),
                                 TRUE ~ 60)) %>% group_by(hour) %>% summarise(sum(minu))

# A tibble: 6 x 2
   hour `sum(minu)`
  <dbl>       <dbl>
1     3          34
2     6          69
3     7         124
4     8          93
5    11          41
6    14           3