Retain attributes when using gather from tidyr (attributes are not identical)

You don't seem to like my base solutions. Let me tempt you once more

(df <- data.frame(hh_id = 1:2,
                  income = c(55000, 94000),
                  bday_01 = ymd(c(20150309, 19890211)),
                  bday_02 = ymd(c(19850911, 20000815)),
                  gender_01 = factor(c("M", "F")),
                  gender_02 = factor(c("F", "F"))))


reshape(df, idvar = 'hh_id', varying = list(3:4, 5:6), direction = 'long',
        v.names = c('bday','gender'), timevar = 'per_num')

#     hh_id income    per_num       bday gender
# 1.1     1  55000          1 2015-03-09      M
# 2.1     2  94000          1 1989-02-11      F
# 1.2     1  55000          2 1985-09-11      F
# 2.2     2  94000          2 2000-08-15      F

You could just convert your dates to character then convert them back to dates at the end:

(person <- df %>% 
      select(hh_id, bday_01:gender_02) %>% 
      mutate_each(funs(as.character), contains('bday')) %>%
      gather(key, value, -hh_id) %>%
      separate(key, c("key", "per_num"), sep = "_") %>%
      spread(key, value) %>%
      mutate(bday=ymd(bday)))

  hh_id per_num       bday gender
1     1      01 2015-03-09      M
2     1      02 1985-09-11      F
3     2      01 1989-02-11      F
4     2      02 2000-08-15      F

Alternatively, if you use Date instead of POSIXct, you could do something like this:

(person <- df %>% 
      select(hh_id, bday_01:gender_02) %>% 
      gather(per_num1, gender, contains('gender'), convert=TRUE) %>%
      gather(per_num2, bday, contains('bday'), convert=TRUE) %>%
      mutate(bday=as.Date(bday)) %>%
      mutate_each(funs(str_extract(., '\\d+')), per_num1, per_num2) %>%
      filter(per_num1 == per_num2) %>%
      rename(per_num=per_num1) %>%
      select(-per_num2))

Edit

The warning you're seeing:

Warning: attributes are not identical across measure variables; they will be dropped

arises from gathering the gender columns, which are factors and have different level vectors (see str(df)). If you were to convert the gender columns to character or if you were to synchronize their levels with something like,

df <- mutate(df, gender_02 = factor(gender_02, levels=levels(gender_01)))

then you will see that the warning goes away when you execute

person <- df %>% 
        select(hh_id, bday_01:gender_02) %>% 
        gather(key, value, contains('gender'))

With tidyr 1.0.0 it can be done as follows :

suppressPackageStartupMessages({
  library(tidyr)
  library(lubridate)
})
df <- data.frame(hh_id = 1:2,
                 income = c(55000, 94000),
                 bday_01 = ymd(c(20150309, 19890211)),
                 bday_02 = ymd(c(19850911, 20000815)),
                 gender_01 = factor(c("M", "F")),
                 gender_02 = factor(c("F", "F")))

pivot_longer(df, -(1:2), names_to = c(".value","per_num"),names_sep = "_" )
#> # A tibble: 4 x 5
#>   hh_id income per_num bday       gender
#>   <int>  <dbl> <chr>   <date>     <fct> 
#> 1     1  55000 01      2015-03-09 M     
#> 2     1  55000 02      1985-09-11 F     
#> 3     2  94000 01      1989-02-11 F     
#> 4     2  94000 02      2000-08-15 F

Created on 2019-09-14 by the reprex package (v0.3.0)

Tags:

R

Tidyr