applying a function across columns by extracting similar column names

Why not just rbind the thing?

with(do.call(rbind, my_list), 
     cbind(germany=(germany - germany_mean) / germany_sd,
           france=(france - france_mean) / france_sd))
#         germany     france
# [1,] -0.1429988 -0.5009603
# [2,] -0.4095864 -0.4157005
# [3,] -0.4427196 -0.6492633
# [4,] -0.6087181 -0.7976550
# [5,] -0.5748642 -0.7546901
# [6,] -0.5745473 -0.8392283

Do you have to use map ? Here I get your desired output using two for loops instead of using map

Result_list = vector("list",length(my_list))
for(i in 1:length(my_list))
{
  df = my_list[[i]]
  # identifier number of countries
  countries = colnames(df)[grep('mean',colnames(df))]
  countries = gsub("_mean","",countries)

  df_result = NULL
  for(j in 1:length(countries))
  {
    country = countries[j]
    value_country = df[1,match(country,colnames(df))]
    mean_country = df[1,match(paste0(country,"_mean"),colnames(df))]
    sd_country = df[1,match(paste0(country,"_sd"),colnames(df))]

    result_country = (value_country - mean_country) / sd_country
    Sentence = paste0(country,": ",round(result_country,5)," = (",value_country," - ",round(mean_country,5),") / ",round(sd_country,5))
    df_result = c(df_result,Sentence)
  }
  Result_list[[i]] = df_result
}

And the output Result_list looks like:

> Result_list
[[1]]
[1] "germany: -0.143 = (17 - 21.29429) / 30.03026" 
[2] "france: -0.50096 = (25 - 48.57103) / 47.05169"

[[2]]
[1] "germany: -0.40959 = (9 - 21.29932) / 30.02862"
[2] "france: -0.4157 = (29 - 48.56053) / 47.05439" 

[[3]]
[1] "germany: -0.44272 = (8 - 21.29475) / 30.02973"
[2] "france: -0.64926 = (18 - 48.55189) / 47.05624"

[[4]]
[1] "germany: -0.60872 = (3 - 21.27785) / 30.02679"
[2] "france: -0.79765 = (11 - 48.53825) / 47.06077"

[[5]]
[1] "germany: -0.57486 = (4 - 21.26142) / 30.02694"
[2] "france: -0.75469 = (13 - 48.52145) / 47.0676" 

[[6]]
[1] "germany: -0.57455 = (4 - 21.2532) / 30.0292" 
[2] "france: -0.83923 = (9 - 48.50559) / 47.07372"

Is it what you are looking for ?

EDIT: Extracting only results

For extracting only result values, you can do the following:

Df_result_value = NULL
for(i in 1:length(my_list))
{
  df = my_list[[i]]
  # identifier number of countries
  countries = colnames(df)[grep('mean',colnames(df))]
  countries = gsub("_mean","",countries)

  for(j in 1:length(countries))
  {
    country = countries[j]
    value_country = df[1,match(country,colnames(df))]
    mean_country = df[1,match(paste0(country,"_mean"),colnames(df))]
    sd_country = df[1,match(paste0(country,"_sd"),colnames(df))]

    result_country = (value_country - mean_country) / sd_country

    Df_result_value = rbind(Df_result_value,c(country,result_country))
  }
}
Df_result_value = data.frame(Df_result_value)
colnames(Df_result_value) = c("Country","Result")

And get this output:

> Df_result_value
   Country             Result
1  germany -0.142998843835787
2   france -0.500960300483614
3  germany -0.409586436512588
4   france -0.415700488060442
5  germany -0.442719572974515
6   france -0.649263275639099
7  germany -0.608718121899195
8   france -0.797654950237258
9  germany -0.574864249939699
10  france -0.754690110335453
11 germany -0.574547256608035
12  france -0.839228262008441

The question is unclear on the exact form of output so we assume that what is wanted is a data frame with a column for date and a column for each country in which the country value is normalized. In this case it means we want 3 columns in the output.

1) pivot_longer/_wider Bind the my_list list components together creating a data frame with a row from each component. Then for each bare country name among the columns append _root to it so that every column name except date is of the form country_suffix. Then convert to long form, perform the normalization and convert back to wide form:

library(dplyr)
library(tidyr)
library(purrr)

my_list %>%
  bind_rows %>%
  set_names(names(.)[1], sub("^([^_]*)$", "\\1_root", names(.)[-1])) %>%
  pivot_longer(-date, names_to = c("country", ".value"), names_sep = "_") %>%
  mutate(root = (root - mean) / sd) %>%
  pivot_wider(id_cols = "date", names_from = "country", values_from = "root")

giving:

# A tibble: 6 x 3
  date       germany france
  <date>       <dbl>  <dbl>
1 2016-01-01  -0.143 -0.501
2 2016-01-02  -0.410 -0.416
3 2016-01-03  -0.443 -0.649
4 2016-01-04  -0.609 -0.798
5 2016-01-05  -0.575 -0.755
6 2016-01-06  -0.575 -0.839

2) Base R

After rbinding the list components together giving d we pick out the country names, nms, as those names not containing an underscore except for the first such (which is date). Then perform the normalization and cbind the date column to that.

d <- do.call("rbind", my_list)
nms <- grep("_", names(d), invert = TRUE, value = TRUE)[-1]
cbind(d[1], (d[nms] - d[paste0(nms, "_mean")]) / d[paste0(nms, "_sd")])

giving:

        date    germany     france
1 2016-01-01 -0.1429988 -0.5009603
2 2016-01-02 -0.4095864 -0.4157005
3 2016-01-03 -0.4427196 -0.6492633
4 2016-01-04 -0.6087181 -0.7976550
5 2016-01-05 -0.5748642 -0.7546901
6 2016-01-06 -0.5745473 -0.8392283

applying a function across columns by extracting similar column names

Tags:

R

Related

Recent Posts