How to get the position of elements in a list?

Similar to docendo's, but attempting to operate as much as possible inside the recursion rather than fixing the result afterwards:

# Recursively build the positional indices of a nested list.
#
# For a non-list `x` the result is seq_along(x), or NA when `x` is empty.
# For a list the result is a list with one matrix per element: the
# element's own index `i` in the first column, followed by the indices
# produced by recursing into that element (stacked with rBind()).
ff <- function(x) {
  if (!is.list(x)) {
    # Leaf: positions inside the atomic vector, NA marks an empty leaf.
    if (length(x)) {
      return(seq_along(x))
    }
    return(NA)
  }

  lapply(seq_along(x), function(i) {
    # as.list() lets a leaf's integer vector be passed as separate rows.
    children <- as.list(ff(x[[i]]))
    cbind(i, do.call(rBind, children))
  })
}

# Stack the per-element index matrices returned by ff() into one matrix.
ans <- do.call(rBind, ff(l2))
# Rows whose index columns after the first are all NA come from empty
# elements (ff() returns NA for them) and have no value in unlist(l2), so
# they are dropped. drop = FALSE keeps both subsets as matrices even when
# `ans` has only two columns (a flat list) or only one row is kept.
data.frame(
  value = unlist(l2),
  ans[rowSums(is.na(ans[, -1L, drop = FALSE])) != (ncol(ans) - 1L), ,
      drop = FALSE]
)
#   value X1 X2 X3 X4 X5
#1      a  1  1 NA NA NA
#2      b  2  1  1 NA NA
#3      c  2  2  1  1 NA
#4      d  2  2  2  1 NA
#5      a  2  2  2  2 NA
#6      e  2  2  2  3 NA
#7      e  4  1 NA NA NA
#8      b  4  2 NA NA NA
#9      e  5  1  1 NA NA
#10     f  6  1  1  1  1

rBind is a wrapper around rbind to avoid the "non-matching columns" errors:

# Row-bind vectors/matrices that may have different numbers of columns.
#
# Every argument that is not already a matrix is converted with matrix()
# (i.e. into a single-column matrix). Narrower pieces are then padded on
# the right with NA columns up to the width of the widest piece, so that
# rbind() never fails with a "non-matching columns" error.
#
# Args:
#   ...: vectors or matrices to stack on top of each other.
# Returns:
#   The stacked matrix, or NULL when called without arguments (the same
#   result rbind() gives for no input).
rBind <- function(...) {
  args <- lapply(list(...), function(x) if (is.matrix(x)) x else matrix(x))

  # Nothing to bind: max() over an empty set would fail below.
  if (length(args) == 0L) {
    return(NULL)
  }

  # vapply() guarantees an integer vector regardless of the input.
  nc <- max(vapply(args, ncol, integer(1)))

  padded <- lapply(args, function(x) {
    # Append one NA column per missing column (none when already full width).
    do.call(cbind, c(list(x), rep_len(list(NA), nc - ncol(x))))
  })
  do.call(rbind, padded)
}

Here's an approach that yields a slightly different output than you showed, but it'll be useful further down the road.

# Recursively name every element of a (nested) list by its position.
#
# At each level the elements are named "1", "2", ..., n. Non-list elements
# get their own values named the same way; list elements are processed
# recursively. The result has the same structure as `l`.
f <- function(l) {
  names(l) <- seq_along(l)

  number_element <- function(el) {
    names(el) <- seq_along(el)
    if (!is.list(el)) {
      return(el)
    }
    f(el)
  }

  lapply(l, number_element)
}

Function f simply iterates (recursively) through all levels of the given list and names its elements 1,2,...,n, where n is the length of the (sub)list. Then we can make use of the fact that unlist has a use.names argument that is TRUE by default and takes effect when used on a named list (that's why we have to use f to name the list first).

For the nested list l2 it returns:

unlist(f(l2))
#      1.1     2.1.1   2.2.1.1   2.2.2.1   2.2.2.2   2.2.2.3       4.1       4.2     5.1.1 6.1.1.1.1 
#      "a"       "b"       "c"       "d"       "a"       "e"       "e"       "b"       "e"       "f" 

Now, in order to return a data.frame as asked for in the question, I'd do this:

# Flatten a nested list into a data frame of values and their positions.
#
# f() names every element by its position, so the names of the unlisted
# vector are dot-separated index paths such as "2.2.1.1". Those paths are
# split into one integer column per nesting level (i1, i2, ...); shorter
# paths are filled with NA on the right.
#
# Args:
#   l: a (possibly nested) list.
# Returns:
#   A data frame with a `value` column followed by the index columns.
g <- function(l) {
  # library() fails immediately if tidyr is missing; require() would only
  # return FALSE and let a confusing "could not find function" error follow.
  library(tidyr)
  vec <- unlist(f(l))
  # Deepest nesting level = number of index columns needed.
  n <- max(lengths(strsplit(names(vec), ".", fixed = TRUE)))
  data.frame(
    value = unname(vec),
    i = names(vec)
  ) %>%
    separate(i, paste0("i", seq_len(n)), sep = "\\.", fill = "right",
             convert = TRUE)
}

And apply it like this:

g(l2)
#   value i1 i2 i3 i4 i5
#1      a  1  1 NA NA NA
#2      b  2  1  1 NA NA
#3      c  2  2  1  1 NA
#4      d  2  2  2  1 NA
#5      a  2  2  2  2 NA
#6      e  2  2  2  3 NA
#7      e  4  1 NA NA NA
#8      b  4  2 NA NA NA
#9      e  5  1  1 NA NA
#10     f  6  1  1  1  1

An improved version of g, contributed by @AnandaMahto (thanks!), would use data.table:

# data.table variant: flatten a nested list into a table of index columns
# (one per nesting level, as produced by splitting the dot-separated names
# that f() creates) plus a `value` column holding the leaf values.
g <- function(inlist) {
    # library() fails immediately if data.table is missing; require() would
    # only return FALSE and let a less helpful error surface later.
    library(data.table)
    temp <- unlist(f(inlist))
    # The trailing [] makes the result print after the by-reference update.
    setDT(tstrsplit(names(temp), ".", fixed = TRUE))[, value := unname(temp)][]
}

Edit (credits go to @TylerRinkler - thanks!)

This has the benefit of easily being converted to a data.tree object, which can then be converted to many other data types. With a slight mod to g:

# Flatten a nested list into a data frame of positions and values.
#
# Same idea as the earlier g(), but the index columns (i1, i2, ...) come
# first and `value` last, so that pasting a row left to right yields the
# path from the top of the list down to the value.
#
# Args:
#   l: a (possibly nested) list.
# Returns:
#   A data frame with one integer column per nesting level followed by
#   `value`; unused levels are NA.
g <- function(l) {
  # library() fails immediately if tidyr is missing; require() would only
  # return FALSE and let a confusing "could not find function" error follow.
  library(tidyr)
  vec <- unlist(f(l))
  # Deepest nesting level = number of index columns needed.
  n <- max(lengths(strsplit(names(vec), ".", fixed = TRUE)))
  data.frame(
    i = names(vec),
    value = unname(vec)
  ) %>%
    separate(i, paste0("i", seq_len(n)), sep = "\\.", fill = "right",
             convert = TRUE)
}

library(data.tree)

# Prepend a constant "top" column so that every path starts at one shared
# root node named ".".
x <- data.frame(top=".", g(l2))
# Build one "/"-separated path per row. apply() hands each row over as a
# vector after coercing the data frame to a matrix; na.omit() drops the
# unused (NA) index levels and trimws() removes surrounding whitespace
# (presumably padding added when the integer columns are formatted during
# that coercion -- confirm).
x$pathString <- apply(x, 1, function(x) paste(trimws(na.omit(x)), collapse="/"))
# Convert the data frame to a data.tree node structure.
# NOTE(review): as.Node() is expected to read the hierarchy from the
# pathString column by default -- confirm against the data.tree docs.
mytree <- data.tree::as.Node(x)

mytree
#                   levelName
#1  .                        
#2   ¦--1                    
#3   ¦   °--1                
#4   ¦       °--a            
#5   ¦--2                    
#6   ¦   ¦--1                
#7   ¦   ¦   °--1            
#8   ¦   ¦       °--b        
#9   ¦   °--2                
#10  ¦       ¦--1            
#11  ¦       ¦   °--1        
#12  ¦       ¦       °--c    
#13  ¦       °--2            
#14  ¦           ¦--1        
#15  ¦           ¦   °--d    
#16  ¦           ¦--2        
#17  ¦           ¦   °--a    
#18  ¦           °--3        
#19  ¦               °--e    
#20  ¦--4                    
#21  ¦   ¦--1                
#22  ¦   ¦   °--e            
#23  ¦   °--2                
#24  ¦       °--b            
#25  ¦--5                    
#26  ¦   °--1                
#27  ¦       °--1            
#28  ¦           °--e        
#29  °--6                    
#30      °--1                
#31          °--1            
#32              °--1        
#33                  °--1    
#34                      °--f 

And to produce a nice plot:

plot(mytree)

pic

Other forms of presenting the data:

as.list(mytree)
ToDataFrameTypeCol(mytree)

More on converting data.tree types:

https://cran.r-project.org/web/packages/data.tree/vignettes/data.tree.html#tree-conversion
http://www.r-bloggers.com/how-to-convert-an-r-data-tree-to-json/


Here's an alternative. It's not going to be as fast as the approach by @docendodiscimus, but it is still pretty straightforward.

The basic idea is to use melt from "reshape2"/"data.table". melt has a method for lists that creates output like the following:

melt(l2)
#    value L3 L2 L4 L1
# 1      a NA NA NA  1
# 2      b NA  1 NA  2
# 3      c  1  2 NA  2
# 4      d  2  2 NA  2
# 5      a  2  2 NA  2
# 6      e  2  2 NA  2
# 7      e NA NA NA  4
# 8      b NA NA NA  4
# 9      e NA  1 NA  5
# 10     f  1  1  1  6

Except for the column ordering and the last value that you're interested in, that seems to have all the info you're after. To get the last value you're interested in, you can use rapply(l2, seq_along).

Putting those two requirements together, you would have something like this:

# Flatten a nested list into a data frame of values and their positions,
# using the list method of reshape2::melt().
#
# melt() yields a `value` column plus one "L<k>" column per nesting level,
# but not the position of each value inside its innermost vector. That
# position is written into the first NA level column of each row.
#
# Args:
#   inlist: a (possibly nested) list.
# Returns:
#   A data frame with `value` followed by L1, L2, ... in level order.
myFun <- function(inlist) {
  # library() fails immediately if reshape2 is missing; require() would only
  # return FALSE and let melt() fail with a less helpful error.
  library(reshape2)
  x1 <- melt(inlist)                          ## Melt the data
  x1[[paste0("L", ncol(x1))]] <- NA_integer_  ## Extra column for the deepest position
  ## Reorder the level columns. Sorting by name length first keeps "L10"
  ## after "L9"; a plain alphabetical sort would put it before "L2".
  level_names <- names(x1)[-1]
  x1 <- x1[c(1, order(nchar(level_names), level_names) + 1)]
  vals <- rapply(inlist, seq_along)           ## These are the positional values
  ## First NA among the level columns only (hence the + 1L offset), so that
  ## an NA in `value` is never mistaken for the slot to fill.
  positions <- max.col(is.na(x1[-1]), "first") + 1L
  x1[cbind(seq_len(nrow(x1)), positions)] <- vals  ## Matrix indexing for replacement
  x1                                          ## Return the output
}

myFun(l2)
#    value L1 L2 L3 L4 L5
# 1      a  1  1 NA NA NA
# 2      b  2  1  1 NA NA
# 3      c  2  2  1  1 NA
# 4      d  2  2  2  1 NA
# 5      a  2  2  2  2 NA
# 6      e  2  2  2  3 NA
# 7      e  4  1 NA NA NA
# 8      b  4  2 NA NA NA
# 9      e  5  1  1 NA NA
# 10     f  6  1  1  1  1

The "data.table" version of g from the answer by @docendodiscimus is a little bit more direct:

# data.table variant: flatten a nested list into a table of index columns
# (one per nesting level, as produced by splitting the dot-separated names
# that f() creates) plus a `value` column holding the leaf values.
g <- function(inlist) {
  # library() fails immediately if data.table is missing; require() would
  # only return FALSE and let a less helpful error surface later.
  library(data.table)
  temp <- unlist(f(inlist))
  # The trailing [] makes the result print after the by-reference update.
  setDT(tstrsplit(names(temp), ".", fixed = TRUE))[, value := unname(temp)][]
}

Tags:

List

R