Column to nested list separated by /

Since the values look like paths, I created the sample data as a character vector like this:

# Sample input: one "/"-separated path per node, grouped by root.
paths <- c(
  "a", "a/air", "a/aero/breath",
  "b", "b/boy", "b/bag/band/brand"
)

Then you can use the following function to get your nested list. I hope the variable names are self-explanatory.

# Turn "/"-separated paths into a nested named list.
#
# The first component of every path becomes a name on the current level.
# A name at which a path stops, and through which no other path continues,
# becomes a leaf holding 1; every other name becomes a sub-list built by
# recursing on the remainders of the paths that start with it.
pathsToNestedList <- function(x) {
  segments <- strsplit(x, "/")
  heads <- sapply(segments, "[[", 1)
  distinctHeads <- unique(heads)

  # Remainder of each path after its first component ("" if nothing is left).
  tails <- sapply(segments, function(seg) {
    if (length(seg) > 1) {
      paste0(seg[2:length(seg)], collapse = "/")
    } else {
      ""
    }
  })

  # Names that at least one path continues beyond.
  depths <- sapply(segments, length)
  branchNames <- unique(heads[depths > 1])

  # Names at which a path stops and that are not branches as well.
  stopsHere <- tails == ""
  leafNames <- setdiff(heads[stopsHere], branchNames)

  # No leaves on this level: only recurse into the branches.
  if (length(leafNames) == 0) {
    subtrees <- lapply(distinctHeads, function(h) {
      pathsToNestedList(tails[!stopsHere & (heads == h)])
    })
    return(setNames(subtrees, distinctHeads))
  }

  # Leaves present: drop the finished paths, then combine leaves and branches.
  tails <- tails[!stopsHere]
  heads <- heads[!stopsHere]
  distinctHeads <- unique(heads)

  leafNodes <- setNames(as.list(rep(1, length(leafNames))), leafNames)
  subtrees <- lapply(distinctHeads, function(h) {
    pathsToNestedList(tails[heads == h])
  })
  c(leafNodes, setNames(subtrees, distinctHeads))
}

Note: I updated my answer according to your updated question.

Update: The function can be simplified to:

#' Convert "/"-separated paths into a nested named list.
#'
#' Each path component becomes one nesting level. A name at which a path
#' ends, and through which no other path continues, becomes a leaf holding
#' the value 1. A name that is both the end of one path and a prefix of
#' another (e.g. "a" next to "a/air") is kept only as a branch.
#'
#' @param x Character vector of paths, or anything `as.character()` can
#'   convert (e.g. a factor column). `NA` and `""` entries are ignored.
#' @return A named list holding the leaves of this level first, followed by
#'   the branches in order of first appearance; `NULL` when no usable path
#'   remains after removing `NA` and `""`.
pathsToNestedList <- function(x) {
  # strsplit() rejects non-character input such as factors, so coerce first.
  x <- as.character(x)
  x <- x[!is.na(x) & x != ""]
  if (!length(x)) return(NULL)

  pathSplit <- strsplit(x, "/", fixed = TRUE)

  # vapply() guarantees character vectors regardless of the input's shape.
  pathStarts <- vapply(pathSplit, "[[", character(1), 1L)

  # Everything after the first component, re-joined; "" when nothing is left.
  pathEnds <- vapply(
    pathSplit,
    function(pathParts) paste(pathParts[-1L], collapse = "/"),
    character(1)
  )

  # Names that at least one path continues beyond: these are branches.
  stillToParse <- unique(pathStarts[lengths(pathSplit) > 1L])

  # Leaves: paths that end on this level and whose name is not also a branch.
  endedIndices <- pathEnds == ""
  endedHere <- setdiff(pathStarts[endedIndices], stillToParse)

  pathEnds <- pathEnds[!endedIndices]
  pathStarts <- pathStarts[!endedIndices]
  uniquePathStarts <- unique(pathStarts)

  # Concatenate the leaves with the recursively parsed branches. If either
  # part is empty, c() simply returns the other one.
  c(
    setNames(as.list(rep(1, length(endedHere))), endedHere),
    setNames(
      lapply(uniquePathStarts, function(ps) {
        pathsToNestedList(pathEnds[pathStarts == ps])
      }),
      uniquePathStarts
    )
  )
}

Moreover, I noticed that the original version crashes on NA values and empty strings. Hence the simplified version removes such entries at the beginning of the function.


Another option is to use rrapply() in the rrapply-package, which has a dedicated option how = "unmelt" to unmelt a data.frame to a nested list:

library(rrapply)
library(data.table)

## sample input: one "/"-separated path per node
paths <- c("a","a/air","a/aero/breath","b","b/boy","b/bag/band/brand")

## create data.frame/data.table with node paths
## Only paths containing a "/" are kept, so the bare roots "a" and "b" are
## left out here. Each path component goes into its own column (V1, V2, ...);
## shorter paths are padded with NA, as the printed table below shows.
paths_melt <- as.data.table(tstrsplit(paths[grepl("/", paths)], split = "/"))
## add a constant integer column `value`; it shows up as the leaf value (1)
## in the nested list printed further down
paths_melt[, value := 1L]
paths_melt
#>    V1   V2     V3    V4 value
#> 1:  a  air   <NA>  <NA>     1
#> 2:  a aero breath  <NA>     1
#> 3:  b  boy   <NA>  <NA>     1
#> 4:  b  bag   band brand     1

## unmelt to nested list
## One row per leaf: judging by the output, the V* columns give the node
## names along the path and `value` gives the leaf content.
rrapply(paths_melt, how = "unmelt")
#> $a
#> $a$air
#> [1] 1
#> 
#> $a$aero
#> $a$aero$breath
#> [1] 1
#> 
#> 
#> 
#> $b
#> $b$boy
#> [1] 1
#> 
#> $b$bag
#> $b$bag$band
#> $b$bag$band$brand
#> [1] 1

Tags:

List

R