Flatten complex directory structure in Python

Run recursively through directory, move the files and launch move for directories:

import shutil
import os

def move(destination, depth=None):
    if not depth:
        depth = []
    for file_or_dir in os.listdir(os.path.join([destination] + depth, os.sep)):
        if os.path.isfile(file_or_dir):
            shutil.move(file_or_dir, destination)
        else:
            move(destination, os.path.join(depth + [file_or_dir], os.sep))

this would do, it also renames files if they collide (I commented out the actual move and replaced with a copy):

import os
import sys
import string
import shutil

#Generate the file paths to traverse, or a single path if a file name was given
def getfiles(path):
    if os.path.isdir(path):
        for root, dirs, files in os.walk(path):
            for name in files:
                yield os.path.join(root, name)
    else:
        yield path

destination = "./newdir/"
fromdir = "./test/"
for f in getfiles(fromdir):
    filename = string.split(f, '/')[-1]
    if os.path.isfile(destination+filename):
        filename = f.replace(fromdir,"",1).replace("/","_")
    #os.rename(f, destination+filename)
    shutil.copy(f, destination+filename)

import os.path, shutil

def move(src, dest):
    not_in_dest = lambda x: os.path.samefile(x, dest)
    files_to_move = filter(not_in_dest,
                           glob_recursive(path=src))

    for f in files_to_move:
        shutil.move(f, dest)

Source for glob_recursive. Does not change name of file, if they collide.

samefile is a safe way to compare paths. But it doesn't work on Windows, so check How to emulate os.path.samefile behaviour on Windows and Python 2.7?.


I don't like testing the name of the file about to be moved to see if we're already in the destination directory. Instead, this solution only scans the subdirectories of the destination

import os
import itertools
import shutil


def move(destination):
    all_files = []
    for root, _dirs, files in itertools.islice(os.walk(destination), 1, None):
        for filename in files:
            all_files.append(os.path.join(root, filename))
    for filename in all_files:
        shutil.move(filename, destination)

Explanation: os.walk walks recursively the destination in a "top down" manner. whole filenames are constructed with the os.path.join(root, filename) call. Now, to prevent scanning files at the top of the destination, we just need to ignore the first element of the iteration of os.walk. To do that I use islice(iterator, 1, None). One other more explicit way would be to do this:

def move(destination):
    all_files = []
    first_loop_pass = True
    for root, _dirs, files in os.walk(destination):
        if first_loop_pass:
            first_loop_pass = False
            continue
        for filename in files:
            all_files.append(os.path.join(root, filename))
    for filename in all_files:
        shutil.move(filename, destination)