Python connected components

I like this algorithm:

def connected_components(neighbors):
    seen = set()
    def component(node):
        nodes = set([node])
        while nodes:
            node = nodes.pop()
            seen.add(node)
            nodes |= neighbors[node] - seen
            yield node
    for node in neighbors:
        if node not in seen:
            yield component(node)

Not only is it short and elegant, but also fast. Use it like so (Python 2.7):

old_graph = {
    0: [(0, 1), (0, 2), (0, 3)],
    1: [],
    2: [(2, 1)],
    3: [(3, 4), (3, 5)],
    4: [(4, 3), (4, 5)],
    5: [(5, 3), (5, 4), (5, 7)],
    6: [(6, 8)],
    7: [],
    8: [(8, 9)],
    9: []}

edges = {v for k, vs in old_graph.items() for v in vs}
graph = defaultdict(set)

for v1, v2 in edges:
    graph[v1].add(v2)
    graph[v2].add(v1)

components = []
for component in connected_components(graph):
    c = set(component)
    components.append([edge for edges in old_graph.values()
                            for edge in edges
                            if c.intersection(edge)])

print(components)

The result is:

[[(0, 1), (0, 2), (0, 3), (2, 1), (3, 4), (3, 5), (4, 3), (4, 5), (5, 3), (5, 4), (5, 7)],
 [(6, 8), (8, 9)]]

Thanks, aparpara for spotting the bug.


Let's simplify the graph representation:

myGraph = {0: [1,2,3], 1: [], 2: [1], 3: [4,5],4: [3,5], 5: [3,4,7], 6: [8], 7: [],8: [9], 9: []}

Here we have the function returning a dictionary whose keys are the roots and whose values are the connected components:

def getRoots(aNeigh):
    def findRoot(aNode,aRoot):
        while aNode != aRoot[aNode][0]:
            aNode = aRoot[aNode][0]
        return (aNode,aRoot[aNode][1])
    myRoot = {} 
    for myNode in aNeigh.keys():
        myRoot[myNode] = (myNode,0)  
    for myI in aNeigh: 
        for myJ in aNeigh[myI]: 
            (myRoot_myI,myDepthMyI) = findRoot(myI,myRoot) 
            (myRoot_myJ,myDepthMyJ) = findRoot(myJ,myRoot) 
            if myRoot_myI != myRoot_myJ: 
                myMin = myRoot_myI
                myMax = myRoot_myJ 
                if  myDepthMyI > myDepthMyJ: 
                    myMin = myRoot_myJ
                    myMax = myRoot_myI
                myRoot[myMax] = (myMax,max(myRoot[myMin][1]+1,myRoot[myMax][1]))
                myRoot[myMin] = (myRoot[myMax][0],-1) 
    myToRet = {}
    for myI in aNeigh: 
        if myRoot[myI][0] == myI:
            myToRet[myI] = []
    for myI in aNeigh: 
        myToRet[findRoot(myI,myRoot)[0]].append(myI) 
    return myToRet  

Let's try it:

print getRoots(myGraph)

{8: [6, 8, 9], 1: [0, 1, 2, 3, 4, 5, 7]}


The previous answer is great. Anyway, it took to me a bit to understand what was going on. So, I refactored the code in this way that is easier to read for me. I leave here the code in case someone founds it easier too (it runs in python 3.6)

def get_all_connected_groups(graph):
    already_seen = set()
    result = []
    for node in graph:
        if node not in already_seen:
            connected_group, already_seen = get_connected_group(node, already_seen)
            result.append(connected_group)
    return result


def get_connected_group(node, already_seen):
        result = []
        nodes = set([node])
        while nodes:
            node = nodes.pop()
            already_seen.add(node)
            nodes = nodes or graph[node] - already_seen
            result.append(node)
        return result, already_seen


graph = {
     0: {0, 1, 2, 3},
     1: set(),
     2: {1, 2},
     3: {3, 4, 5},
     4: {3, 4, 5},
     5: {3, 4, 5, 7},
     6: {6, 8},
     7: set(),
     8: {8, 9},
     9: set()}

components = get_all_connected_groups(graph)
print(components)

Result:

Out[0]: [[0, 1, 2, 3, 4, 5, 7], [6, 8, 9]] 

Also, I simplified the input and output. I think it's a bit more clear to print all the nodes that are in a group