Looping through multiple arrays & concatenating values in pandas

Assuming a column of lists, explode the lists; the rest is a simple `isin` check whose results we sum along the original index. I'd suggest a different output, which conveys the same information but is much easier to work with in the future.


import pandas as pd

# Sample data: each row holds a list of item codes.
df = pd.DataFrame({'Items': [['X1', 'Y1', 'Z1'], ['X2', 'Z3'], ['X3'],
                             ['X1', 'X2'], ['Y2', 'Y4', 'Z2', 'Y5', 'Z3'],
                             ['X2', 'X3', 'Y1', 'Y2', 'Z2', 'Z4', 'X1']]})
# Members of each category.
X = ['X1','X2','X3','X4','X5']
Y = ['Y1','Y2','Y3','Y4','Y5']
Z = ['Z1','Z2','Z3','Z4','Z5']

# One row per list element; the original row label is repeated on the index.
s = df.explode('Items')['Items']
# For each category, flag membership and sum the flags per original row.
# `groupby(level=0).sum()` replaces `Series.sum(level=0)`, which was removed
# in pandas 2.0.
res = pd.concat([s.isin(members).groupby(level=0).sum().rename(name)
                 for name, members in [('X', X), ('Y', Y), ('Z', Z)]], axis=1).astype(int)
#   X  Y  Z
#0  1  1  1
#1  1  0  1
#2  1  0  0
#3  2  0  0
#4  0  3  2
#5  3  2  2

To get your output, mask the 0s and append the column names to the values, then string-join each row to get the result. Here I use an apply for simplicity, alignment, and NaN handling, but there are other slightly faster alternatives.

# Count, per original row, how many exploded items fall in each category.
# `groupby(level=0).sum()` replaces `Series.sum(level=0)`, which was removed
# in pandas 2.0.
res = pd.concat([s.isin(members).groupby(level=0).sum().rename(name)
                 for name, members in [('X', X), ('Y', Y), ('Z', Z)]], axis=1).astype(int)

# Render the counts as text: a count of 1 becomes '' (so it reads "X", not
# "1X") and zero counts are masked to NaN so they can be dropped below.
res = res.astype(str).replace('1', '').where(res.ne(0))
# Append each column's name to the values in that column.
res = res.add(res.columns, axis=1)

# Aligns on index because the per-row sum restores the original row labels
df['Category'] = res.apply(lambda x: ' & '.join(x.dropna()), axis=1)
#                          Items      Category
#0                  [X1, Y1, Z1]     X & Y & Z
#1                      [X2, Z3]         X & Z
#2                          [X3]             X
#3                      [X1, X2]            2X
#4          [Y2, Y4, Z2, Y5, Z3]       3Y & 2Z
#5  [X2, X3, Y1, Y2, Z2, Z4, X1]  3X & 2Y & 2Z


# Sample data: each row stores its items as one comma-separated string.
df = pd.DataFrame({'Items': ['X1,Y1,Z1', 'X2,Z3', 'X3', 'X1,X2',
                             'Y2,Y4,Z2,Y5,Z3', 'X2,X3,Y1,Y2,Z2,Z4,X1']})

# Members of each category.
X = ['X1', 'X2', 'X3', 'X4', 'X5']
Y = ['Y1', 'Y2', 'Y3', 'Y4', 'Y5']
Z = ['Z1', 'Z2', 'Z3', 'Z4', 'Z5']


from collections import Counter

# Lookup from each item code to its category label.
M = {**dict.fromkeys(X, 'X'), **dict.fromkeys(Y, 'Y'), **dict.fromkeys(Z, 'Z')}

# Bound join that glues the per-category pieces together.
cat = ' & '.join


def num(x):
    """Return '' for a count of 1, otherwise the count itself."""
    return {1: ''}.get(x, x)


def fmt(c):
    """Format a category -> count mapping as text, e.g. '3X & 2Y & 2Z'."""
    return cat(f'{num(v)}{k}' for k, v in c.items())


def cnt(x):
    """Count the category of each comma-separated item code in *x*."""
    return Counter(map(M.get, x.split(',')))


# Returns a new frame with the formatted label added as `Category`.
df.assign(Category=[*map(fmt, map(cnt, df.Items))])

                  Items      Category
0              X1,Y1,Z1     X & Y & Z
1                 X2,Z3         X & Z
2                    X3             X
3                 X1,X2            2X
4        Y2,Y4,Z2,Y5,Z3       3Y & 2Z
5  X2,X3,Y1,Y2,Z2,Z4,X1  3X & 2Y & 2Z


pandas.Series.str.get_dummies and groupby

First convert the definitions of X, Y, and Z into one dictionary, then use that as the argument for groupby on axis=1

# Lookup from each item code to its category label.
M = {**dict.fromkeys(X, 'X'), **dict.fromkeys(Y, 'Y'), **dict.fromkeys(Z, 'Z')}

# Build one 0/1 indicator column per distinct item, then sum those columns
# by category. The frame is transposed so the grouping runs over the index,
# because `groupby(..., axis=1)` is deprecated since pandas 2.1.
counts = df.Items.str.get_dummies(',').T.groupby(M).sum().T

   X  Y  Z
0  1  1  1
1  1  0  1
2  1  0  0
3  2  0  0
4  0  3  2
5  3  2  2

Add the desired column
Work in progress: I don't like this solution.

def fmt(row):
    """Join the positive counts in *row* into a label such as '3X & 2Z'.

    A count of exactly 1 is written without its number, and entries whose
    count is not greater than 0 are left out.
    """
    parts = []
    for label, count in row.items():
        if count > 0:
            prefix = "" if count == 1 else count
            parts.append(f'{prefix}{label}')
    return ' & '.join(parts)

# Format every row of `counts` into its label, then return a new frame
# carrying those labels as the `Category` column.
category_labels = counts.apply(fmt, axis=1)
df.assign(Category=category_labels)

                  Items      Category
0              X1,Y1,Z1     X & Y & Z
1                 X2,Z3         X & Z
2                    X3             X
3                 X1,X2            2X
4        Y2,Y4,Z2,Y5,Z3       3Y & 2Z
5  X2,X3,Y1,Y2,Z2,Z4,X1  3X & 2Y & 2Z


The following works only because I'm leveraging the structure of your contrived example; there is no way you should depend on the first character of your values to be the thing that differentiates them.

from operator import itemgetter

# Sum the indicator columns grouped by the first character of each column
# name. The frame is transposed so the grouping runs over the index,
# because `groupby(..., axis=1)` is deprecated since pandas 2.1.
df.Items.str.get_dummies(',').T.groupby(itemgetter(0)).sum().T

   X  Y  Z
0  1  1  1
1  1  0  1
2  1  0  0
3  2  0  0
4  0  3  2
5  3  2  2

Create your dataframe

import pandas as pd

# Sample data: each row holds a list of item codes.
df = pd.DataFrame({'Items': [['X1', 'Y1', 'Z1'],
                            ['X2', 'Z3'],
                            ['X3'],
                            ['X1', 'X2'],
                            ['Y2', 'Y4', 'Z2', 'Y5', 'Z3'],
                            ['X2', 'X3', 'Y1', 'Y2', 'Z2', 'Z4', 'X1']]})


# One row per list element; the original row label is repeated on the index.
df_exp = df.explode('Items')


def check_if_in_set(item, set):
    """Return 1 if *item* is a member of *set*, otherwise 0."""
    return 1 if (item in set) else 0


# Members of each category, keyed by the category label. Named `categories`
# so the builtin `dict` is not shadowed.
categories = {'X': {'X1', 'X2', 'X3', 'X4', 'X5'},
              'Y': {'Y1', 'Y2', 'Y3', 'Y4', 'Y5'},
              'Z': {'Z1', 'Z2', 'Z3', 'Z4', 'Z5'}}

# Add one 0/1 indicator column per category to the exploded frame.
for label, members in categories.items():
    df_exp[label] = df_exp['Items'].apply(check_if_in_set, args=(members,))

# Collapse back to one row per original row: gather the items into a list
# and total each category's indicator column.
df_agg = df_exp.groupby(level=0).agg(
    Items_list=('Items', list),
    X_count=('X', 'sum'),
    y_count=('Y', 'sum'),
    Z_count=('Z', 'sum'),
)

                      Items_list  X_count  y_count  Z_count
0                   [X1, Y1, Z1]        1        1        1
1                       [X2, Z3]        1        0        1
2                           [X3]        1        0        0
3                       [X1, X2]        2        0        0
4           [Y2, Y4, Z2, Y5, Z3]        0        3        2
5  [X2, X3,  Y1, Y2, Z2, Z4, X1]        3        2        2