How to get the distinct count of values for every column of a table, based on a WHERE condition, in SQL Server?

First, get the column names and use STUFF (together with FOR XML PATH) to generate the SELECT list, like this:

-- One aggregate per column, each aliased with the column's own name
SELECT COUNT(DISTINCT ColumnA) AS ColumnA, COUNT(DISTINCT ColumnB) AS ColumnB, COUNT(DISTINCT ColumnC) AS ColumnC....

That way you only select from your table once to get all the counts. After that, use CROSS APPLY to "unpivot" those columns and return the output as one row per column:

-- Turns the single row of per-column counts into one row per column.
-- A VALUES table constructor used as a derived table must be given an alias.
CROSS APPLY (
    VALUES (1, 'ColumnA', ColumnA), (2, 'ColumnB', ColumnB), (3, 'ColumnC', ColumnC)
) AS B (ID, ColumnName, DistinctCountValue)

For the filter, use sp_executesql and pass the file_id as a parameter:

-- @FID is bound as a typed parameter instead of being concatenated into @SQL.
-- The procedure name is written in its catalog casing so it also resolves on
-- case-sensitive instances.
EXEC sys.sp_executesql @SQL, N'@FID INT', @FID = @FileID;

Since you are using all columns of the table, the expression Row_Number() over(partition by Table_Schema, Table_Name order by ORDINAL_POSITION) as RowNum becomes redundant: ORDINAL_POSITION already holds the value you are looking for.

-- Builds and runs a single dynamic query that returns COUNT(DISTINCT ...) for
-- every column of @TableName, restricted to rows where File_ID = @FileID.
-- Result: one row per column (ID = ordinal position, ColumnName, DistinctCountValue).
-- NOTE(review): COUNT(DISTINCT ...) is presumably rejected for non-comparable
-- column types (text, ntext, image, xml); exclude such columns in cols if the
-- table has any.
DECLARE @TableName nvarchar(261) = N'MyTestTable';  -- may be schema-qualified, e.g. N'dbo.MyTestTable'
DECLARE @FileID int = 1;
DECLARE @SQL nvarchar(max);

-- Resolve the name once so the metadata lookup and the generated FROM clause
-- point at the same object, even when the table name exists in several schemas.
DECLARE @ObjectID int = OBJECT_ID(@TableName);
DECLARE @SchemaName sysname = OBJECT_SCHEMA_NAME(@ObjectID);
DECLARE @ObjectName sysname = OBJECT_NAME(@ObjectID);

WITH cols AS (
    SELECT COLUMN_NAME, ORDINAL_POSITION
    FROM INFORMATION_SCHEMA.COLUMNS
    WHERE TABLE_SCHEMA = @SchemaName
      AND TABLE_NAME = @ObjectName
)
-- No outer FROM: the statement text is assembled exactly once. Both lists are
-- concatenated with FOR XML PATH; TYPE + .value() keeps characters such as
-- & < > in column names from being turned into XML entities.
SELECT @SQL = N'WITH CTE AS (
    SELECT '
    + STUFF((
        SELECT N', COUNT(DISTINCT ' + QUOTENAME(COLUMN_NAME) + N') AS ' + QUOTENAME(COLUMN_NAME)
        FROM cols
        ORDER BY ORDINAL_POSITION
        FOR XML PATH(''), TYPE
      ).value('(./text())[1]', 'nvarchar(max)'), 1, 2, N'')
    + N'
    FROM ' + QUOTENAME(@SchemaName) + N'.' + QUOTENAME(@ObjectName) + N'
    WHERE File_ID = @FID
)
SELECT B.ID, B.ColumnName, B.DistinctCountValue
FROM CTE
CROSS APPLY (
    VALUES '
    + STUFF((
        -- (ordinal, N'column name' as a literal, [column name] as the count column of CTE)
        SELECT N',(' + CAST(ORDINAL_POSITION AS nvarchar(10)) + N', N' + QUOTENAME(COLUMN_NAME, '''') + N', ' + QUOTENAME(COLUMN_NAME) + N')'
        FROM cols
        ORDER BY ORDINAL_POSITION
        FOR XML PATH(''), TYPE
      ).value('(./text())[1]', 'nvarchar(max)'), 1, 1, N'')
    + N'
) AS B (ID, ColumnName, DistinctCountValue)
ORDER BY B.ID;';

-- @SQL is NULL when the name did not resolve or no columns were found
-- (the empty column list makes the whole concatenation NULL).
IF @SQL IS NULL
    RAISERROR(N'No columns found for table %s; check the table name.', 16, 1, @TableName);
ELSE
    EXEC sys.sp_executesql @SQL, N'@FID INT', @FID = @FileID;

The query below builds a table of all the column names and uses a WHILE loop to run the count for each column with whatever WHERE clause you want to use. It should be flexible enough for any table; just update the variables at the top. Note that NULL values in a column are not counted; you can add a CASE expression to the @Query string if you want them included. Since each column is processed individually, I copy the source table into a temp table first so the real table is only read once.

-- Counts the distinct non-NULL values of every column of @Schema.@Table for
-- the rows matching @WhereClause, running one dynamic query per column.
-- The source table is copied once into ##SourceValues and every count reads
-- from that copy. A global temp table is used because it has to be visible to
-- the separately executed dynamic statements.
IF OBJECT_ID('tempdb..##SourceValues') IS NOT NULL
    DROP TABLE ##SourceValues;

DECLARE @Schema NVARCHAR(128) = N'SomeSchema';
DECLARE @Table NVARCHAR(128) = N'SomeTable';
DECLARE @WhereClause NVARCHAR(MAX) = N' Some WHERE clause';  -- appended verbatim: must be trusted text
DECLARE @ColumnName NVARCHAR(128);
DECLARE @ProcessedRows TABLE (ColumnName NVARCHAR(128), DistinctCount INT);
DECLARE @Columns TABLE (RowNumber INT PRIMARY KEY, ColumnName NVARCHAR(128));

-- Number the columns 1..N so the loop can step through them; the schema
-- filter keeps same-named tables in other schemas out of the list.
INSERT INTO @Columns (RowNumber, ColumnName)
SELECT ROW_NUMBER() OVER (ORDER BY COLUMN_NAME DESC), COLUMN_NAME
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_SCHEMA = @Schema
  AND TABLE_NAME = @Table;

DECLARE @Count INT = (SELECT MAX(RowNumber) FROM @Columns);
DECLARE @Counter INT = 0;
DECLARE @DistinctCount INT;
DECLARE @Query NVARCHAR(MAX);

-- SELECT * is deliberate here: the copy must contain every column.
-- The schema qualifies the source table; the temp table itself lives in tempdb.
SET @Query = N'SELECT * INTO ##SourceValues FROM '
    + QUOTENAME(@Schema) + N'.' + QUOTENAME(@Table) + N' WITH (NOLOCK)';
EXEC sys.sp_executesql @Query;

WHILE @Counter < @Count
BEGIN
    SET @Counter += 1;
    SET @DistinctCount = NULL;  -- never carry the previous column's result forward

    SELECT @ColumnName = ColumnName
    FROM @Columns
    WHERE RowNumber = @Counter;

    SET @Query = N'SELECT @Output = COUNT(DISTINCT ' + QUOTENAME(@ColumnName)
        + N') FROM ##SourceValues ' + @WhereClause;
    EXECUTE sys.sp_executesql @Query, N'@Output INT OUTPUT', @Output = @DistinctCount OUTPUT;

    INSERT INTO @ProcessedRows (ColumnName, DistinctCount)
    VALUES (@ColumnName, @DistinctCount);
END;

SELECT ColumnName, DistinctCount
FROM @ProcessedRows;

-- Release the copy instead of leaving it in tempdb until the session ends
IF OBJECT_ID('tempdb..##SourceValues') IS NOT NULL
    DROP TABLE ##SourceValues;

Let's try a different approach.

Get all values unpivoted as Param/Value:

1) Collect list of tables and columns to be used in dynamic SQL:

-- Builds #Base: one row per table name with two generated column lists:
--   SelectClause  - every column converted to NVARCHAR(MAX) under its own name
--   UnpivotClause - the same column names, comma separated
-- Both lists are ordered by column_id so their order is deterministic and
-- follows the table definition.
-- NOTE(review): tables are keyed by name only (no schema), so same-named
-- tables in different schemas are merged into one row here.
DROP TABLE IF EXISTS #Base;

WITH SchemaData AS (
    SELECT
        t.name AS [TableName],
        c.name AS [ColumnName],
        c.column_id AS [ColumnOrderID]
    FROM sys.tables t
    INNER JOIN sys.columns c ON c.object_id = t.object_id
)
SELECT
    t.TableName,
    STUFF((
        SELECT ',CONVERT(NVARCHAR(MAX),' + QUOTENAME(a.[ColumnName]) + ') AS ' + QUOTENAME(a.[ColumnName])
        FROM SchemaData a
        WHERE a.TableName = t.TableName
        ORDER BY a.ColumnOrderID
        FOR XML PATH(''), TYPE
    ).value('(./text())[1]', 'NVARCHAR(MAX)'), 1, 1, '') AS [SelectClause],
    STUFF((
        SELECT ',' + QUOTENAME(a.[ColumnName])
        FROM SchemaData a
        WHERE a.TableName = t.TableName
        ORDER BY a.ColumnOrderID
        FOR XML PATH(''), TYPE
    ).value('(./text())[1]', 'NVARCHAR(MAX)'), 1, 1, '') AS [UnpivotClause]
INTO #Base
FROM SchemaData t
GROUP BY t.TableName
;

2) Get all data inside a temp table

-- Fills #Result with one (TableName, ColumnName, Value) row per column value
-- by running a generated UNPIVOT statement for each row of #Base.
-- UNPIVOT omits NULL values, so NULLs never reach #Result.
DROP TABLE IF EXISTS #Result;
CREATE TABLE #Result(TableName NVARCHAR(255),ColumnName NVARCHAR(255),[Value] NVARCHAR(MAX));

DECLARE @TableName NVARCHAR(255),@SelectClause NVARCHAR(MAX),@UnpivotClause NVARCHAR(MAX);
DECLARE @dSql NVARCHAR(MAX);
DECLARE crPopulateResult CURSOR LOCAL FAST_FORWARD READ_ONLY FOR SELECT b.TableName,b.SelectClause,b.UnpivotClause FROM #Base b;

OPEN crPopulateResult;
FETCH NEXT FROM crPopulateResult INTO @TableName,@SelectClause,@UnpivotClause;

WHILE @@FETCH_STATUS = 0
BEGIN
    -- The table name appears twice: as a Unicode string literal (escaped with
    -- QUOTENAME(..., '''') so an apostrophe in the name cannot break the
    -- statement) and as a bracket-quoted identifier in the FROM clause.
    SELECT @dSql = N'   INSERT INTO #Result(TableName,[ColumnName],[Value])
    SELECT up.TableName,up.Param AS [ColumnName],up.[Value]
    FROM (
        SELECT N' + QUOTENAME(@TableName, '''') + N' AS [TableName]
            ,' + @SelectClause + N'
        FROM ' + QUOTENAME(@TableName) + N'
    ) a
    UNPIVOT(Value FOR Param IN (' + @UnpivotClause + N')) up
    ';
    EXEC sp_executesql @stmt = @dSql;

    FETCH NEXT FROM crPopulateResult INTO @TableName,@SelectClause,@UnpivotClause;
END

CLOSE crPopulateResult;
DEALLOCATE crPopulateResult;

3) Any filters can be applied to #Result, including table names, column names, data filters, etc.:

-- Per table and column: how many rows #Result holds and how many of their
-- values are distinct.
SELECT
    res.TableName,
    res.ColumnName,
    COUNT(*) AS [CountValue],
    COUNT(DISTINCT res.[Value]) AS [DistinctCountValue]
FROM #Result AS res
-- Optional filter on the unpivoted rows, for example:
-- WHERE res.ColumnName = 'file_id' AND res.[Value] = '1'
GROUP BY
    res.TableName,
    res.ColumnName
ORDER BY
    res.TableName,
    res.ColumnName
;

Tags:

Sql

Sql Server