Write a column selectively to the appropriate row using pgfplotstable?

Edit: See the edit history for the earlier, incomplete answer.


What I did is loop over the Reg.No. entries and check whether each one has a match in the number column of the \markspgf table. If it does, we use the marks value of that row when creating the new column; otherwise we put N/A (this can be skipped if it is not needed). Since the \pgfplotstablerow variable is not available at that point, we keep our own counter for the current row and record the counter value of every row that failed to match in a list.

Once all the numbers have been processed, we typeset the table with this new column and check at each row whether the row should be shown. If the row number matches the first number on the list, the if condition fires: that number is removed from the list and \pgfplotstableuserowfalse drops the row; otherwise nothing happens and the row is typeset.

\documentclass{article}
\usepackage{pgfplotstable,xstring}
\usepackage{filecontents,etoolbox}
% Sample roster written to namespgf.csv: one record per student with the
% columns Reg.No., Name and Place.
\begin{filecontents}{namespgf.csv}
Reg.No.,Name,Place
5501,Kathirvelu A,re
5502,Gugan K,rt
5503,Kalaitchelvi S,uy
5504,Suresh S,itr
5505,Mahesh K,utyehd
5506,Balini N,utyehd
5507,Kumar H,utyehd
5508,Khalate A,utyehd
5509,me,hgte
5510,you,there
5511,them,here
5512,who,where 
\end{filecontents}
% Sample results written to markspgf.csv: `number' holds a registration
% number and `marks' the score.  Only 5501, 5502, 5503, 5505, 5507 and 5512
% are listed, so the remaining roster entries have no marks.
\begin{filecontents}{markspgf.csv}
number,marks
5501,67
5502,25
5503,62
5505,95
5507,100
5512,45
\end{filecontents}
% Scratch state used while building and filtering the Marks column.
% \newcommand* is used instead of \def so that a clash with an already
% defined macro is reported instead of silently overwriting it.
%   \marksuccess      match flag: reset to \relax before each lookup and
%                     redefined as 1 once a matching number is found
%   \mypopulatedlist  comma-terminated list of the row indices that had no match
%   myrowcount        index of the roster row currently being processed
\newcommand*\marksuccess{}
\newcommand*\mypopulatedlist{}
\newcounter{myrowcount}
\setcounter{myrowcount}{0}


\begin{document}
% ---------------------------------------------------------------------------
% Read both CSV files into table macros.  The marks table is read with
% `verb string type'; afterwards the same key is set for the /pgfplots/table
% family so that it applies to the table operations that follow.
\pgfplotstableread[col sep=comma]{namespgf.csv}\namespgf
\pgfplotstableread[col sep=comma, verb string type]{markspgf.csv}\markspgf
\pgfkeys{/pgfplots/table/.cd, verb string type}
% ----------------------------------------------------------------------%
% Append a `Marks' column to \namespgf.
% For each roster row the `number' column of \markspgf is scanned for the
% row's Reg.No.  On a match the `marks' cell of that \markspgf row becomes the
% new cell; otherwise the cell is N/A and the current value of myrowcount is
% appended to \mypopulatedlist.  myrowcount is stepped once per roster row.
\pgfplotstablecreatecol[
    create col/assign/.code={%
        \let\marksuccess\relax % nothing matched yet for this row
        \pgfplotstableforeachcolumnelement{number}\of\markspgf\as\cellb{%
            \IfStrEq{\thisrow{Reg.No.}}{\cellb}{% match
                % \pgfplotstablerow is used here as the index of the matching
                % \markspgf row.
                \pgfplotstablegetelem{\pgfplotstablerow}{marks}\of\markspgf
                \edef\myretval{\pgfplotsretval}%
                \def\marksuccess{1}%
                % No early exit from the loop is available (a \breakforeach
                % would have saved some time), so the scan always runs to the end.
            }{}% no match: keep scanning
        }%
        % \ifx compares the macro itself with \relax, so the result does not
        % depend on what \marksuccess happens to expand to.
        \ifx\marksuccess\relax
            \def\myretval{N/A}%
            \xappto\mypopulatedlist{\arabic{myrowcount},}%
        \fi
        \stepcounter{myrowcount}%
        \pgfkeyslet{/pgfplots/table/create col/next content}\myretval
    }%
]{Marks}\namespgf


% Typeset Reg.No., Name and Marks, omitting every row whose index is at the
% head of \mypopulatedlist.  The list is consumed from the left, one entry per
% omitted row.
\pgfplotstabletypeset[
    column type=l,
    columns={Reg.No.,Name,Marks},
    row predicate/.code={%
        % Next pending index: the text before the first comma of the list.
        \StrBefore{\mypopulatedlist}{,}[\mynextrow]%
        \IfStrEq{#1}{\mynextrow}{% the current row is the next one to omit
            % Drop that index together with its comma: keep only what
            % follows the first comma.
            \StrBehind{\mypopulatedlist}{,}[\mytemplist]%
            \edef\mypopulatedlist{\mytemplist}%
            \pgfplotstableuserowfalse % tell pgfplotstable to omit this row
        }{}%
    }%
]\namespgf

\end{document}

enter image description here


Here is a solution that uses the datatool package:

enter image description here

Notes:

  • In the MWE below, I used the filecontents* environment so that the filecontents header is not added to the data file.

Code:

\documentclass{article}
\usepackage{datatool}
\usepackage{xstring}
\usepackage{filecontents}

% Sample roster written to namespgf.csv: registration number and name.
\begin{filecontents*}{namespgf.csv}
Reg.No.,Name
5501,Kathirvelu A
5502,Gugan K
5503,Kalaitchelvi S
5504,Suresh S
5505,Mahesh K
\end{filecontents*}
% Sample results written to markspgf.csv, keyed by registration number.
% 5504 is not listed, so that student has no marks.
\begin{filecontents*}{markspgf.csv}
number,marks
5501,67
5502,25
5503,62
5505,95
\end{filecontents*}

%------- Useful for debugging ---------------
% \PrintNamesDTLTable{<names database>}
% Debugging aid: typesets every Reg.No./Name pair of the database as a
% two-column table.
\newcommand{\PrintNamesDTLTable}[1]{%
    \begin{tabular}{c c}%
        Reg No & Name \\\hline%
        \DTLforeach{#1}{\RegNo=Reg.No.,\Name=Name}{%
            \RegNo & \Name\\%
        }%
    \end{tabular}%
}%

% \PrintMarksDTLTable{<marks database>}
% Debugging aid: typesets every number/marks pair of the database as a
% two-column table.
\newcommand{\PrintMarksDTLTable}[1]{%
    \begin{tabular}{c c}%
        Number & Marks \\\hline%
        \DTLforeach{#1}{\Number=number,\Marks=marks}{%
            \Number & \Marks\\%
        }%
    \end{tabular}%
}%
%-------------------------------------------

% \PrintIfRegNumberIsInMarks{<reg number>}{<name>}{<marks database>}
% Emits the table row `<reg number> & <name> & <marks>' when the marks
% database has an entry whose `number' equals <reg number>, and nothing
% otherwise.  Intended to be called from inside a three-column tabular.
\newcommand*{\PrintIfRegNumberIsInMarks}[3]{%
    % The starred \DTLforeach* is datatool's read-only iterator.  The rows
    % are only read here, and this loop runs once per student, so the
    % read/write form is unnecessary overhead.
    \DTLforeach*{#3}{\Number=number, \Marks=marks}{%
        \IfEq{#1}{\Number}{% found the student
            #1 & #2 & \Marks \\%
            \dtlbreak % done, break out of the loop
        }{}% not found yet, keep looking
    }%
}%

% \PrintNamesWithMarksDTLTable{<names database>}{<marks database>}
% Typesets a three-column table (Reg No, Name, Marks).  Each entry of the
% names database is handed to \PrintIfRegNumberIsInMarks, which produces the
% row only if the marks database knows that registration number.
\newcommand{\PrintNamesWithMarksDTLTable}[2]{%
    \begin{tabular}{c c c}%
        Reg No & Name & Marks\\\hline%
        \DTLforeach{#1}{\RegNo=Reg.No.,\Name=Name}{%
            \PrintIfRegNumberIsInMarks{\RegNo}{\Name}{#2}%
        }%
    \end{tabular}%
}%


\begin{document}
% Load both CSV files into datatool databases before anything is typeset.
\DTLloaddb{NamesDB}{namespgf.csv}
\DTLloaddb{MarksDB}{markspgf.csv}

% Uncomment to inspect the raw databases while debugging:
%\PrintNamesDTLTable{NamesDB}
%\PrintMarksDTLTable{MarksDB}

% Combined table: only students that have an entry in MarksDB are listed.
\PrintNamesWithMarksDTLTable{NamesDB}{MarksDB}
\end{document}

I realize that you're looking for a pgfplotstable solution only, but as food for thought I'll add another option. This uses the namespgf.csv to generate a set of key-value pairs. When the marks are written, the name corresponding to the registration number is inserted. I made a couple of mock spreadsheets with ~3000 entries (using numbers rather than names) and it took ~15s to compile. I'm not sure how that would compare to a pgfplotstable solution in terms of efficiency. Note that to simplify things a bit, I removed the headers from the data and added them to the table manually.

\documentclass{article}

\usepackage{xparse}
\usepackage{booktabs}
\usepackage{longtable}
\usepackage{filecontents}

% Sample roster written to namespgf.csv: `<reg number>,<name>' per line,
% without a header row.
\begin{filecontents*}{namespgf.csv}
5501,Kathirvelu A
5502,Gugan K
5503,Kalaitchelvi S
5504,Suresh S
5505,Mahesh K
\end{filecontents*}
% Sample results written to markspgf.csv: `<reg number>,<marks>' per line,
% without a header row.  5504 is not listed.
\begin{filecontents*}{markspgf.csv}
5501,67
5502,25
5503,62
5505,95
\end{filecontents*}

\ExplSyntaxOn
% Storage: the accumulated table rows and the reg-number -> name lookup.
\tl_new:N   \g_tab_rows_tl
\prop_new:N \g_names_prop

% One input stream per CSV file, opened right away.
\ior_new:N \g_names_ior
\ior_new:N \g_marks_ior
\ior_open:Nn \g_names_ior {namespgf.csv}
\ior_open:Nn \g_marks_ior {markspgf.csv}

% \set_name_keys:w <reg number>,<name>\q_stop
% Stores one roster record in \g_names_prop, with the registration number as
% the key and the name as the value.
% Protected because it performs an assignment; the global setter is used
% because \g_names_prop is a global variable.
\cs_new_protected:Npn \set_name_keys:w #1,#2\q_stop
    {
        \prop_gput:Nnn \g_names_prop {#1} {#2}
    }

% \tab_write_keys:w <reg number>,<marks>\q_stop
% Appends the table row `<reg number> & <name> & <marks> \\' to
% \g_tab_rows_tl.  The name is popped from \g_names_prop using the
% registration number as the key.
% Protected because it performs assignments.
\cs_new_protected:Npn \tab_write_keys:w #1,#2\q_stop
    {
        % If the registration number is not in the roster, the plain pop
        % would leave \q_no_value in \l_tmpa_tl and that marker would end up
        % in the table.  Use the branching form and fall back to a placeholder.
        \prop_gpop:NnNTF \g_names_prop {#1} \l_tmpa_tl
            { }
            { \tl_set:Nn \l_tmpa_tl { N/A } }
        \tl_gput_right:Nn \g_tab_rows_tl { #1 & }       % number 1st
        \tl_gput_right:NV \g_tab_rows_tl \l_tmpa_tl     % name 2nd
        \tl_gput_right:Nn \g_tab_rows_tl { & #2 \\ }    % grade 3rd
    }

% Feed every line of the roster to \set_name_keys:w, then every line of the
% marks file to \tab_write_keys:w, and close both streams afterwards.
% Both parsers are delimited by a comma, so a blank line would make them read
% past \q_stop; blank lines are therefore skipped.
\ior_str_map_inline:Nn \g_names_ior
    {
        \tl_if_blank:nF {#1} { \set_name_keys:w #1 \q_stop }
    }

\ior_str_map_inline:Nn \g_marks_ior
    {
        \tl_if_blank:nF {#1} { \tab_write_keys:w #1 \q_stop }
    }

\ior_close:N \g_names_ior
\ior_close:N \g_marks_ior

% \WriteRows
% Document-level command that inserts the rows accumulated in \g_tab_rows_tl.
\NewDocumentCommand \WriteRows { }
    { \tl_use:N \g_tab_rows_tl }

\ExplSyntaxOff

\begin{document}

% booktabs layout: \toprule above the header row, \midrule between header and
% data, \bottomrule after the last row inserted by \WriteRows.
\begin{longtable}{clc}
\toprule
Reg.No. & Name & Marks \\
\midrule
\WriteRows
\bottomrule
\end{longtable}

\end{document}