Iterate over tokens

enter image description here

\documentclass{article}

% \endtest -- end-of-list sentinel for the token loop.
% \test places it after the tokens being inspected, and \testzzz stops as
% soon as \let has copied its meaning into \tmp (checked with \ifx).  It is
% only compared, never meant to be run; the replacement text merely has to be
% something no ordinary input token will share.
% No \makeatletter is required here: none of the macro names contain @, and
% leaving it out keeps @ at its normal category code in the document body.
\def\endtest{\test!!!!}
% \test{<tokens>}
%   Starts a new paragraph, prints "TESTING:" followed by <tokens> shown
%   verbatim (via \detokenize), then hands <tokens> plus the \endtest
%   sentinel to \testzz, which walks through them one token at a time.
\def\test#1{%
  \par\bigskip
  \textbf{TESTING:} \texttt{\detokenize{#1}}\par
  \testzz#1\endtest
}

% \testzz -- one step of the loop.
%   \let\tmp= <token> copies the meaning of the next input token into \tmp,
%   and \afterassignment makes \testzzz run as soon as that assignment is done.
%   The "=" followed by exactly one space is deliberate: with that form a
%   space or "=" at the front of the remaining input is taken as the token to
%   copy instead of being swallowed as optional \let syntax.
\def\testzz{\afterassignment\testzzz\let\tmp= }

% \testzzz -- runs after each \let\tmp= assignment made by \testzz.
%   Unless \tmp now has the meaning of the \endtest sentinel, typeset
%   \meaning\tmp in a paragraph of its own and go back to \testzz for the
%   next token.  On the sentinel nothing is done, which ends the loop.
\def\testzzz{%
  \unless\ifx\tmp\endtest
    \texttt{\meaning\tmp}\par
    % close the conditional before \testzz reads the next token
    \expandafter\testzz
  \fi
}

\begin{document}

% Three ordinary characters.
\test{123}

% Words separated by spaces, mixed with control sequences
% (\some and \here have no definition in this document).
\test{There are \some arguments \in \here g}

% Leading space, nested brace groups, and macros that are only inspected,
% never executed.
\test{ a+ {x \sqrt{\frac}}}

\end{document}

Note that this mechanism consumes the supplied list using \let (the form with = and exactly one space is important, so that it does not drop spaces or = in the input). That makes it easy to detect spaces and braces, but it only captures the meaning of each token: for example, it cannot distinguish { from \bgroup, nor can it distinguish between undefined commands or show which name was used; \zzzfoo, \undefined, etc. will all appear the same in the loop, as undefined.

For similar reasons, you cannot reconstruct anything equivalent to the original input from within the loop. Given \frac{a}{b} you get essentially \frac\bgroup a\egroup\bgroup b\egroup, from which it isn't possible in general to reconstruct a working fraction.

So... whether these restrictions matter depends on the intended use of the loop.


If you need it for debugging, it's a one-liner:

\documentclass{article}
\usepackage{xparse}

\ExplSyntaxOn % expl3 code follows
\NewDocumentCommand \test { m } % #1: the token list to inspect
  {
    \tl_analysis_show:n {#1} % show every token of #1 with its meaning on the terminal
  }
\ExplSyntaxOff

\begin{document} % one \test call per sample; the console transcript quotes the line numbers of these calls

\test{123}

\test{There are \some arguments \in \here g}

\test{ a+ {x \sqrt{\frac}}}

\end{document}

If you run it with pdflatex -interaction=nonstopmode, the console will show

The token list contains the tokens:
>  1 (the character 1)
>  2 (the character 2)
>  3 (the character 3).
<recently read> }

l.13 \test{123}

The token list contains the tokens:
>  T (the letter T)
>  h (the letter h)
>  e (the letter e)
>  r (the letter r)
>  e (the letter e)
>    (blank space  )
>  a (the letter a)
>  r (the letter r)
>  e (the letter e)
>    (blank space  )
>  \some (control sequence=undefined)
>  a (the letter a)
>  r (the letter r)
>  g (the letter g)
>  u (the letter u)
>  m (the letter m)
>  e (the letter e)
>  n (the letter n)
>  t (the letter t)
>  s (the letter s)
>    (blank space  )
>  \in (control sequence=\mathchar"3232=12850)
>  \here (control sequence=undefined)
>  g (the letter g).
<recently read> }

l.15 \test{There are \some arguments \in \here g}

The token list contains the tokens:
>    (blank space  )
>  a (the letter a)
>  + (the character +)
>    (blank space  )
>  { (begin-group character {)
>  x (the letter x)
>    (blank space  )
>  \sqrt (control sequence=macro:->\protect \sqrt  )
>  { (begin-group character {)
>  \frac (control sequence=macro:#1#2->{\begingroup #1\endgroup \over #2})
>  } (end-group character })
>  } (end-group character }).
<recently read> }

l.17 \test{ a+ {x \sqrt{\frac}}}

Taking the expandable code from l3tl and recoding in classical style, we might do something like

% Make @ a letter so that it can appear in the internal macro names below.
\catcode`\@=11 %

% \tl@exp@end: the number 0 as a \chardef token.  \iterate starts a
% \romannumeral expansion and \tl@act@end inserts this token to terminate it;
% \romannumeral produces no output for 0.
\chardef\tl@exp@end=0 %

% Argument selectors: each absorbs the stated number of arguments and leaves
% only the one named (\@gobble leaves nothing).  All are \long, so the
% arguments may contain \par.
\long\def\@firstoftwo#1#2{#1}
\long\def\@secondoftwo#1#2{#2}
\long\def\@secondofthree#1#2#3{#2}
\long\def\@gobble#1{}

% \tl@if@empty{<tokens>}{<true>}{<false>}
%   Expandable test: leaves <true> if <tokens> is empty, <false> otherwise.
%   \detokenize turns <tokens> into plain characters, so \ifx compares \relax
%   either with the trailing \relax (empty case) or with a character.
\long\def\tl@if@empty#1{%
  \expandafter\ifx\expandafter\relax\detokenize{#1}\relax
    % Empty: \@secondofthree drops \@secondoftwo and <false>, keeping <true>.
    \expandafter\@secondofthree
  \fi
  % Non-empty: only \@secondoftwo is left, which keeps <false>.
  \@secondoftwo
}

% \tl@if@head@N{<tokens>}{<true>}{<false>}
%   Expandable test: leaves <true> when <tokens> starts with a single ordinary
%   token, i.e. it is non-empty and its first item is neither a space nor a
%   brace group; otherwise leaves <false>.
%   \ifcat compares the first two unexpandable tokens that remain once the
%   two sub-tests have expanded:
%     - \tl@if@head@N@aux leaves ^ when nothing precedes the first space
%       (empty list or leading space);
%     - the \string/\@gobble step leaves an unmatched } when the first token
%       is a begin-group character, because \string makes that brace inert.
%   When neither leaves anything the comparison is \ifcat** and is true.
\long\def\tl@if@head@N#1{%
  \ifcat
    \iffalse{\fi\tl@if@head@N@aux?#1 }%
    \expandafter\@gobble\expandafter{\expandafter{\string#1?}}%
    **%
    \expandafter\@firstoftwo
  \else
    \expandafter\@secondoftwo
  \fi
}
% \tl@if@head@N@aux?<tokens up to the first space><space>
%   Leaves ^ if only the ? precedes that space and nothing otherwise, then
%   gobbles whatever follows the space up to the closing } written in
%   \tl@if@head@N.  The \iffalse{\fi there and the \iffalse}\fi here expand to
%   nothing; they only keep the braces of both definitions balanced.
\long\def\tl@if@head@N@aux#1 {%
  \expandafter\tl@if@empty\expandafter{\@gobble#1}{^}{}%
  \expandafter\@gobble\expandafter{\iffalse}\fi
}

% \tl@if@head@group{<tokens>}{<true>}{<false>}
%   Expandable test: leaves <true> when <tokens> starts with a brace group.
%   \string neutralises a leading begin-group character, so \@gobble then
%   leaves one unmatched } behind; \ifcat compares that } with * (different
%   category codes, hence the \else branch is the "true" answer).  In every
%   other case everything is gobbled and \ifcat** succeeds.
\long\def\tl@if@head@group#1{%
  \ifcat\expandafter\@gobble\expandafter{\expandafter{\string#1?}}**%
    \expandafter\@secondoftwo
  \else
    \expandafter\@firstoftwo
  \fi
}

% Markers used by the loop; each is defined to expand to itself.
%   \q@act@mark is appended after the user's tokens by \tl@act and recognised
%   with \ifx in \tl@act@normal to detect the end of the list.
%   \q@act@stop is the argument delimiter of the \tl@act@... macros.
\def\q@act@mark{\q@act@mark}
\def\q@act@stop{\q@act@stop}

% \tl@act<normal><group><space>{<extra>}{<tokens>}
%   Entry point of the expandable loop over <tokens>.
%     #1, #2, #3  handlers called for an ordinary token, a brace group and a
%                 space respectively
%     #4          extra argument, passed (braced) as first argument to every
%                 handler
%     #5          the token list to walk through
%   Sets up
%     \tl@act@loop<tokens>\q@act@mark\q@act@stop{<extra>}<normal><group><space>\tl@act@result{}
%   \tl@act@result is not defined in the visible code; it serves as a
%   delimiter, and the braced argument after it (initially empty) is where the
%   handlers accumulate the output.
\long\def\tl@act#1#2#3#4#5{%
  % Brace trick, closed again by \ifnum`{=\z@}\fi in \tl@act@end.
  % NOTE(review): this looks like expl3's \group_align_safe_begin: -- confirm
  % against l3tl.
  \ifnum\iffalse{\fi`}=\z@\fi
  \tl@act@loop#5\q@act@mark\q@act@stop
  {#4}#1#2#3%
  \tl@act@result{}%
}
% \tl@act@loop<remaining tokens>\q@act@stop
%   Dispatcher: looks at the front of <remaining tokens> and puts the matching
%   worker in front of them again --
%     \tl@act@normal  if the head is a single ordinary token,
%     \tl@act@group   if it is a brace group,
%     \tl@act@space   otherwise (the head is a space).
%   The line ends after the tests and the braced branches only produce spaces
%   in front of undelimited arguments, where TeX skips them.
\long\def\tl@act@loop#1\q@act@stop{%
  \tl@if@head@N{#1}
    {\tl@act@normal}
    {%
      \tl@if@head@group{#1}
        {\tl@act@group}
        {\tl@act@space}%
    }%
  #1\q@act@stop
}
% \tl@act@normal<token><rest>\q@act@stop{<extra>}<normal handler>
%     #1  the ordinary token at the head of the list
%     #2  the rest of the list
%     #3  the extra argument
%     #4  the handler for ordinary tokens
%   If #1 is \q@act@mark the list is exhausted and \tl@act@end takes over.
%   Otherwise the handler is called as #4{#3}#1, followed by the loop set up
%   again for the rest of the list.
\long\def\tl@act@normal#1#2\q@act@stop#3#4{%
  \ifx\q@act@mark#1\expandafter\tl@act@end\fi
  #4{#3}#1%
  \tl@act@loop#2\q@act@stop
  {#3}#4%
}
% \tl@act@group{<group>}<rest>\q@act@stop{<extra>}<normal handler><group handler>
%     #1  contents of the brace group at the head of the list (braces removed
%         by argument grabbing)
%     #2  the rest of the list
%     #3  the extra argument
%     #4  the handler for ordinary tokens (only passed along)
%     #5  the handler for groups
%   Calls the handler as #5{#3}{#1}, followed by the loop set up again for
%   the rest of the list.
\long\def\tl@act@group#1#2\q@act@stop#3#4#5{%
  #5{#3}{#1}%
  \tl@act@loop#2\q@act@stop
  {#3}#4#5%
}
% \tl@act@space<space><rest>\q@act@stop{<extra>}<normal handler><group handler><space handler>
%   The \expandafter chain expands \space before \def reads its parameter
%   text, so the parameter text starts with a real space token; the macro thus
%   removes the space at the head of the list.
%     #1  the rest of the list
%     #2  the extra argument
%     #3, #4  handlers for ordinary tokens and groups (only passed along)
%     #5  the handler for spaces
%   Calls the handler as #5{#2}, followed by the loop set up again for the
%   rest of the list.
\expandafter\long\expandafter\def\expandafter
  \tl@act@space\space#1\q@act@stop#2#3#4#5{%
    #5{#2}%
    \tl@act@loop#1\q@act@stop
    {#2}#3#4#5%
  }
% \tl@act@end<leftover loop tokens>\tl@act@result{<result>}
%   Called by \tl@act@normal once \q@act@mark has been reached.
%     #1  everything still pending up to \tl@act@result (discarded)
%     #2  the accumulated result
%   Closes the brace trick opened in \tl@act, inserts \tl@exp@end to stop the
%   \romannumeral started by \iterate, and leaves the result in the input.
\long\def\tl@act@end#1\tl@act@result#2{%
  \ifnum`{=\z@}\fi
  \tl@exp@end
  #2%
}

% \iterate{<tokens>}
%   User-level, expandable entry point.  Runs \tl@act over <tokens> with the
%   three \tl@iterate@... handlers and a single space as the (unused) extra
%   argument.  \romannumeral drives the expansion until \tl@act@end stops it;
%   \unexpanded then returns the accumulated result without expanding it
%   further.
\long\def\iterate#1{%
  \unexpanded\expandafter{%
    \romannumeral\tl@act
      \tl@iterate@normal
      \tl@iterate@group
      \tl@iterate@space
      { }
      {#1}%
  }%
}
% Handlers used by \iterate.  In each, #1 is the extra argument supplied by
% the loop; it is ignored.
%   \tl@iterate@normal{<extra>}<token>    records \string<token>
%   \tl@iterate@group{<extra>}{<group>}   records {\detokenize{<group>}}
%   \tl@iterate@space{<extra>}            records a space
\long\def\tl@iterate@normal#1#2{\tl@iterate@action{\string#2}}
\long\def\tl@iterate@group#1#2{\tl@iterate@action{{\detokenize{#2}}}}
\long\def\tl@iterate@space#1{\tl@iterate@action{ }}
% \tl@iterate@action{<item>}<pending loop tokens>\tl@act@result{<result>}
%     #1  the item to record
%     #2  the pending loop tokens, put back unchanged
%     #3  the result accumulated so far
%   Appends |<item>| to the result.
\long\def\tl@iterate@action#1#2\tl@act@result#3{%
  #2%
  \tl@act@result{#3|#1|}%
}

% Internal definitions are finished: give @ category code 12 (other) again.
\catcode`\@=12 %

% Demonstration: walk through a list containing letters, spaces and control
% sequences.
\iterate{There are \some arguments \in \here g}

% End of the plain TeX run.
\bye

(This uses e-TeX, but that can be avoided.)

The basic idea is to grab the token list and examine the first token before branching and handling as required. I've not done it, but recursion inside groups is doable. Notice that all brace groups become { ... } (unavoidable in expandable code).

Tags:

Tex Core