Automatically reorder floats to fill page

Here is a sketch of a solution implemented in expl3. To make my life easy I assume that all floats are stored in external files (though of course one could do this differently). To be workable one would need to provide a user interface and finish of the typesetting part of the implementation.

I left in a few \show_... commands (commented out) to show intermediate steps if anybody is interested what is inside the various data structures.

\begin{filecontents*}{A}
This is box one

\vspace{4in}

The end of box one
\end{filecontents*}
\begin{filecontents*}{B}
This is box two

\vspace{4in}

The end of box two
\end{filecontents*}
\begin{filecontents*}{C}
This is box thre

\vspace{2in}

The end of box three
\end{filecontents*}
\begin{filecontents*}{D}
This is box four

\vspace{1in}

The end of box four
\end{filecontents*}
\documentclass{article}

\usepackage{expl3}

\ExplSyntaxOn

% use "fs" for "floats sorted" as module name

% ----------------------------------------------

% box for measuring float ht
\box_new:N \l_fs_float_box

% property list holding float heights (key float file name)
\prop_new:N \l_fs_float_hts_prop

% token list storing float names in braces for sorting
\tl_new:N \l_fs_floats_tl

% sequence holding floats sorted by size
\seq_new:N \l_fs_sorted_floats_seq

% counter for allocated pages
\int_new:N \l_fs_pages_int

% sequence of allocated pages
\seq_new:N \l_fs_alloced_pages_seq

% property list holding available space on page (key = page number)
\prop_new:N \l_fs_page_hts_prop

% ----------------------------------------------

% load a float file (arg = file name)
\cs_new:Npn \fs_load_float_file:n #1 {
% measure
   \vbox_set:Nn \l_fs_float_box {  \input{#1}  }
   \prop_put:NnV \l_fs_float_hts_prop {#1} { \box_ht:N \l_fs_float_box }
% store for sorting
   \tl_put_right:Nn \l_fs_floats_tl { {#1} }
 }

% ----------------------------------------------

% sorting 
\cs_new:Npn \fs_sort_sizes: {
%set up definition for quicksort code
   \cs_set_nopar:Npn \prg_quicksort_compare:nnTF ##1##2  {
% get two float heights and compare
      \prop_get:NnN \l_fs_float_hts_prop {##1} \l_tmpa_tl
      \prop_get:NnN \l_fs_float_hts_prop {##2} \l_tmpb_tl
      \dim_compare:nNnTF \l_tmpa_tl < \l_tmpb_tl
   }
% what to do with each item in the sorted sequence:
  \cs_set_nopar:Npn\prg_quicksort_function:n ##1{\seq_put_right:Nn \l_fs_sorted_floats_seq{ ##1} }
% run the quicksort on the content of \l_fs_floats_tl
  \exp_args:No \prg_quicksort:n \l_fs_floats_tl
}

% ----------------------------------------------

% place all floats according to the following algorithm:
%
% Loop through all of the boxes:
%   Select the largest box
 %  Put it on the first page which has space, starting a new page if necessary

\cs_new:Npn \fs_place_floats:n #1 {
% map over all floats already sorted by size:
  \seq_map_inline:Nn \l_fs_sorted_floats_seq 
    {
% being simpleminded ... we allocate a new page for every float just in case (may not use
%  it later but then we know there is one if necessary :-)
     \fs_alloc_new_page:n {#1}
% get the height of the current float
      \prop_get:NnN \l_fs_float_hts_prop {##1} \l_tmpa_tl
% now map over all allocated pages so far:
      \seq_map_inline:Nn \l_fs_alloced_pages_seq
         {
% get the available space for current page:
            \prop_get:NnN \l_fs_page_hts_prop {####1} \l_tmpb_tl
% compae that with float size
            \dim_compare:nNnF \l_tmpa_tl > \l_tmpb_tl
                {
% if the float fits onto the page then:
%    - change the available space on that page
                   \dim_set:Nn \l_tmpa_dim { \l_tmpb_tl - \l_tmpa_tl }
                   \prop_put:NnV \l_fs_page_hts_prop {####1} \l_tmpa_dim
%    - and save the float name in a sequence associated with the page 
                   \seq_put_right:cn {fs_page_   ####1  _seq } {##1}
%   - finally break out of this loop as we are done
                   \seq_map_break:
                }
         }
    }
}

% ----------------------------------------------

% alloc a new page or rather the data structures for it. Arg is the iitial page height
\cs_new:Npn  \fs_alloc_new_page:n #1 {
% use a number to generate page "names"
  \int_incr:N \l_fs_pages_int
% put in an initial page height
  \prop_put:Non \l_fs_page_hts_prop { \int_use:N \l_fs_pages_int} {#1}
% provide an empty sequence that later on hold floats put on this page
  \seq_clear:c {fs_page_   
                        \int_use:N \l_fs_pages_int 
                        _seq }
% put the new page name into the sequence holding allocated pages
  \seq_put_right:NV \l_fs_alloced_pages_seq  \l_fs_pages_int
}

% ----------------------------------------------

% typeset the floats that should by now all be distrupted into the page sequences
\cs_new:Npn  \fs_typeset_floats: {
% map over all allocated pages (one could put some randomness here so not to get first all the big floats)
 \seq_map_inline:Nn \l_fs_alloced_pages_seq
    {
% with out randomness the first page sequence without a float means we are done
      \seq_if_empty:cTF {fs_page_  ##1 _seq }
         { \seq_map_break: }
         {
% but if it contains float names ... we better typeset the floats one by one
            \seq_map_inline:cn {fs_page_  ##1 _seq }
                {  \fs_typeset_float:nn {##1} {####1} }
         }
    }
}

% I'm not actually doing any typesetting just saying what should happen ...
% which is why I also passed the page name as argument
\cs_new:Npn  \fs_typeset_float:nn #1#2 {
   \typeout { Typeset~ float~ '#2'~ on~ page~ '#1' }
}

% ----------------------------------------------

\begin{document}

% ------------------ load floats ...

\fs_load_float_file:n{A}
\fs_load_float_file:n{C}
\fs_load_float_file:n{D}
\fs_load_float_file:n{B}

%\prop_show:N  \l_fs_float_hts_prop
%\tl_show:N \l_fs_floats_tl

% ----------------  sort them
\fs_sort_sizes:

%\seq_show:N \l_fs_sorted_floats_seq

% --------------- place them
 \fs_place_floats:n{\textheight}

%\prop_show:N  \l_fs_page_hts_prop
% \seq_show:c {fs_page_1_seq }
% \seq_show:c {fs_page_2_seq }
% \seq_show:c {fs_page_3_seq }

% --------------- typeset them
 \fs_typeset_floats: 

\end{document}

I was slower than Frank because I spent a long time fine-tuning the sorting algorithms in LaTeX3. To run the code below, you should grab the "trial" l3sort package from the svn repository, or wait for us to decide to move it to a proper experimental package, on CTAN.

Apart from that, the whole thing is a pretty naive implementation of the algorithm you describe (see in particular the structure of \BinPackOutput).

  1. First collect all the boxes in a common main_box, one after the other, keeping track of the vertical size of each box, together where they appear in the main_box (boxes are stacked starting at the bottom of that main box, which can be accessed with the TeX primitive \lastbox).

  2. Then sort the list of pairs {index}{dimension} by decreasing dimension.

  3. Loop through those pairs (\seq_map_inline:Nn \g_binpack_main_seq). For each one, try pages in turn (\prg_stepwise_inline:nnnn), testing if the new box fits (\dim_compare:nNnF). If it does, then put the box there and break out of the loop over possible pages. If none of the pages had space, make a new page, and put the box there.

  4. In step 3, we construct for each page a sequence of the boxes that we plan to put there. Extracting a given box from the main_box is done in a dirty way: \box_gset_to_last:N removes the last box and assigns it to the given variable. If we repeat that asignment, we remove boxes one by one. Hence, the method I decided to use is to copy the main_box, then remove boxes N times and grabbing the (N+1)-st one, which is item N in the main_box.

Boxes are typeset in a frame, and we add a vfil amount of space around each. It turns out that my particular example is very full; in general boxes will be separated each by the same amount, hence are spread out nicely along the page.

\documentclass{article}
\usepackage{lipsum}
\usepackage{expl3,l3sort}
\ExplSyntaxOn
\box_new:N \g_binpack_main_box
\box_new:N \g_binpack_tmpa_box
\box_new:N \g_binpack_tmpb_box
\int_new:N \g_binpack_label_int
\seq_new:N \g_binpack_main_seq
\dim_new:N \g_binpack_hsize_dim
\dim_new:N \g_binpack_vsize_dim
\dim_new:N \g_binpack_extra_dim
\int_new:N \g_binpack_page_int
\bool_new:N \g_binpack_success_bool
\newcommand{\BinPackInit}
  {
    \box_gclear:N \g_binpack_main_box
    \box_gclear:N \g_binpack_tmpa_box
    \box_gclear:N \g_binpack_tmpb_box
    \seq_gclear:N \g_binpack_main_seq
    \int_gzero:N  \g_binpack_label_int
    \int_gzero:N  \g_binpack_page_int
    \dim_gset:Nn  \g_binpack_vsize_dim { \vsize }
    \dim_gset:Nn  \g_binpack_hsize_dim { \hsize - 2\fboxsep - 2\fboxrule }
    \dim_gset:Nn  \g_binpack_extra_dim { 2\fboxsep + 2\fboxrule }
  }
\newenvironment {testbox}
  {
    \vbox_gset:Nw \g_binpack_tmpa_box
      \color_group_begin: \color_ensure_current:
        \dim_set_eq:NN \hsize \g_binpack_hsize_dim
  }
  {
      \color_group_end:
    \vbox_gset_end:
    \binpack_gpush_box:N \g_binpack_tmpa_box
    \int_gincr:N \g_binpack_label_int
  }
\newcommand{\BinPackOutput}
  {
    \binpack_sort:
    \binpack_pack:
    \binpack_build:
  }
\cs_new_protected:Npn \binpack_gpush_box:N #1
  {
    \seq_gpush:Nx \g_binpack_main_seq
      {
        { \int_use:N \g_binpack_label_int }
        { \dim_eval:n { \box_ht:N #1 + \box_dp:N #1 + \g_binpack_extra_dim } }
      }
    \vbox_gset:Nn \g_binpack_main_box
      {
        \box_use:N #1
        \vbox_unpack_clear:N \g_binpack_main_box
      }
  }
\cs_new_protected:Npn \binpack_box_item:n #1
  {
    \vbox_gset:Nn \g_binpack_tmpa_box
      {
        \vbox_unpack:N \g_binpack_main_box
        \prg_replicate:nn { #1 + 1 }
          { \box_gset_to_last:N \g_binpack_tmpb_box }
      }
    \box_use_drop:N \g_binpack_tmpb_box
  }
\cs_new_protected:Npn \binpack_sort:
  {
    \seq_gsort:Nn \g_binpack_main_seq
      { \binpack_sort_aux:nnnn ##1 ##2 }
  }
\cs_new_protected:Npn \binpack_sort_aux:nnnn #1#2 #3#4
  {
    \dim_compare:nNnTF { #2 } < { #4 }
      { \sort_return_swapped: } { \sort_return_same: }
  }
\cs_new_protected:Npn \binpack_pack:
  {
    \int_gzero:N \g_binpack_page_int
    \seq_map_inline:Nn \g_binpack_main_seq { \binpack_pack:nn ##1 }
  }
\cs_new_protected:Npn \binpack_pack:nn #1#2
  {
    \bool_gset_false:N \g_binpack_success_bool
    \prg_stepwise_inline:nnnn
      { 0 } { 1 } { \g_binpack_page_int - 1 }
      {
        \dim_compare:nNnF
          { \dim_use:c { g_binpack_page_##1_dim } + #2 }
          > \g_binpack_vsize_dim
          {
            \binpack_page_gput:nnn {##1} {#1} {#2}
            \bool_gset_true:N \g_binpack_success_bool
            \prg_map_break:
          }
      }
    \bool_if:NF \g_binpack_success_bool
      {
        \binpack_page_new:
        \binpack_page_gput:xnn
          { \int_eval:n { \g_binpack_page_int - 1 } } {#1} {#2}
      }
  }
\cs_new_protected:Npn \binpack_page_gput:nnn #1#2#3
  {
    \seq_gput_right:cn { g_binpack_page_#1_seq } {#2}
    \dim_gadd:cn { g_binpack_page_#1_dim } {#3}
  }
\cs_generate_variant:Nn \binpack_page_gput:nnn { x }
\cs_new_protected:Npn \binpack_page_new:
  {
    \dim_new:c { g_binpack_page_ \int_use:N \g_binpack_page_int _dim }
    \seq_new:c { g_binpack_page_ \int_use:N \g_binpack_page_int _seq }
    \int_gincr:N \g_binpack_page_int
  }
\cs_new_protected:Npn \binpack_build:
  {
    \prg_stepwise_inline:nnnn
      { 0 } { 1 } { \g_binpack_page_int - 1 }
      {
        \iow_term:x
          { items:~\seq_map_function:cN { g_binpack_page_##1_seq } \c_space_tl }
        \seq_map_inline:cn
          { g_binpack_page_##1_seq }
          {
            \binpack_frame_box:n { \binpack_box_item:n { ####1 } }
            \tex_vfil:D
          }
        \clearpage
      }
  }
\cs_new_protected:Npn \binpack_frame_box:n #1
  {
    \vbox
      {
        \hrule height \fboxrule
        \hbox
          {
            \vrule width \fboxrule
            \kern \fboxsep
            \vbox { \kern \fboxsep #1 \kern \fboxsep }
            \kern \fboxsep
            \vrule width \fboxrule
          }
        \hrule height \fboxrule
      }
  }
\ExplSyntaxOff
\begin{document}

\footnotesize

\BinPackInit

\begin{testbox}
\lipsum[1-2]
\end{testbox}

\begin{testbox}
\lipsum[3-4]
\end{testbox}

\begin{testbox}
\lipsum[1-3]
\end{testbox}

\begin{testbox}
\lipsum[3-5]
\end{testbox}

\begin{testbox}
\lipsum[1-5]
\end{testbox}

\begin{testbox}
\lipsum[3-6]
\end{testbox}

\begin{testbox}
\lipsum[2]
\end{testbox}

\begin{testbox}
\lipsum[3]
\end{testbox}

\clearpage

\BinPackOutput

\end{document}