A cube of text

Python 2, 189 bytes (previously 228, 223, 221, 203, 199, 195)

# Reads a string and prints it as a cube drawn with the text along its edges.
# Python 2: input() evaluates what it reads, so the string has to be entered
# as a quoted literal.
t=input()
x=" "  # padding character
l=len(t)-1  # index of the last character of t
q=l-1  # width of the gaps between the edges
f=range(q,0,-1)  # q, q-1, ..., 1: one value per row between the full-text rows
print x*l+t  # top row: the text shifted right by l columns
# Upper rows: i spaces, t[l-i], q spaces, t[i], q-i spaces, t[i].
for i in f:print x*i+t[l-i]+x*q+t[i]+x*(q-i)+t[i]
print t[::-1]+x*q+t[0]  # middle row: reversed text, q spaces, first character
# Lower rows: t[i], q spaces, t[l-i], i-1 spaces, t[l-i].
for i in f:print t[i]+x*q+t[l-i]+x*(i-1)+t[l-i]
print t  # bottom row: the text itself

Python 3, 182 bytes (previously 192, 188)

# Reads one line of text and prints it as a cube drawn with the text along its edges.
t=input()
x=" "  # padding character
l=len(t)-1  # index of the last character of t
q=l-1  # width of the gaps between the edges
p=print  # short alias for the repeated print calls below
f=range(q,0,-1)  # q, q-1, ..., 1: one value per row between the full-text rows
p(x*l+t)  # top row: the text shifted right by l columns
# Upper rows: i spaces, t[l-i], q spaces, t[i], q-i spaces, t[i].
for i in f:p(x*i+t[l-i]+x*q+t[i]+x*(q-i)+t[i])
p(t[::-1]+x*q+t[0])  # middle row: reversed text, q spaces, first character
# Lower rows: t[i], q spaces, t[l-i], i-1 spaces, t[l-i].
for i in f:p(t[i]+x*q+t[l-i]+x*(i-1)+t[l-i])
p(t)  # bottom row: the text itself

Ruby, 144 bytes (previously 148)

+1 byte from the n flag. Shows newlines instead of semicolons for readability (same functionality).

# Expects $_ to hold the input line (filled in by ruby's -n flag) with no trailing newline.
S=" "
# s = input length minus 2 (assigned inside the expression); X = a run of s spaces.
X=S*s=$_.size-2
# A single puts prints every row; array arguments are printed one element per line.
#   1. top row: s+1 spaces followed by the input (also stored in I)
#   2. upper rows, i = 1..s: s-i+1 spaces, I[i], X, then c = I[~i] twice with i-1 spaces between
#   3. middle row: reversed input, X, first character
#   4. lower rows, i = 1..s: I[~i], X, then c = I[i] twice with s-i spaces between
#   5. bottom row: the input itself
puts X+S+I=$_,(r=1..s).map{|i|c=I[~i];S*(s-i+1)+I[i]+X+c+S*~-i+c},I.reverse+X+I[0],r.map{|i|c=I[i];I[~i]+X+c+S*(s-i)+c},I

Run like so. Input is a line of STDIN, with no trailing newline, so it likely needs to be piped from a file.

# -n makes ruby read STDIN line by line into $_ and run the -e script for each line; the script is the program above with semicolons in place of newlines.
ruby -ne 'S=" ";X=S*s=$_.size-2;puts X+S+I=$_,(r=1..s).map{|i|c=I[~i];S*(s-i+1)+I[i]+X+c+S*~-i+c},I.reverse+X+I[0],r.map{|i|c=I[i];I[~i]+X+c+S*(s-i)+c},I'

x86 (IA-32) machine code, 126 bytes

Hexdump:

60 8b f9 57 33 c0 f2 ae 5e 2b fe 4f 87 fa 8d 1c
12 8b c3 48 f6 e3 c6 04 07 00 48 c6 04 07 20 75
f9 8b ea 4d 53 8d 04 2a 50 53 8b c5 f6 e3 8d 44
68 01 50 53 2b c2 8b c8 50 4b 53 55 53 03 c5 50
f7 d3 53 50 53 95 f6 e2 6b c0 04 50 43 53 51 6a
01 4a 52 6a 01 50 6a ff 51 b0 0a 6a 0b 8b dc 59
8b 6c cb fc 88 04 2f 03 2c cb 89 6c cb fc 83 f9
0a 75 01 ac e2 ea 4a 79 e0 83 c4 58 61 c3

This is a bit long, so to explain it I'll give C code first:

/*
 * Draws the string s as a "cube of text" into out.
 *
 * The picture is h = 2n-1 rows of w = 2n bytes each (n = strlen(s)); the last
 * byte of every row is '\n' and the whole picture is NUL-terminated, so out
 * must have room for 2n * (2n-1) + 1 bytes.  Unused cells are spaces.
 *
 * The picture is produced by walking 11 straight lines through the buffer in
 * lock-step, one cell per input character: 9 lines receive the characters of
 * s, 2 lines (the right-hand column, split in two halves) receive '\n'.
 *
 * An empty s produces an empty string.
 */
void doit(const char* s, char out[])
{
    enum { TEXT_LINES = 9, LINE_COUNT = 11 };

    int n = (int)strlen(s);
    if (n == 0)
    {
        /* Nothing to draw; without this guard the do/while below would
           start counting down from 0 and write far outside the buffer. */
        out[0] = 0;
        return;
    }

    int w = 2 * n;      /* row width, including the newline */
    int h = w - 1;      /* number of rows */
    int m = n - 1;

    memset(out, ' ', h * w);
    out[h * w] = 0;

    int offset1 = n + m;                /* last column of row 0 */
    int offset2 = w * m + 2 * m + 1;    /* 2 * n * n - 1: last column of row m */
    int offset3 = offset2 - n;          /* 2 * n * n - n - 1: row m, column m */
    int offset4 = 4 * n * m;            /* 4 * n * n - 4 * n: first column of the last row */

    /* Pairs of (current offset, stride).  The first TEXT_LINES pairs are
       filled with text, the remaining ones with newlines. */
    int offsets[] = {
        offset3, -1,
        offset4, 1,
        m, 1,
        offset3, 1 - w,
        offset4, -w,
        offset2 - 1, -w,
        offset2 - 1, w - 1,
        m, w - 1,
        offset3, w,
        offset2, w,
        offset1, w,
    };

    do
    {
        char c = *s++;
        for (int i = 0; i < LINE_COUNT; ++i)
        {
            if (i == TEXT_LINES)
                c = '\n';
            int offset = offsets[i * 2];
            /* Offset 0 is a valid cell (it is used when n == 1), so the
               lower bound must be inclusive. */
            assert(offset >= 0 && offset < w * h);
            out[offset] = c;
            offsets[i * 2] += offsets[i * 2 + 1];   /* advance along the line */
        }
    } while (--n);
}

Here n is the length of the input string.

The dimensions of the output area are 2n (width) by 2n-1 (height). First, it fills everything with spaces (and adds a terminating null byte). Then, it travels along 11 straight lines in the output area, and fills them with text:

  • 2 lines are filled with end-of-line bytes (=10)
  • 9 lines are filled with the consecutive bytes of the input string

Each line is represented by two numbers, a start offset and a stride. I stuffed them both into the array offsets, to make access "easy".

The interesting part is filling the array. The order of the entries in the array matters little, so I rearranged them to minimize the number of register conflicts. In addition, the quadratic formulas can be calculated in more than one way; I chose the forms that minimize the number of subtractions (because additions can be implemented by the flexible LEA instruction).

Assembly source:

    ; // Writes the text cube for the NUL-terminated string at ecx into the
    ; // buffer at edx.  All general-purpose registers are preserved.
    ; // NOTE(review): register arguments in ecx/edx look like fastcall - confirm.
    ; // NOTE(review): scasb and lodsb assume the direction flag is clear - confirm.
    pushad; // save every general-purpose register; restored by popad at the end

    ; // Calculate the length of the input string
    mov edi, ecx;
    push edi; // keep the start of the string for esi
    xor eax, eax; // al = 0: the byte scasb searches for
    repne scasb; // scan to the NUL; ecx (still the string address) serves as the repeat count
    pop esi; // esi = input string
    sub edi, esi; // edi stopped one past the NUL, so this is length + 1
    dec edi;

    ; // Calculate the size of the output area
    xchg edi, edx;  // edx = n
                    // edi = output string
    lea ebx, [edx + edx]; // ebx = w
    mov eax, ebx;
    dec eax; // eax = h
    mul bl; // eax = w * h (8-bit multiply: result in ax, edx untouched)

    ; // Fill the output string with spaces and zero terminate it
    mov byte ptr [edi + eax], 0;
myfill:
    dec eax;
    mov byte ptr [edi + eax], ' '; // mov leaves the flags from dec intact
    jnz myfill; // stops after index 0 has been written

    mov ebp, edx;
    dec ebp; // ebp = m

    ; // Fill the array of offsets
    ; // The (offset, stride) pairs are pushed in reverse, so the last push
    ; // becomes the first entry; the result matches offsets[] in the C code.
    push ebx; // w
    lea eax, [edx + ebp];
    push eax; // offset1
    push ebx; // w
    mov eax, ebp;
    mul bl; // ax = m * w
    lea eax, [eax + 2 * ebp + 1];
    push eax; // offset2
    push ebx; // w
    sub eax, edx;
    mov ecx, eax; // ecx = offset3
    push eax; // offset3
    dec ebx;
    push ebx; // w - 1
    push ebp; // m
    push ebx; // w - 1
    add eax, ebp; // offset3 + m = offset2 - 1
    push eax; // offset2 - 1
    not ebx; // ~(w - 1) = -w
    push ebx; // -w
    push eax; // offset2 - 1
    push ebx; // -w
    xchg eax, ebp; // eax = m
    mul dl; // ax = m * n
    imul eax, eax, 4; // eax = 4 * n * m
    push eax; // offset4
    inc ebx;
    push ebx; // 1 - w
    push ecx; // offset3
    push 1;
    dec edx; // edx = n - 1
    push edx; // m
    push 1;
    push eax; // offset4
    push -1;
    push ecx; // offset3

    ; // Use the array of offsets to write stuff to output
myout:
    mov al, '\n'; // the two highest entries are written first and get newlines
    push 11;
    mov ebx, esp; // ebx = address 4 bytes below the array
    pop ecx; // ecx = 11 entries to process; esp is back at the array
myloop:
    mov ebp, [ebx + ecx * 8 - 4]; // current offset of entry ecx - 1
    mov [edi + ebp], al;
    add ebp, [ebx + ecx * 8]; // advance by the stride of entry ecx - 1
    mov [ebx + ecx * 8 - 4], ebp;
    cmp ecx, 10; // both newline entries done?
    jne skip_read;
    lodsb; // al = next input character, used for the remaining nine entries
skip_read:
    loop myloop; // decrement ecx, repeat while it is nonzero
    dec edx; // edx started at n - 1, so the outer loop runs n times
    jns myout;

    add esp, 11 * 8; // discard the array of offsets

    popad;
    ret;

I used byte multiplications here, limiting the length of the input string to 127 (the width 2n has to fit in the 8-bit register bl). This avoids clobbering the register edx - the product is calculated in ax instead.

A minor glitch: when filling the array, the length of the string gets decreased by 1. So I adjusted the loop exit condition:

    jns myout

It counts down to -1.