u.twoha.cc/ctf/csaw/rev_archeology.md
2024-09-13 19:49:18 -05:00

11 KiB

title date tags
CSAW CTF 2024 Quals: Reversing - Archeology 2024/09/12
ctf
rev

Task

While researching Egyptian artifacts, an Archaeology student discovers a series of incomprehensible hieroglyphs. Suspecting a custom cipher, she uncovers an ancient cipher machine and a matching hieroglyph dictionary. Can you help her decipher a small part of the secret message?

chal hieroglyphs.txt message.txt

  • Author: keeboi
  • Points: 426
  • Solves: 110 / 1181 (9.314%)

Writeup

We are provided with three files: a binary that encrypts our input into some hieroglyphs (chal), a text file used by the binary (hieroglyphs.txt), and an encrypted message (message.txt).

After running a decompiler on chal (angr is used here), we can see that main can be divided into a few sections.

First, a function called washing_machine is run on the first argument:

/* Entry point as recovered by the decompiler. a0 is compared against 2 and
 * a1 supplies the string for the usage message, so they presumably correspond
 * to argc/argv. */
int main(unsigned long a0, struct_1 *a1)
{
    /* ... variable declarations ... */
    v17 = &v15;
    alloca(0x12000);
    /* bail out with a usage message unless a0 == 2 */
    if ((unsigned int)a0 != 2)
    {
        printf("Usage: %s <data-to-encrypt>\n", (unsigned int)a1->field_0);
        return 1;
    }
    v12 = 3721182122;
    v13 = 238;
    v6 = 5;
    /* v9: the data to encrypt (presumably argv[1]); v7: its length */
    v9 = &a1->field_8->field_0;
    v7 = strlen(v9);
    /* v1 is used further down as the write index into the v14 buffer */
    v1 = 0;
    printf("Encrypted data: ");
    /* first washing pass, applied in place to the input string itself */
    washing_machine(v9, v7);

Then, the program iterates over our input to write something to v14 and calls runnnn and washing_machine:

    /* For every input byte, append a group of bytes to v14 (v1 is the write
     * index). v14 is later handed to runnnn, which interprets it as bytecode. */
    for (v2 = 0; v2 < v7; v2 += 1)
    {
        /* emits 0, 1, v9[v2]: for runnnn, opcode 0 sets regs[1] to the input byte */
        v18 = v1;
        v1 = (unsigned int)v18 + 1;
        *((char *)(&v14 + v18)) = 0;
        v19 = v1;
        v1 = (unsigned int)v19 + 1;
        (&v14)[v19] = 1;
        v20 = v1;
        v1 = (unsigned int)v20 + 1;
        (&v14)[v20] = v9[v2];
        for (v3 = 0; v3 <= 9; v3 += 1)
        {
            /* more of the same stuff, omitted for brevity */
        }
        /* emits 4, 1, v2: for runnnn, opcode 4 stores regs[1] into memory[v2] */
        v35 = v1;
        v1 = (unsigned int)v35 + 1;
        (&v14)[v35] = 4;
        v36 = v1;
        v1 = (unsigned int)v36 + 1;
        (&v14)[v36] = 1;
        v37 = v1;
        v1 = (unsigned int)v37 + 1;
        (&v14)[v37] = v2;
    }
    /* trailing 7: the opcode on which runnnn leaves its loop */
    v38 = v1;
    v1 = (unsigned int)v38 + 1;
    (&v14)[v38] = 7;
    /* execute the generated bytecode, then wash the first v7 bytes of memory */
    runnnn(&v14);
    washing_machine(&memory, v7);

Finally, the program references hieroglyphs.txt and memory to print out the encrypted input to the screen:

    /* Load hieroglyphs.txt into v11, one line per 0x100-byte row. */
    v10 = &fopen("hieroglyphs.txt", "r")->_flags;
    if (!v10)
    {
        perror("Failed to open hieroglyphs.txt");
        return 1;
    }
    /* NOTE: fgets is evaluated before the v4 <= 255 bound in this condition */
    for (v4 = 0; fgets(&(&v11)[0x100 * v4], 0x100, v10) && v4 <= 255; v4 += 1)
    {
        /* cut the row at its first newline */
        (&v11)[0x100 * v4 + strcspn(&(&v11)[0x100 * v4], "\n")] = 0;
    }
    fclose(v10);
    /* For each of the first v7 bytes of memory, print the row that byte selects. */
    for (v5 = 0; v5 < v7; v5 += 1)
    {
        v0 = *(&(&memory)[v5]);
        printf("%s", (unsigned int)&(&v11)[0x100 * v0]);
    }
    putchar(10);
    exit(0); /* do not return */
}

The decompiler output for washing_machine is fairly straightforward:

/* Scrambles the a1 bytes at a0 in place: a chained XOR pass followed by a
 * pass that swaps bytes between the front and the back of the buffer. */
void washing_machine(char *a0, unsigned long a1)
{
    char v0;  // [bp-0x1b]
    char v1;  // [bp-0x1a]
    char v2;  // [bp-0x19]
    unsigned long v3;  // [bp-0x18]
    unsigned long v4;  // [bp-0x10]

    /* v0 carries the previously written byte, so each byte is XORed with
     * the already-updated byte before it */
    v0 = *(a0);
    for (v3 = 1; v3 < a1; v3 += 1)
    {
        v2 = a0[v3] ^ v0;
        a0[v3] = v2;
        v0 = v2;
    }
    /* swap loop over the first a1 >> 1 positions; v1 is the swap temporary */
    for (v4 = 0; v4 < a1 >> 1; v4 += 1)
    {
        v1 = a0[v4];
        /* index seems to be wrong here, should be (-1 + a1 - v4) */
        a0[v4] = a0[1 + a1 + -1 * v4];
        a0[1 + a1 + -1 * v4] = v1;
    }
    return;
}

Translating this to Python, we get:

def washing_machine(buf):
    """Scramble buf in place: chained xor, then reversal. Returns None."""
    # chained xor: every byte absorbs the already-updated byte before it
    for pos in range(1, len(buf)):
        buf[pos] = buf[pos] ^ buf[pos - 1]
    # reverse by swapping inwards from both ends
    lo, hi = 0, len(buf) - 1
    while lo < hi:
        buf[lo], buf[hi] = buf[hi], buf[lo]
        lo += 1
        hi -= 1

def unwash(buf):
    """Invert washing_machine on buf in place. Returns None."""
    # undo the reversal first, swapping inwards from both ends
    lo, hi = 0, len(buf) - 1
    while lo < hi:
        buf[lo], buf[hi] = buf[hi], buf[lo]
        lo += 1
        hi -= 1
    # walk backwards so each byte is xored with a predecessor that is
    # still in its washed state, which cancels the chained xor
    for pos in reversed(range(1, len(buf))):
        buf[pos] = buf[pos] ^ buf[pos - 1]

Now let's look at runnnn:

/* Bytecode interpreter. a0 is the bytecode buffer, v5 the read index into it,
 * and v6 the "keep running" flag. Each iteration fetches one opcode byte and
 * then reads that opcode's operand bytes from the following positions.
 *
 *   0 <reg> <imm>   regs[reg] = imm
 *   1 <dst> <src>   regs[dst] ^= regs[src]
 *   2 <reg> <n>     regs[reg] = regs[reg] << n | regs[reg] >> (8 - n)
 *   3 <reg>         regs[reg] = sbox[regs[reg]]
 *   4 <reg> <addr>  memory[addr] = regs[reg]
 *   5 <reg> <addr>  regs[reg] = memory[addr]
 *   6 <reg>         putchar(regs[reg])
 *   7               stop
 *   8 <reg> <n>     regs[reg] = regs[reg] >> n | regs[reg] << (8 - n)
 */
void runnnn(unsigned long a0)
{
    /* variable decs */

    v5 = 0;
    v6 = 1;
    while (v6)
    {
        /* fetch the opcode and advance the read index */
        v8 = v5;
        v5 = (unsigned int)v8 + 1;
        v0 = v8[a0];
        v9 = v0;
        switch ((unsigned int)v9)
        {
        case 0:
            /* load immediate: regs[v1] = next bytecode byte */
            v11 = v5;
            v5 = (unsigned int)v11 + 1;
            v1 = *((char *)(a0 + v11));
            v12 = v5;
            v5 = (unsigned int)v12 + 1;
            *(&(&regs)[v1]) = *((char *)(v12 + a0));
            break;
        case 1:
            /* xor: regs[v1] ^= regs[v4] */
            v13 = v5;
            v5 = (unsigned int)v13 + 1;
            v1 = *((char *)(a0 + v13));
            v14 = v5;
            v5 = (unsigned int)v14 + 1;
            v4 = *((char *)(a0 + v14));
            *(&(&regs)[v1]) = *(&(&regs)[v1]) ^ *(&(&regs)[v4]);
            break;
        case 2:
            /* shift left by v2 OR shift right by 8 - v2; looks like a rotate-left
             * of an 8-bit register (register width is not visible here) */
            v15 = v5;
            v5 = (unsigned int)v15 + 1;
            v1 = *((char *)(a0 + v15));
            v16 = v5;
            v5 = (unsigned int)v16 + 1;
            v2 = *((char *)(a0 + v16));
            *(&(&regs)[v1]) = *(&(&regs)[v1]) << (v2 & 31) | (unsigned int)(*(&(&regs)[v1]) >> ((char)(8 - v2) & 31));
            break;
        case 3:
            /* substitution: regs[v1] = sbox[regs[v1]] */
            v17 = v5;
            v5 = (unsigned int)v17 + 1;
            v1 = *((char *)(a0 + v17));
            *(&(&regs)[v1]) = *(&(&sbox)[*(&(&regs)[v1])]);
            break;
        case 4:
            /* store: memory[v3] = regs[v1] */
            v18 = v5;
            v5 = (unsigned int)v18 + 1;
            v1 = *((char *)(a0 + v18));
            v19 = v5;
            v5 = (unsigned int)v19 + 1;
            v3 = *((char *)(a0 + v19));
            *(&(&memory)[v3]) = *(&(&regs)[v1]);
            break;
        case 5:
            /* load: regs[v1] = memory[v3] */
            v20 = v5;
            v5 = (unsigned int)v20 + 1;
            v1 = *((char *)(a0 + v20));
            v21 = v5;
            v5 = (unsigned int)v21 + 1;
            v3 = *((char *)(a0 + v21));
            *(&(&regs)[v1]) = *(&(&memory)[v3]);
            break;
        case 6:
            /* output: putchar(regs[v1]) */
            v22 = v5;
            v5 = (unsigned int)v22 + 1;
            v1 = *((char *)(a0 + v22));
            putchar(*(&(&regs)[v1]));
            break;
        case 7:
            /* stop: clears the loop flag */
            v6 = 0;
            break;
        case 8:
            /* mirror image of opcode 2: shift right by v2 OR shift left by 8 - v2 */
            v23 = v5;
            v5 = (unsigned int)v23 + 1;
            v1 = *((char *)(a0 + v23));
            v24 = v5;
            v5 = (unsigned int)v24 + 1;
            v2 = *((char *)(a0 + v24));
            *(&(&regs)[v1]) = (unsigned int)(*(&(&regs)[v1]) >> (v2 & 31)) | *(&(&regs)[v1]) << ((char)(8 - v2) & 31);
            break;
        default:
            /* unknown opcode: report it and stop */
            puts("Invalid instruction");
            v6 = 0;
            break;
        }
    }
    return;
}

It seems that this function implements some kind of virtual machine (we can see regs, memory, and "Invalid instruction"), with the argument being a buffer containing the bytecode. Thus, we can assume that the loop over our input in main generates the instructions to be executed.

While we could go through the tedious process of determining the effect of each instruction generated and writing a function to undo these operations, it's possible that the entire process can be reduced to a mapping from input bytes to output bytes. To check this, we can write the following script:

from pwn import gdb, context

# suppress pwntools output
context.log_level = 'CRITICAL'

prog = './chal'

# non-mutating variant: the caller's list is left untouched and a fresh one is returned
def unwash(buf):
    # undo the reversal on a copy
    b = buf[::-1]
    # walk backwards so each byte is xored with a still-washed predecessor
    for pos in reversed(range(1, len(buf))):
        b[pos] = b[pos] ^ b[pos - 1]
    return b

# run the program under gdb and write 64 bytes of its memory buffer to the file `out`
def dump(n, out):
    # choose the argument whose washed form is the byte sequence n, n+1, ..., n+63
    wanted = list(range(n, n + 64))
    arg = bytes(unwash(wanted))
    # break after the call to runnnn, then dump memory
    script = f'''
            b *main+940
            c
            dump memory {out} &memory ((char*)&memory)+64
            exit
        '''
    try:
        p = gdb.debug([prog, arg], gdbscript=script)
        p.wait()
    except EOFError:
        pass

# the program segfaults if our input is too big, so the 256 byte values are
# covered by 4 separate runs of 64 bytes each
dump_paths = [f'/tmp/dump{i}.bin' for i in range(4)]
for i, path in enumerate(dump_paths):
    dump(64 * i, path)

# concatenate the dumps: mapping[x] is the byte the VM produces for input byte x
mapping = []
for path in dump_paths:
    with open(path, 'rb') as f:
        mapping += f.read()

# 256 entries, all distinct: the mapping is a bijection
assert len(mapping) == 256 == len(set(mapping))
print(mapping)
# [14, 106, 19, 58, 0, 209, 250, 226, 228, 176, 142, 35, 100, 158, 93, 194, 224, 51, 112, 189, 39, 46, 6, 136, 132, 197, 78, 215, 192, 146, 30, 152, 185, 151, 67, 71, 96, 234, 252, 214, 143, 178, 175, 11, 188, 122, 230, 7, 104, 243, 22, 182, 187, 3, 193, 135, 212, 221, 54, 216, 222, 62, 26, 149, 5, 229, 75, 12, 248, 155, 17, 219, 47, 235, 110, 61, 164, 177, 118, 191, 84, 166, 114, 60, 150, 167, 91, 134, 37, 203, 156, 8, 43, 148, 179, 133, 169, 218, 144, 138, 55, 87, 4, 171, 139, 128, 119, 145, 206, 130, 85, 254, 236, 217, 196, 48, 80, 36, 50, 198, 65, 99, 69, 123, 227, 244, 29, 174, 40, 20, 113, 111, 159, 109, 180, 79, 86, 81, 208, 165, 246, 88, 53, 15, 245, 131, 207, 21, 66, 213, 33, 204, 52, 240, 163, 233, 251, 121, 34, 72, 31, 49, 108, 57, 25, 223, 160, 120, 23, 105, 201, 41, 172, 195, 92, 28, 70, 45, 68, 129, 102, 173, 82, 168, 210, 242, 74, 77, 232, 205, 73, 2, 24, 32, 44, 237, 42, 116, 140, 181, 16, 199, 13, 225, 137, 9, 76, 56, 117, 97, 190, 124, 18, 83, 184, 89, 115, 161, 38, 98, 90, 101, 147, 186, 231, 157, 253, 183, 200, 107, 125, 239, 127, 63, 211, 154, 247, 141, 94, 126, 103, 170, 162, 95, 1, 241, 153, 202, 27, 249, 64, 59, 10, 238, 220, 255]

It appears our assumption is correct since we successfully get a bijection from input bytes to output bytes.

The final part of main seems to just read each line of hieroglyphs.txt, iterate over each byte of memory, and print the corresponding line. To reverse this, we can add this to our Python script:

with open('hieroglyphs.txt', 'r') as f:
    h_mapping = f.read().split('\n')
with open('message.txt', 'r') as f:
    msg = f.read()
memory = [h_mapping.index(c) for c in msg]
unwashed_memory = unwash(memory)
washed_input = [mapping.index(b) for b in unwashed_memory]
unwashed_input = unwash(washed_input)
print(bytes(unwashed_input).decode())

Running our script now gives us the flag: csawctf{w41t_1_54w_7h353_5ymb0l5_47_7h3_m3t_71m3_70_r34d_b00k_0f_7h3_d34d}.