321 lines
11 KiB
Markdown
321 lines
11 KiB
Markdown
|
|
---
|
||
|
|
title: 'CSAW CTF 2024 Quals: Reversing - Archeology'
|
||
|
|
date: 2024/09/12
|
||
|
|
tags: ['ctf', 'rev']
|
||
|
|
---
|
||
|
|
|
||
|
|
## Task
|
||
|
|
|
||
|
|
> While researching Egyptian artifacts, an Archaeology student discovers a series of incomprehensible hieroglyphs. Suspecting a custom cipher, she uncovers an ancient cipher machine and a matching hieroglyph dictionary. Can you help her decipher a small part of the secret message?
|
||
|
|
>
|
||
|
|
> [`chal`](https://ctf.csaw.io/files/024ec940cc04eee0a93d2b8af8e5271b/chal?token=eyJ1c2VyX2lkIjoyNjc1LCJ0ZWFtX2lkIjo5MDUsImZpbGVfaWQiOjI5fQ.Zt_SNg.1oxzWU8rIIRxpXdKwybpSukzv7E) [`hieroglyphs.txt`](https://ctf.csaw.io/files/12d35b03b6f134b15a58aae0c49a2c20/hieroglyphs.txt?token=eyJ1c2VyX2lkIjoyNjc1LCJ0ZWFtX2lkIjo5MDUsImZpbGVfaWQiOjMxfQ.Zt_SNg.0Owx0lX6ed13BojCxTwYvewPjws) [`message.txt`](https://ctf.csaw.io/files/2efde5dd515025b55561749742cc9a30/message.txt?token=eyJ1c2VyX2lkIjoyNjc1LCJ0ZWFtX2lkIjo5MDUsImZpbGVfaWQiOjMyfQ.Zt_SNg.1ZzbRrdYp8zAgAp9jBH3PIHaGDI)
|
||
|
|
|
||
|
|
- `Author: keeboi`
|
||
|
|
- `Points: 426`
|
||
|
|
- `Solves: 110 / 1181 (9.314%)`
|
||
|
|
|
||
|
|
## Writeup
|
||
|
|
|
||
|
|
We are provided with three files: a binary that encrypts our input into some hieroglyphs (`chal`), a text file used by the binary (`hieroglyphs.txt`), and an encrypted message (`message.txt`).
|
||
|
|
|
||
|
|
After running a decompiler on `chal` (`angr` is used here), we can see that `main` can be divided into a few sections.
|
||
|
|
|
||
|
|
First, a function called `washing_machine` is run on the first argument:
|
||
|
|
|
||
|
|
```c
|
||
|
|
int main(unsigned long a0, struct_1 *a1)
|
||
|
|
{
|
||
|
|
/* ... variable declarations ... */
|
||
|
|
v17 = &v15;
|
||
|
|
alloca(0x12000);
|
||
|
|
if ((unsigned int)a0 != 2)
|
||
|
|
{
|
||
|
|
printf("Usage: %s <data-to-encrypt>\n", (unsigned int)a1->field_0);
|
||
|
|
return 1;
|
||
|
|
}
|
||
|
|
v12 = 3721182122;
|
||
|
|
v13 = 238;
|
||
|
|
v6 = 5;
|
||
|
|
v9 = &a1->field_8->field_0;
|
||
|
|
v7 = strlen(v9);
|
||
|
|
v1 = 0;
|
||
|
|
printf("Encrypted data: ");
|
||
|
|
washing_machine(v9, v7);
|
||
|
|
```
|
||
|
|
|
||
|
|
Then, the program iterates over our input to write something to `v14` and calls `runnnn` and `washing_machine`:
|
||
|
|
|
||
|
|
```c
|
||
|
|
for (v2 = 0; v2 < v7; v2 += 1)
|
||
|
|
{
|
||
|
|
v18 = v1;
|
||
|
|
v1 = (unsigned int)v18 + 1;
|
||
|
|
*((char *)(&v14 + v18)) = 0;
|
||
|
|
v19 = v1;
|
||
|
|
v1 = (unsigned int)v19 + 1;
|
||
|
|
(&v14)[v19] = 1;
|
||
|
|
v20 = v1;
|
||
|
|
v1 = (unsigned int)v20 + 1;
|
||
|
|
(&v14)[v20] = v9[v2];
|
||
|
|
for (v3 = 0; v3 <= 9; v3 += 1)
|
||
|
|
{
|
||
|
|
/* more of the same stuff, omitted for brevity */
|
||
|
|
}
|
||
|
|
v35 = v1;
|
||
|
|
v1 = (unsigned int)v35 + 1;
|
||
|
|
(&v14)[v35] = 4;
|
||
|
|
v36 = v1;
|
||
|
|
v1 = (unsigned int)v36 + 1;
|
||
|
|
(&v14)[v36] = 1;
|
||
|
|
v37 = v1;
|
||
|
|
v1 = (unsigned int)v37 + 1;
|
||
|
|
(&v14)[v37] = v2;
|
||
|
|
}
|
||
|
|
v38 = v1;
|
||
|
|
v1 = (unsigned int)v38 + 1;
|
||
|
|
(&v14)[v38] = 7;
|
||
|
|
runnnn(&v14);
|
||
|
|
washing_machine(&memory, v7);
|
||
|
|
```
|
||
|
|
|
||
|
|
Finally, the program references `hieroglyphs.txt` and `memory` to print out the encrypted input to the screen:
|
||
|
|
|
||
|
|
```c
|
||
|
|
v10 = &fopen("hieroglyphs.txt", "r")->_flags;
|
||
|
|
if (!v10)
|
||
|
|
{
|
||
|
|
perror("Failed to open hieroglyphs.txt");
|
||
|
|
return 1;
|
||
|
|
}
|
||
|
|
for (v4 = 0; fgets(&(&v11)[0x100 * v4], 0x100, v10) && v4 <= 255; v4 += 1)
|
||
|
|
{
|
||
|
|
(&v11)[0x100 * v4 + strcspn(&(&v11)[0x100 * v4], "\n")] = 0;
|
||
|
|
}
|
||
|
|
fclose(v10);
|
||
|
|
for (v5 = 0; v5 < v7; v5 += 1)
|
||
|
|
{
|
||
|
|
v0 = *(&(&memory)[v5]);
|
||
|
|
printf("%s", (unsigned int)&(&v11)[0x100 * v0]);
|
||
|
|
}
|
||
|
|
putchar(10);
|
||
|
|
exit(0); /* do not return */
|
||
|
|
}
|
||
|
|
```
|
||
|
|
|
||
|
|
The decompiler output for `washing_machine` is fairly straightforward:
|
||
|
|
|
||
|
|
```c
|
||
|
|
void washing_machine(char *a0, unsigned long a1)
|
||
|
|
{
|
||
|
|
char v0; // [bp-0x1b]
|
||
|
|
char v1; // [bp-0x1a]
|
||
|
|
char v2; // [bp-0x19]
|
||
|
|
unsigned long v3; // [bp-0x18]
|
||
|
|
unsigned long v4; // [bp-0x10]
|
||
|
|
|
||
|
|
v0 = *(a0);
|
||
|
|
for (v3 = 1; v3 < a1; v3 += 1)
|
||
|
|
{
|
||
|
|
v2 = a0[v3] ^ v0;
|
||
|
|
a0[v3] = v2;
|
||
|
|
v0 = v2;
|
||
|
|
}
|
||
|
|
for (v4 = 0; v4 < a1 >> 1; v4 += 1)
|
||
|
|
{
|
||
|
|
v1 = a0[v4];
|
||
|
|
/* index seems to be wrong here, should be (-1 + a1 - v4) */
|
||
|
|
a0[v4] = a0[1 + a1 + -1 * v4];
|
||
|
|
a0[1 + a1 + -1 * v4] = v1;
|
||
|
|
}
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
```
|
||
|
|
|
||
|
|
Translating this to Python, we get:
|
||
|
|
|
||
|
|
```py
|
||
|
|
def washing_machine(buf):
|
||
|
|
n = len(buf)
|
||
|
|
# xor bytes with the previous byte
|
||
|
|
for i in range(1, n):
|
||
|
|
buf[i] ^= buf[i - 1]
|
||
|
|
# reverse buf
|
||
|
|
for i in range(n // 2):
|
||
|
|
j = n - i - 1
|
||
|
|
buf[i], buf[j] = buf[j], buf[i]
|
||
|
|
|
||
|
|
def unwash(buf):
|
||
|
|
n = len(buf)
|
||
|
|
for i in range(n // 2):
|
||
|
|
j = n - i - 1
|
||
|
|
buf[i], buf[j] = buf[j], buf[i]
|
||
|
|
# repeating the xor undoes it
|
||
|
|
for i in range(n - 1, 0, -1):
|
||
|
|
buf[i] ^= buf[i - 1]
|
||
|
|
```
|
||
|
|
|
||
|
|
Now lets look at `runnnn`:
|
||
|
|
|
||
|
|
```c
|
||
|
|
void runnnn(unsigned long a0)
|
||
|
|
{
|
||
|
|
/* variable decs */
|
||
|
|
|
||
|
|
v5 = 0;
|
||
|
|
v6 = 1;
|
||
|
|
while (v6)
|
||
|
|
{
|
||
|
|
v8 = v5;
|
||
|
|
v5 = (unsigned int)v8 + 1;
|
||
|
|
v0 = v8[a0];
|
||
|
|
v9 = v0;
|
||
|
|
switch ((unsigned int)v9)
|
||
|
|
{
|
||
|
|
case 0:
|
||
|
|
v11 = v5;
|
||
|
|
v5 = (unsigned int)v11 + 1;
|
||
|
|
v1 = *((char *)(a0 + v11));
|
||
|
|
v12 = v5;
|
||
|
|
v5 = (unsigned int)v12 + 1;
|
||
|
|
*(&(®s)[v1]) = *((char *)(v12 + a0));
|
||
|
|
break;
|
||
|
|
case 1:
|
||
|
|
v13 = v5;
|
||
|
|
v5 = (unsigned int)v13 + 1;
|
||
|
|
v1 = *((char *)(a0 + v13));
|
||
|
|
v14 = v5;
|
||
|
|
v5 = (unsigned int)v14 + 1;
|
||
|
|
v4 = *((char *)(a0 + v14));
|
||
|
|
*(&(®s)[v1]) = *(&(®s)[v1]) ^ *(&(®s)[v4]);
|
||
|
|
break;
|
||
|
|
case 2:
|
||
|
|
v15 = v5;
|
||
|
|
v5 = (unsigned int)v15 + 1;
|
||
|
|
v1 = *((char *)(a0 + v15));
|
||
|
|
v16 = v5;
|
||
|
|
v5 = (unsigned int)v16 + 1;
|
||
|
|
v2 = *((char *)(a0 + v16));
|
||
|
|
*(&(®s)[v1]) = *(&(®s)[v1]) << (v2 & 31) | (unsigned int)(*(&(®s)[v1]) >> ((char)(8 - v2) & 31));
|
||
|
|
break;
|
||
|
|
case 3:
|
||
|
|
v17 = v5;
|
||
|
|
v5 = (unsigned int)v17 + 1;
|
||
|
|
v1 = *((char *)(a0 + v17));
|
||
|
|
*(&(®s)[v1]) = *(&(&sbox)[*(&(®s)[v1])]);
|
||
|
|
break;
|
||
|
|
case 4:
|
||
|
|
v18 = v5;
|
||
|
|
v5 = (unsigned int)v18 + 1;
|
||
|
|
v1 = *((char *)(a0 + v18));
|
||
|
|
v19 = v5;
|
||
|
|
v5 = (unsigned int)v19 + 1;
|
||
|
|
v3 = *((char *)(a0 + v19));
|
||
|
|
*(&(&memory)[v3]) = *(&(®s)[v1]);
|
||
|
|
break;
|
||
|
|
case 5:
|
||
|
|
v20 = v5;
|
||
|
|
v5 = (unsigned int)v20 + 1;
|
||
|
|
v1 = *((char *)(a0 + v20));
|
||
|
|
v21 = v5;
|
||
|
|
v5 = (unsigned int)v21 + 1;
|
||
|
|
v3 = *((char *)(a0 + v21));
|
||
|
|
*(&(®s)[v1]) = *(&(&memory)[v3]);
|
||
|
|
break;
|
||
|
|
case 6:
|
||
|
|
v22 = v5;
|
||
|
|
v5 = (unsigned int)v22 + 1;
|
||
|
|
v1 = *((char *)(a0 + v22));
|
||
|
|
putchar(*(&(®s)[v1]));
|
||
|
|
break;
|
||
|
|
case 7:
|
||
|
|
v6 = 0;
|
||
|
|
break;
|
||
|
|
case 8:
|
||
|
|
v23 = v5;
|
||
|
|
v5 = (unsigned int)v23 + 1;
|
||
|
|
v1 = *((char *)(a0 + v23));
|
||
|
|
v24 = v5;
|
||
|
|
v5 = (unsigned int)v24 + 1;
|
||
|
|
v2 = *((char *)(a0 + v24));
|
||
|
|
*(&(®s)[v1]) = (unsigned int)(*(&(®s)[v1]) >> (v2 & 31)) | *(&(®s)[v1]) << ((char)(8 - v2) & 31);
|
||
|
|
break;
|
||
|
|
default:
|
||
|
|
puts("Invalid instruction");
|
||
|
|
v6 = 0;
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
```
|
||
|
|
|
||
|
|
It seems that this function implements some kind of virtual machine (we can see `regs`, `memory`, and `"Invalid instruction"`), with the argument being a buffer containing the bytecode. Thus, we can assume that the loop over our input in `main` generates the instructions to be executed.
|
||
|
|
|
||
|
|
While we could go through the tedious process of determining the effect of each instruction generated and writing a function to undo these operations, it's possible that the entire process can be reduced to a mapping from input bytes to output bytes. To check this, we can write the following script:
|
||
|
|
|
||
|
|
```py
|
||
|
|
from pwn import gdb, context
|
||
|
|
|
||
|
|
# suppress pwntools output
|
||
|
|
context.log_level = 'CRITICAL'
|
||
|
|
|
||
|
|
prog = './chal'
|
||
|
|
|
||
|
|
# modified to return a new array instead of modifying in-place
|
||
|
|
def unwash(buf):
|
||
|
|
n = len(buf)
|
||
|
|
b = buf[::-1]
|
||
|
|
for i in range(n - 1, 0, -1):
|
||
|
|
b[i] ^= b[i - 1]
|
||
|
|
return b
|
||
|
|
|
||
|
|
# run the program and dump the memory buffer
|
||
|
|
def dump(n, out):
|
||
|
|
# get the input that will be [n..n+64] after washing_machine
|
||
|
|
arg = bytes(unwash(list(range(n, n + 64))))
|
||
|
|
try:
|
||
|
|
# run the program, break after the call to runnnn, then dump memory
|
||
|
|
p = gdb.debug([prog, arg], gdbscript=f'''
|
||
|
|
b *main+940
|
||
|
|
c
|
||
|
|
dump memory {out} &memory ((char*)&memory)+64
|
||
|
|
exit
|
||
|
|
''')
|
||
|
|
p.wait()
|
||
|
|
except EOFError:
|
||
|
|
pass
|
||
|
|
|
||
|
|
# the program segfaults if our input is too big, so we split the dump into 4 runs
|
||
|
|
for i in range(4):
|
||
|
|
dump(64 * i, f'/tmp/dump{i}.bin')
|
||
|
|
|
||
|
|
# get the mapping
|
||
|
|
mapping = []
|
||
|
|
for i in range(4):
|
||
|
|
with open(f'/tmp/dump{i}.bin', 'rb') as f:
|
||
|
|
mapping.extend(f.read())
|
||
|
|
|
||
|
|
# assert that we got a bijection
|
||
|
|
assert len(mapping) == 256 == len(set(mapping))
|
||
|
|
print(mapping)
|
||
|
|
# [14, 106, 19, 58, 0, 209, 250, 226, 228, 176, 142, 35, 100, 158, 93, 194, 224, 51, 112, 189, 39, 46, 6, 136, 132, 197, 78, 215, 192, 146, 30, 152, 185, 151, 67, 71, 96, 234, 252, 214, 143, 178, 175, 11, 188, 122, 230, 7, 104, 243, 22, 182, 187, 3, 193, 135, 212, 221, 54, 216, 222, 62, 26, 149, 5, 229, 75, 12, 248, 155, 17, 219, 47, 235, 110, 61, 164, 177, 118, 191, 84, 166, 114, 60, 150, 167, 91, 134, 37, 203, 156, 8, 43, 148, 179, 133, 169, 218, 144, 138, 55, 87, 4, 171, 139, 128, 119, 145, 206, 130, 85, 254, 236, 217, 196, 48, 80, 36, 50, 198, 65, 99, 69, 123, 227, 244, 29, 174, 40, 20, 113, 111, 159, 109, 180, 79, 86, 81, 208, 165, 246, 88, 53, 15, 245, 131, 207, 21, 66, 213, 33, 204, 52, 240, 163, 233, 251, 121, 34, 72, 31, 49, 108, 57, 25, 223, 160, 120, 23, 105, 201, 41, 172, 195, 92, 28, 70, 45, 68, 129, 102, 173, 82, 168, 210, 242, 74, 77, 232, 205, 73, 2, 24, 32, 44, 237, 42, 116, 140, 181, 16, 199, 13, 225, 137, 9, 76, 56, 117, 97, 190, 124, 18, 83, 184, 89, 115, 161, 38, 98, 90, 101, 147, 186, 231, 157, 253, 183, 200, 107, 125, 239, 127, 63, 211, 154, 247, 141, 94, 126, 103, 170, 162, 95, 1, 241, 153, 202, 27, 249, 64, 59, 10, 238, 220, 255]
|
||
|
|
```
|
||
|
|
|
||
|
|
It appears our assumption is correct since we successfully get a bijection from input bytes to output bytes.
|
||
|
|
|
||
|
|
The final part of `main` seems to just read each line of `hieroglyphs.txt`, iterate over each byte of `memory`, and print the corresponding line. To reverse this, we can add this to our Python script:
|
||
|
|
|
||
|
|
```python
|
||
|
|
with open('hieroglyphs.txt', 'r') as f:
|
||
|
|
h_mapping = f.read().split('\n')
|
||
|
|
with open('message.txt', 'r') as f:
|
||
|
|
msg = f.read()
|
||
|
|
memory = [h_mapping.index(c) for c in msg]
|
||
|
|
unwashed_memory = unwash(memory)
|
||
|
|
washed_input = [mapping.index(b) for b in unwashed_memory]
|
||
|
|
unwashed_input = unwash(washed_input)
|
||
|
|
print(bytes(unwashed_input).decode())
|
||
|
|
```
|
||
|
|
|
||
|
|
Running our script now gives us the flag: `csawctf{w41t_1_54w_7h353_5ymb0l5_47_7h3_m3t_71m3_70_r34d_b00k_0f_7h3_d34d}`.
|