Logo Peter

DEFCON 2025 Quals

April 14, 2025
17 min read
Table of Contents

This time I played DEFCON CTF Quals with RePokemonedCollections - a merger of r3kapig and Russian teams. We managed to finish 14th. I solved 3 challenges, but two of them are not interesting, so in this blog post I will share my solution to the last one I solved.

nfuncs1 (12 solves)

n-series autorev

Every year, DEFCON has an n-series autorev challenge, which is hard asf. I never thought I could solve an autorev at DEFCON lol. So what is the autorev this time?

Download the challenge and you will notice that you have a 3 GB PE file 🤣

Load it into IDA (it takes 1 hour to finish). Let's analyze it.

// Decryption stub: XORs the 0xDAF0-byte region at check1 in place with a
// repeating 8-byte key and then calls the decrypted code.
int sub_140001480()
{
  __int64 index; // rax
  DWORD flOldProtect; // [rsp+24h] [rbp-24h] BYREF
  char key[8]; // [rsp+28h] [rbp-20h]
 
  // 0x40 is PAGE_EXECUTE_READWRITE; the previous protection is saved in flOldProtect.
  VirtualProtect(check1, 0xDAF0ui64, 0x40u, &flOldProtect);
  *(_QWORD *)key = 0x918FE64752F3F1BDui64;
  index = 1i64;
  // Byte 0 is handled outside the loop; 0xBD is the low byte of the key constant.
  *(_BYTE *)check1 ^= 0xBDu;
  do
  {
    // Remaining bytes: key byte is selected by (index & 7), i.e. the key repeats every 8 bytes.
    *((_BYTE *)check1 + index) ^= key[index & 7];
    ++index;
  }
  while ( index != 0xDAF0 );
  // Restore the saved protection, then run the decrypted check.
  VirtualProtect(check1, 0xDAF0ui64, flOldProtect, 0i64);
  return check1();
}

As you can see, this one simply XOR-decrypts the shellcode: it calls VirtualProtect to change the protection on a region of committed pages, decrypts that region in place, and then executes it.

// Lookup-table check: reads 8 input bytes, maps each one through a 256-entry
// table stored at Buffer[3..258], and compares the results with constants.
// On success it derives a 64-bit value from the input, XOR-decrypts the next
// stage and calls it; otherwise it prints ":(".
int check1()
{
  char *v0; // rax
  char *v1; // rcx
  char *v2; // rcx
  char *v3; // rcx
  __int64 i; // rcx
  char *v5; // rcx
  __int64 j; // rax
  char flOldProtect[3]; // [rsp+29h] [rbp-12Fh] BYREF
  DWORD lpflOldProtect_; // [rsp+2Ch] [rbp-12Ch] BYREF
  __int64 flOldProtect_7; // [rsp+30h] [rbp-128h]
  char Buffer[260]; // [rsp+3Dh] [rbp-11Bh] BYREF
  unsigned __int8 DstBuf[8]; // [rsp+148h] [rbp-10h] BYREF
 
  *(_QWORD *)DstBuf = 0i64;
  // All of the fills below write the same 256 bytes (Buffer[3..258]); each one
  // overwrites the previous, so only the values from the last loop survive.
  memset(&Buffer[3], 0, 256);
  v0 = &Buffer[3];
  v1 = &Buffer[3];
  do
    *v1++ = 0;
  while ( v1 != &Buffer[259] );
  v2 = &Buffer[3];
  do
    *v2++ = 1;
  while ( v2 != &Buffer[259] );
  v3 = &Buffer[3];
  do
    *v3++ = 2;
  while ( v3 != &Buffer[259] );
  for ( i = 1i64; i != 257; ++i )
    Buffer[i + 2] = i;
  v5 = &Buffer[3];
  do
  {
    *v5 = 2 - (unsigned __int8)&Buffer[3] + (_BYTE)v5;
    ++v5;
  }
  while ( v5 != &Buffer[259] );
  // Final table contents: Buffer[3 + k] = (13 + k) & 0xFF for k = 0..255.
  do
  {
    *v0 = 13 - (unsigned __int8)&Buffer[3] + (_BYTE)v0;
    ++v0;
  }
  while ( v0 != &Buffer[259] );
  // The 8 input bytes are read one at a time.
  read(0, DstBuf, 1u);
  read(0, &DstBuf[1], 1u);
  read(0, &DstBuf[2], 1u);
  read(0, &DstBuf[3], 1u);
  read(0, &DstBuf[4], 1u);
  read(0, &DstBuf[5], 1u);
  read(0, &DstBuf[6], 1u);
  read(0, &DstBuf[7], 1u);
  // Each input byte indexes the table; the looked-up value must equal the constant.
  if ( Buffer[DstBuf[0] + 3] == (char)0x96
    && Buffer[DstBuf[1] + 3] == 0x5D
    && Buffer[DstBuf[2] + 3] == 0x5B
    && Buffer[DstBuf[3] + 3] == 0x54
    && Buffer[DstBuf[4] + 3] == 0x1A
    && Buffer[DstBuf[5] + 3] == 0x17
    && Buffer[DstBuf[6] + 3] == 0x27
    && Buffer[DstBuf[7] + 3] == 0x17 )
  {
    strcpy(flOldProtect, ":)");
    puts(flOldProtect);
    // 64-bit value derived from the input: add a constant, then rotate/XOR.
    flOldProtect_7 = __ROL8__(
                       __ROR8__(__ROL8__(*(_QWORD *)DstBuf + 2641108059i64, 13) ^ 0xDEADBEEFCAFEBABEui64, 7) ^ 0x123456789ABCDEF0i64,
                       5);
    VirtualProtect(lpAddress_, 0x12280ui64, 0x40u, &lpflOldProtect_);
    // XOR-decrypt 74368 (0x12280) bytes at 0x14000F000 with an 8-byte repeating key.
    // NOTE(review): the Buffer[(j & 7) - 3] index is as emitted by the decompiler;
    // presumably it refers to the bytes of flOldProtect_7 - confirm in the disassembly.
    for ( j = 0i64; j != 74368; ++j )
      *(_BYTE *)(j + 0x14000F000i64) ^= Buffer[(j & 7) - 3];
    VirtualProtect(lpAddress_, 0x12280ui64, lpflOldProtect_, 0i64);
    return lpAddress_();
  }
  else
  {
    strcpy(Buffer, ":(");
    return puts(Buffer);
  }
}

So basically, each check layer does this: call read 8 times, then transform the input, then compare -> if our input is correct, it is used to generate the key for decrypting the next block of shellcode. When I worked on this challenge, my first idea was to use unicorn to emulate the shellcode and solve each layer based on its pattern.

from unicorn import *
from unicorn.x86_const import *
import struct
from capstone import Cs, CS_ARCH_X86, CS_MODE_64
from pwn import *
 
png_data = []
def read(name):
    """Return the entire contents of the file *name* as bytes."""
    handle = open(name, "rb")
    try:
        return handle.read()
    finally:
        handle.close()
        
def u32(data):
    """Decode *data* as one unsigned 32-bit integer in native byte order."""
    (value,) = struct.unpack("I", data)
    return value
    
def p32(num):
    """Encode *num* as an unsigned 32-bit integer in native byte order."""
    packer = struct.Struct("I")
    return packer.pack(num)
 
def solve(buf, targets=(13, 13, 13, 0x83, 13, 13, 13, 0xA2)):
    """Invert the check's lookup table for every compared constant.

    The table entry for input byte ``i`` lives at ``buf[i + 3]`` (the dumped
    buffer starts three bytes before the table), for ``i`` in 0..255.

    Args:
        buf: Dumped buffer (bytes, bytearray or a sequence of ints 0..255).
        targets: Constants the check compares the looked-up values against.
            Defaults to the constants of the layer this script was written for.

    Returns:
        list[int]: For each target, in order, the first input byte whose table
        entry equals it. Targets that do not occur in the table are skipped.
    """
    # All 256 entries are searched; range(0xff) used to stop at index 254.
    table = bytes(buf[3:3 + 256])
    check = []
    for byte in targets:
        # First match only, so the result holds at most one byte per target.
        index = table.find(bytes([byte]))
        if index != -1:
            check.append(index)
    return check
    
md = Cs(CS_ARCH_X86, CS_MODE_64)
def hook_code(mu, address, size, user_data):  
    """Per-instruction Unicorn hook that drives the lookup-table check.

    Stops emulation when the derived key is about to be stored at
    [rsp + 0x30], patches the solved input in at one hard-coded address and
    skips 6 bytes at another. The hard-coded addresses assume the shellcode
    is mapped at 0x400000.
    """
    code = mu.mem_read(address, size)
    disasm = list(md.disasm(code, address))
    insn = disasm[0]
    RIP_value = mu.reg_read(UC_X86_REG_RIP)
    # print(f"0x{address:016x}: {insn.mnemonic} {insn.op_str}")
    # The key is stored with this exact instruction; RAX holds it at that point.
    if insn.mnemonic == "mov" and insn.op_str == "qword ptr [rsp + 0x30], rax":
        print("[!] Breakpoint hit: mov qword ptr [rsp + 0x30], rax")
        print(p64(mu.reg_read(UC_X86_REG_RAX)))
        # new key
        mu.emu_stop()
        return
    
    if RIP_value == 0x0000000000400207:
        # NOTE(review): buff_add is never used afterwards.
        buff_add = mu.reg_read(UC_X86_REG_RAX)
    elif RIP_value == 0x0000000000400218:
        rsp = mu.reg_read(UC_X86_REG_RSP)
        # rsp + 61 (0x3D) is where the decompiled check places its 260-byte Buffer.
        effective_addr = rsp + 61
        value = mu.mem_read(effective_addr, 260)
        print(value)
        sols = solve(value)
        print(bytes(sols))
        # Presumably RDX holds the input buffer pointer here - confirm in the disassembly.
        mu.mem_write(mu.reg_read(UC_X86_REG_RDX), bytes(sols))
        # Per the write-up, this jumps to just after the 8th read call.
        mu.reg_write(UC_X86_REG_RIP, RIP_value + 0xaf - 8)
    if RIP_value == 0x000000000040037f:
        # Per the write-up, this skips the puts call (6 bytes).
        mu.reg_write(UC_X86_REG_RIP, RIP_value + 6)
 
 
# Emulate one dumped check on its own: map the shellcode and a stack, then run
# it with hook_code installed.
mu = Uc(UC_ARCH_X86, UC_MODE_64)
 
BASE = 0x400000  
STACK_ADDR = 0x800000  
STACK_SIZE = 2 * 1024 * 1024  
 
# Round both values down to a 4 KiB boundary (they are already aligned).
STACK_ADDR = (STACK_ADDR // 0x1000) * 0x1000
STACK_SIZE = (STACK_SIZE // 0x1000) * 0x1000
 
mu.mem_map(BASE, 1024 * 1024)  
mu.mem_map(STACK_ADDR, STACK_SIZE)  
 
# The bytes of "./test3" are written at BASE.
mu.mem_write(BASE, read("./test3"))
# RSP starts 0x100 bytes below the top of the stack mapping.
mu.reg_write(UC_X86_REG_RSP, STACK_ADDR + STACK_SIZE - 0x100)
 
# UC_HOOK_CODE invokes hook_code for every emulated instruction.
mu.hook_add(UC_HOOK_CODE, hook_code)
 
start_addr = BASE 
end_addr = BASE + 0x3b9 + 4
 
try:
    mu.emu_start(start_addr, end_addr)
except UcError as e:
    print(f"Unicorn Error: {e}")

This solves the lookup-table check. The idea of my emulator is simple: hook read, jump right past the 8th read, then let it run and handle puts by skipping it. But this is only one of several kinds of check. Here is the second type.

// Direct-compare check: the 8 input bytes are compared with constants without
// any lookup table. On success the same rotate/XOR derivation as in check1
// (with a different additive constant) is applied and the next stage is
// decrypted and called.
int check2()
{
  __int64 i; // rax
  char flOldProtect[3]; // [rsp+21h] [rbp-27h] BYREF
  DWORD lpflOldProtect_; // [rsp+24h] [rbp-24h] BYREF
  __int64 flOldProtect_7; // [rsp+28h] [rbp-20h]
  char Buffer[3]; // [rsp+35h] [rbp-13h] BYREF
  char DstBuf[8]; // [rsp+38h] [rbp-10h] BYREF
 
  read(0, DstBuf, 1u);
  read(0, &DstBuf[1], 1u);
  read(0, &DstBuf[2], 1u);
  read(0, &DstBuf[3], 1u);
  read(0, &DstBuf[4], 1u);
  read(0, &DstBuf[5], 1u);
  read(0, &DstBuf[6], 1u);
  read(0, &DstBuf[7], 1u);
  // Bytes 0..2 must all be zero, bytes 3..6 are compared as one DWORD, byte 7 must be 82 ('R').
  // NOTE(review): read little-endian, the DWORD constant presumably makes bytes 3..7
  // spell "\rIHDR", which would fit the PNG the solver script writes out - confirm.
  if ( !(DstBuf[2] | (unsigned __int8)(DstBuf[1] | DstBuf[0])) && *(_DWORD *)&DstBuf[3] == 'DHI\r' && DstBuf[7] == 82 )
  {
    strcpy(flOldProtect, ":)");
    puts(flOldProtect);
    flOldProtect_7 = __ROL8__(
                       __ROR8__(__ROL8__(*(_QWORD *)DstBuf + 605849054i64, 13) ^ 0xDEADBEEFCAFEBABEui64, 7) ^ 0x123456789ABCDEF0i64,
                       5);
    VirtualProtect(Address, 0x631Eui64, 0x40u, &lpflOldProtect_);
    // XOR-decrypt 25374 (0x631E) bytes at 0x140021280 with an 8-byte repeating key.
    // NOTE(review): the Buffer[(i & 7) - 3] index is as emitted by the decompiler;
    // presumably it refers to the bytes of flOldProtect_7 - confirm in the disassembly.
    for ( i = 0i64; i != 25374; ++i )
      *(_BYTE *)(i + 0x140021280i64) ^= Buffer[(i & 7) - 3];
    VirtualProtect(Address, 0x631Eui64, lpflOldProtect_, 0i64);
    return ((int (__stdcall *)())Address)();
  }
  else
  {
    strcpy(Buffer, ":(");
    return puts(Buffer);
  }
}

I will call this the direct-compare check. So far we have seen 2 types of check and only one key-generation algorithm. I did the next 3 checks manually and saw a new pattern.

// local variable allocation has failed, the output may be wrong!
// Direct-compare check whose key is no longer computed inline: a separate
// keygen() function fills a 16-byte key that is then used to XOR-decrypt the
// next stage.
void __fastcall check4(int a1, int a2, __int64 a3, int _EAX)
{
  __int64 i; // rax
  char happy[3]; // [rsp+29h] [rbp-2Fh] OVERLAPPED BYREF
  DWORD addresss_3; // [rsp+2Ch] [rbp-2Ch] BYREF
  _QWORD addresss_7[2]; // [rsp+30h] [rbp-28h] BYREF
  char sad[3]; // [rsp+45h] [rbp-13h] BYREF
  char inp[8]; // [rsp+48h] [rbp-10h] BYREF
 
  read(0, inp, 1u);
  read(0, &inp[1], 1u);
  read(0, &inp[2], 1u);
  read(0, &inp[3], 1u);
  read(0, &inp[4], 1u);
  read(0, &inp[5], 1u);
  read(0, &inp[6], 1u);
  read(0, &inp[7], 1u);
  // Bytes 2..4 must all be zero; every other byte is compared with a constant.
  if ( inp[0] == '\b'
    && inp[1] == '\x06'
    && !(inp[4] | (unsigned __int8)(inp[3] | inp[2]))
    && inp[5] == (char)'\x8D'
    && inp[6] == 'w'
    && inp[7] == '\x0E' )
  {
    strcpy(happy, ":)");
    puts(happy);
    // The 16-byte key buffer is zeroed, then filled by keygen() from (input + constant).
    addresss_7[0] = 0i64;
    addresss_7[1] = 0i64;
    keygen((unsigned __int8 *)addresss_7, (char *)(*(_QWORD *)inp + 0x6274EB3Fi64));
    VirtualProtect(sub_14003A3BE, 0x1B692ui64, 0x40u, &addresss_3);
    // XOR-decrypt 112274 (0x1B692) bytes with the key repeating every 16 bytes (i & 0xF).
    for ( i = 0i64; i != 112274; ++i )
      *(_BYTE *)(i + 0x14003A3BEi64) ^= *((_BYTE *)addresss_7 + (i & 0xF));
    VirtualProtect(sub_14003A3BE, 0x1B692ui64, addresss_3, 0i64);
    sub_14003A3BE();
  }
  else
  {
    strcpy(sad, ":(");
    puts(sad);
  }
}

Yeah, as you can see, there is no more ROL stuff; it calls a function in the original binary instead. So the new problem is that we can't emulate the current shellcode in isolation. Another new pattern shows up in check6.

// Lookup-table check (same table construction and compare shape as check1)
// combined with an inline key derivation. The variables named ":)" and ":("
// are the renamed buffers that receive those two strings. Imports appear as
// raw MEMORY[...] calls in this listing.
__int64 check6()
{
  char *v0; // rax
  char *v1; // rcx
  char *v2; // rcx
  char *v3; // rcx
  __int64 i; // rcx
  char *v5; // rcx
  void (__fastcall *v6)(_QWORD, char *, __int64); // rbx
  unsigned __int64 v8; // rax
  int v9; // edx
  int v10; // r8d
  int v11; // edx
  __int64 *v12; // rcx
  unsigned __int8 v13; // dl
  __int64 j; // rax
  char :)[3]; // [rsp+2Dh] [rbp-23Bh] BYREF
  _QWORD v16[2]; // [rsp+30h] [rbp-238h] BYREF
  __int64 v17[2]; // [rsp+40h] [rbp-228h] BYREF
  __int64 v18[30]; // [rsp+50h] [rbp-218h] BYREF
  char :([259]; // [rsp+14Dh] [rbp-11Bh] BYREF
  char v20; // [rsp+250h] [rbp-18h] BYREF
  __int64 v21; // [rsp+258h] [rbp-10h] BYREF
 
  v21 = 0i64;
  // As in check1, every fill below targets the same 256 bytes, so only the
  // last loop's values (13 + k for entry k) remain in the table.
  memset(&:([3], 0, 256);
  v0 = &:([3];
  v1 = &:([3];
  do
    *v1++ = 0;
  while ( v1 != &v20 );
  v2 = &:([3];
  do
    *v2++ = 1;
  while ( v2 != &v20 );
  v3 = &:([3];
  do
    *v3++ = 2;
  while ( v3 != &v20 );
  for ( i = 1i64; i != 257; ++i )
    :([i + 2] = i;
  v5 = &:([3];
  do
  {
    *v5 = 2 - (unsigned __int8)&:([3] + (_BYTE)v5;
    ++v5;
  }
  while ( v5 != &v20 );
  do
  {
    *v0 = 13 - (unsigned __int8)&:([3] + (_BYTE)v0;
    ++v0;
  }
  while ( v0 != &v20 );
  // Called eight times with (0, pointer, 1), the same argument pattern as
  // read() in the earlier checks; v21 receives the 8 input bytes.
  v6 = (void (__fastcall *)(_QWORD, char *, __int64))MEMORY[0x670278E0];
  MEMORY[0x670278E0](0i64, &v21, 1i64);
  v6(0i64, (char *)&v21 + 1, 1i64);
  v6(0i64, (char *)&v21 + 2, 1i64);
  v6(0i64, (char *)&v21 + 3, 1i64);
  v6(0i64, (char *)&v21 + 4, 1i64);
  v6(0i64, (char *)&v21 + 5, 1i64);
  v6(0i64, (char *)&v21 + 6, 1i64);
  v6(0i64, (char *)&v21 + 7, 1i64);
  // Each input byte indexes the table; constants are shown as signed chars here.
  if ( :([(unsigned __int8)v21 + 3] == 79
    && :([BYTE1(v21) + 3] == 13
    && :([BYTE2(v21) + 3] == -69
    && :([BYTE3(v21) + 3] == -37
    && :([BYTE4(v21) + 3] == 41
    && :([BYTE5(v21) + 3] == -10
    && :([BYTE6(v21) + 3] == 13
    && :([HIBYTE(v21) + 3] == 13 )
  {
    strcpy(:), ":)");
    // Presumably puts(): it is called with the ":)" / ":(" buffers.
    MEMORY[0x670278D0](:));
    v16[0] = 0i64;
    v16[1] = 0i64;
    v8 = v21 + 1045416388;
    v17[0] = 0i64;
    v17[1] = 0i64;
    memset(v18, 0, sizeof(v18));
    v9 = 1;
    v10 = 1;
    // NOTE(review): this loop looks like the usual AES S-box generation
    // (0x1B reduction, 0x63 affine constant) writing a table that starts at
    // v17 - confirm against the disassembly.
    do
    {
      v10 ^= (2 * v10) ^ ((char)v10 >> 7) & 0x1B;
      v11 = (2 * v9) ^ v9 ^ (4 * ((2 * v9) ^ v9)) ^ (16 * ((2 * v9) ^ v9 ^ (4 * ((2 * v9) ^ v9))));
      v9 = ((char)v11 >> 7) & 9 ^ v11;
      *((_BYTE *)v17 + (unsigned __int8)v10) = __ROL1__(v9, 3) ^ __ROL1__(v9, 2) ^ v9 ^ __ROL1__(v9, 4) ^ __ROL1__(v9, 1) ^ 0x63;
    }
    while ( (_BYTE)v10 != 1 );
    LOBYTE(v17[0]) = 99;
    v12 = v16;
    // Key bytes are produced by repeatedly looking up the low byte of v8 in
    // the table and mixing the result back into v8.
    do
    {
      v13 = *((_BYTE *)v17 + (unsigned __int8)v8);
      *(_BYTE *)v12 = v13;
      v8 = v13 ^ (v8 >> 4);
      v12 = (__int64 *)((char *)v12 + 1);
    }
    while ( v18 != v12 );
    // Presumably VirtualProtect(): same (address, size, 0x40, &old) call shape as before.
    MEMORY[0x6715C7C0](0x14006D3B0i64, 22128i64, 64i64, v17);
    // XOR-decrypt 22128 bytes at 0x14006D3B0 with the key at v16 repeating every 16 bytes.
    for ( j = 0i64; j != 22128; ++j )
      *(_BYTE *)(j + 0x14006D3B0i64) ^= *((_BYTE *)v16 + (j & 0xF));
    MEMORY[0x6715C7C0](0x14006D3B0i64, 22128i64, LODWORD(v17[0]), 0i64);
    return MEMORY[0x67070FA0]();
  }
  else
  {
    strcpy(:(, ":(");
    return MEMORY[0x670278D0](:();
  }
}

After seeing a bunch of patterns, I decided to use unicorn for emulation and angr for solving the input.

If our input is correct, the check loads :) and calls puts. Otherwise it loads :( and also calls puts. So we use angr to find the 8 bytes that reach the correct branch. Since we are running angr on raw shellcode, we also need to hook read() and puts() so that symbolic execution works.

import angr
from pwn import *
import claripy
import capstone
from iced_x86 import *
import re
def process_shellcode(file_name):
    """Find the 8 input bytes that make one decrypted check reach its ":)" branch.

    The shellcode in *file_name* is loaded into angr, the check's ``read`` and
    ``puts`` import slots are pointed at stub addresses, and every ``read``
    call stores a fresh symbolic byte at the address held in ``rdx``. States
    whose next instruction bytes contain ":(" are parked in the "bad" stash;
    stepping continues until an active state's next instruction bytes contain
    ":)". The solved bytes are appended to ``lmao.png`` and printed.

    Args:
        file_name: Path to a raw dump of the decrypted shellcode.

    Returns:
        bytes: One concrete value per symbolic byte handed out by the read
        hook, in call order.

    Raises:
        ValueError: If the read or puts pointer pattern is not found in the
            disassembly.
        RuntimeError: If no active state is left before ":)" is reached.
    """
    def parse_read_puts_ptr(shellcode_path, base_rva=0x0):
        """Return ``(READ_PTR, PUTS_PTR)`` parsed from the disassembly.

        Either value is ``None`` when its pattern is absent. ``base_rva`` is
        accepted for compatibility and is not used.
        """
        with open(shellcode_path, "rb") as f:
            shellcode = f.read()
        decoder = Decoder(64, shellcode, 0)
        # One join instead of repeated string concatenation in the loop.
        wtf = "".join(str(instr) + "\n" for instr in decoder)
        READ_PTR = None
        # Initialised so a missing match cannot leave the name unbound.
        PUTS_PTR = None
        match = re.search(r"mov rbx,\[([0-9A-Fa-f]+)h\]\s*call rbx", wtf)
        if match:
            READ_PTR = int(match.group(1), 16)
        match = re.search(
            r"lea rcx,\[rsp([+-]?[0-9A-Fa-f]+h?)\]\s*call qword ptr \[([0-9A-Fa-f]+)h\]",
            wtf,
        )
        if match:
            PUTS_PTR = int(match.group(2), 16)
        return READ_PTR, PUTS_PTR

    READ_PTR, PUTS_PTR = parse_read_puts_ptr(file_name)
    if READ_PTR is None or PUTS_PTR is None:
        raise ValueError(
            "could not locate read/puts pointers in %r (read=%r, puts=%r)"
            % (file_name, READ_PTR, PUTS_PTR)
        )
    with open(file_name, "rb") as f:
        p = angr.load_shellcode(f.read(), arch="amd64")
    state = p.factory.entry_state()
    state.regs.rbp = state.regs.rsp
    READ = 0x5000000
    PUTS = 0x6000000
    syminp = []
    cur = 0

    def read_hook(state):
        # Each read() call yields one fresh symbolic byte at the buffer in rdx.
        nonlocal cur
        s = claripy.BVS("inp_%d" % cur, 8)
        state.memory.store(state.regs.rdx, s)
        syminp.append(s)
        cur += 1

    p.hook(READ, read_hook)
    # Both stubs are a single `ret`; the import slots are pointed at them.
    state.memory.store(READ, b"\xc3")
    state.memory.store(READ_PTR, p64(READ))
    state.memory.store(PUTS, b"\xc3")
    state.memory.store(PUTS_PTR, p64(PUTS))
    simgr = p.factory.simulation_manager(state)
    simgr.stashes["bad"] = []

    def next_bytes(state):
        # The 8 bytes at rip, or None when they cannot be read concretely.
        try:
            return state.memory.load(state.regs.rip, 8).concrete_value.to_bytes(
                8, byteorder="big"
            )
        except Exception:
            # `except Exception` (not a bare except) keeps Ctrl-C working.
            return None

    def has_failed(state):
        data = next_bytes(state)
        return data is not None and b"\x3a\x28" in data

    def found_smile(simgr):
        # Every active state is inspected, not only the first one.
        for candidate in simgr.active:
            data = next_bytes(candidate)
            if data is not None and b":)" in data:
                return candidate
        return None

    winner = found_smile(simgr)
    while winner is None:
        if not simgr.active:
            # Without this guard the loop would spin forever on an empty stash.
            raise RuntimeError(
                "no active states left before reaching ':)' in %r" % (file_name,)
            )
        simgr.step()
        simgr.move("active", "bad", filter_func=has_failed)
        winner = found_smile(simgr)

    lmao = bytes(winner.solver.eval(x) for x in syminp)
    with open("lmao.png", "ab") as out:
        out.write(lmao)
    print(lmao)
    return lmao
process_shellcode("./test5")

Angr’s cooking !!! angr easily solves the first 6 checks. Now, how do we combine angr with the unicorn shellcode emulation to auto-solve everything? Because checks like 4 and 5 call functions in the original binary to generate the key, we need to map the entire .text section of the original binary and emulate it.

# Emulate inside the full .text section so that checks which call helper
# functions living in the original binary can run.
md = Cs(CS_ARCH_X86, CS_MODE_64)
mu = Uc(UC_ARCH_X86, UC_MODE_64)
 
BASE = 0x140001000
STACK_ADDR = 0x800000  
STACK_SIZE = 2 * 1024 * 1024  
# Round both values down to a 4 KiB boundary (they are already aligned).
STACK_ADDR = (STACK_ADDR // 0x1000) * 0x1000
STACK_SIZE = (STACK_SIZE // 0x1000) * 0x1000
 
# section_text = read("nfuncs.exe")
# section_text = section_text[0x400:0x400 + 0x6703a000] 
# "section_text" is a pre-extracted dump; the commented lines above show the
# equivalent slice of nfuncs.exe.
section_text = read("section_text")
print("[+] Done reading section text")
 
mu.mem_map(BASE, 0x6703a000)  
mu.mem_map(STACK_ADDR, STACK_SIZE)  
mu.mem_write(BASE, section_text)  
 
# RSP starts 0x100 bytes below the top of the stack mapping.
mu.reg_write(UC_X86_REG_RSP, STACK_ADDR + STACK_SIZE - 0x100)
print("[+] Done mapping memory")
 
# UC_HOOK_CODE invokes hook_code for every emulated instruction.
mu.hook_add(UC_HOOK_CODE, hook_code)
 
# Per the write-up, 0x14003A3BE is the address of check5.
start_addr = 0x000000014003A3BE
end_addr = start_addr + 0x200
 
try:
    mu.emu_start(start_addr, end_addr)
except UcError as e:
    print(f"Unicorn Error: {e}")

start_addr in the current script is the address of check5, since the previous checks and the current one have already been patched. Here is the hooking part and how it auto-solves each layer:

  • We know the flow of each check (read -> transform -> compare -> generate new key -> decrypt the next block of shellcode)
  • We need to hook read() and write our correct input to the buffer address
if "mov" in inst_diss and "rbx" in inst_diss:
        print("READ HANDLER")
        OFFSET_INP = mu.reg_read(UC_X86_REG_RDX)
        print("FOUND BUFFER ADD", hex(OFFSET_INP))
        code_to_disasm = mu.mem_read(RIP_value, 0x100)
        call_rbx_found = False
        
        for i in md.disasm(code_to_disasm, RIP_value):
            if i.mnemonic == "call" and i.op_str == "rbx":
                call_rbx_count += 1
                if call_rbx_count == 8:
                    RIP_VALUE_READ_HANDLER = i.address + i.size
                    call_rbx_found = True
                    break  
 
        mu.reg_write(UC_X86_REG_RIP, RIP_VALUE_READ_HANDLER)
        mu.mem_write(OFFSET_INP, PNG_SHIT)
  • After this we also need to hook puts() -> we just skip it because it is not needed
elif "mov" in inst_diss and ", 0x293a" in inst_diss: # if it correct XD
        print("CORRECT KEY")
        mu.reg_write(UC_X86_REG_RIP, RIP_value + 0x17)
  • After this step it calls VirtualProtect and decrypts the next shellcode chunk, which means we can dump the new key, the new shellcode address, and the new shellcode size.
elif "call" in inst_diss and "qword ptr [rip" in inst_diss:
        DEC_ADDRESS = mu.reg_read(UC_X86_REG_RIP) + 0x35 # prevent second call VirtualProtect
        print(hex(DEC_ADDRESS))
        RSP = mu.reg_read(UC_X86_REG_RSP)
        data = mu.mem_read(RSP + 0x30, 16)
        print("FOUND KEY", data)
        RDX = mu.reg_read(UC_X86_REG_RDX)
        print("size shellcode", hex(RDX))
        SHELLCODE_SIZE = RDX
        RCX = mu.reg_read(UC_X86_REG_RCX)
        SHELLCODE_CHUNK = RCX
        print("shellcode address", hex(RCX))
        mu.reg_write(UC_X86_REG_RIP, RIP_value + 0x6)
  • The final step is to run the angr solver and jump into the next shellcode block
if DEC_ADDRESS != 0:
        if RIP_value > DEC_ADDRESS:
            mu.reg_write(UC_X86_REG_RIP, SHELLCODE_CHUNK)
            print("[+] Finish 1 chunk")
            open("solve.bin", "wb").write(mu.mem_read(SHELLCODE_CHUNK, SHELLCODE_SIZE))
            print(f"[+] Write shellcode to file {hex(SHELLCODE_CHUNK)} {hex(SHELLCODE_SIZE)}")
            print("Angryyyyyyyy")
            PNG_SHIT = process_shellcode("solve.bin")
            open(LMAO_PNG, "ab").write(PNG_SHIT)
            print("Current RIP", hex(mu.reg_read(UC_X86_REG_RIP)))
            DEC_ADDRESS = 0
            SHELLCODE_CHUNK = 0 
            SHELLCODE_SIZE = 0

Overall, our strategy is: emulate the shellcode with the correct key -> decrypt the next shellcode block -> feed that decrypted shellcode to the angr solver -> find the correct input -> set rip to that chunk of shellcode -> and so on until the angr solver breaks XD

Final script

from unicorn import *
from unicorn.x86_const import *
import struct
from capstone import Cs, CS_ARCH_X86, CS_MODE_64
from pwn import *
import angr
import claripy
from iced_x86 import *
import re
 
def read(name):
    """Return the entire contents of the file *name* as bytes."""
    handle = open(name, "rb")
    try:
        return handle.read()
    finally:
        handle.close()
 
def process_shellcode(file_name):
    """Find the 8 input bytes that make one decrypted check reach its ":)" branch.

    The shellcode in *file_name* is loaded into angr, the check's ``read`` and
    ``puts`` import slots are pointed at stub addresses, and every ``read``
    call stores a fresh symbolic byte at the address held in ``rdx``. States
    whose next instruction bytes contain ":(" are parked in the "bad" stash;
    stepping continues until an active state's next instruction bytes contain
    ":)".

    Args:
        file_name: Path to a raw dump of the decrypted shellcode.

    Returns:
        bytes: One concrete value per symbolic byte handed out by the read
        hook, in call order.

    Raises:
        ValueError: If the read or puts pointer pattern is not found in the
            disassembly.
        RuntimeError: If no active state is left before ":)" is reached.
    """
    def parse_read_puts_ptr(shellcode_path, base_rva=0x0):
        """Return ``(READ_PTR, PUTS_PTR)`` parsed from the disassembly.

        Either value is ``None`` when its pattern is absent. ``base_rva`` is
        accepted for compatibility and is not used.
        """
        with open(shellcode_path, "rb") as f:
            shellcode = f.read()
        decoder = Decoder(64, shellcode, 0)
        # One join instead of repeated string concatenation in the loop.
        wtf = "".join(str(instr) + "\n" for instr in decoder)
        READ_PTR = None
        # Initialised so a missing match cannot leave the name unbound.
        PUTS_PTR = None
        match = re.search(r"mov rbx,\[([0-9A-Fa-f]+)h\]\s*call rbx", wtf)
        if match:
            READ_PTR = int(match.group(1), 16)
        match = re.search(
            r"lea rcx,\[rsp([+-]?[0-9A-Fa-f]+h?)\]\s*call qword ptr \[([0-9A-Fa-f]+)h\]",
            wtf,
        )
        if match:
            PUTS_PTR = int(match.group(2), 16)
        return READ_PTR, PUTS_PTR

    READ_PTR, PUTS_PTR = parse_read_puts_ptr(file_name)
    if READ_PTR is None or PUTS_PTR is None:
        raise ValueError(
            "could not locate read/puts pointers in %r (read=%r, puts=%r)"
            % (file_name, READ_PTR, PUTS_PTR)
        )
    with open(file_name, "rb") as f:
        p = angr.load_shellcode(f.read(), arch="amd64")
    state = p.factory.entry_state()
    state.regs.rbp = state.regs.rsp
    READ = 0x5000000
    PUTS = 0x6000000
    syminp = []
    cur = 0

    def read_hook(state):
        # Each read() call yields one fresh symbolic byte at the buffer in rdx.
        nonlocal cur
        s = claripy.BVS("inp_%d" % cur, 8)
        state.memory.store(state.regs.rdx, s)
        syminp.append(s)
        cur += 1

    p.hook(READ, read_hook)
    # Both stubs are a single `ret`; the import slots are pointed at them.
    state.memory.store(READ, b"\xc3")
    state.memory.store(READ_PTR, p64(READ))
    state.memory.store(PUTS, b"\xc3")
    state.memory.store(PUTS_PTR, p64(PUTS))
    simgr = p.factory.simulation_manager(state)
    simgr.stashes["bad"] = []

    def next_bytes(state):
        # The 8 bytes at rip, or None when they cannot be read concretely.
        try:
            return state.memory.load(state.regs.rip, 8).concrete_value.to_bytes(
                8, byteorder="big"
            )
        except Exception:
            # `except Exception` (not a bare except) keeps Ctrl-C working.
            return None

    def has_failed(state):
        data = next_bytes(state)
        return data is not None and b"\x3a\x28" in data

    def found_smile(simgr):
        # Every active state is inspected, not only the first one.
        for candidate in simgr.active:
            data = next_bytes(candidate)
            if data is not None and b":)" in data:
                return candidate
        return None

    winner = found_smile(simgr)
    while winner is None:
        if not simgr.active:
            # Without this guard the loop would spin forever on an empty stash.
            raise RuntimeError(
                "no active states left before reaching ':)' in %r" % (file_name,)
            )
        simgr.step()
        simgr.move("active", "bad", filter_func=has_failed)
        winner = found_smile(simgr)

    lmao = bytes(winner.solver.eval(x) for x in syminp)

    print("[+] Found correct input: ", lmao)
    return lmao
 
# State shared between hook_code invocations.
# DEC_ADDRESS: 0 while idle; otherwise the RIP threshold past which the current
# chunk counts as decrypted.
DEC_ADDRESS = 0
# Address and size of the chunk being decrypted (taken from RCX/RDX in hook_code).
SHELLCODE_CHUNK = 0 
SHELLCODE_SIZE = 0
# The 8 bytes hook_code writes into the input buffer of the next check;
# replaced by the result of process_shellcode after every chunk.
PNG_SHIT = b'=\x00\x00\x00\x01sRG'
 
# Output file that the solved bytes are appended to.
LMAO_PNG = "lmao.png"
 
def hook_code(mu, address, size, user_data):
    """Per-instruction Unicorn hook that drives the layer-by-layer solve.

    Depending on the instruction about to execute, the hook:

    * redirects execution into the freshly decrypted chunk once RIP has
      passed ``DEC_ADDRESS``, dumps that chunk to ``solve.bin``, solves it
      with ``process_shellcode`` and appends the answer to ``LMAO_PNG``;
    * on a ``mov`` involving ``rbx`` that is followed by eight ``call rbx``
      within 0x100 bytes, writes ``PNG_SHIT`` to the buffer in RDX and jumps
      past the eighth call;
    * on the ``mov ..., 0x293a`` (":)") store, skips 0x17 bytes;
    * on ``call qword ptr [rip ...]``, records the chunk address (RCX) and
      size (RDX), arms ``DEC_ADDRESS`` and skips the 6-byte call.

    Args:
        mu: The Unicorn engine instance.
        address: Address of the instruction about to execute.
        size: Size of that instruction in bytes.
        user_data: Unused.
    """
    global DEC_ADDRESS
    global SHELLCODE_CHUNK
    global SHELLCODE_SIZE
    global PNG_SHIT
    RIP_value = mu.reg_read(UC_X86_REG_RIP)

    if DEC_ADDRESS != 0 and RIP_value > DEC_ADDRESS:
        mu.reg_write(UC_X86_REG_RIP, SHELLCODE_CHUNK)
        print("[+] Finish 1 chunk")
        with open("solve.bin", "wb") as dump:
            dump.write(mu.mem_read(SHELLCODE_CHUNK, SHELLCODE_SIZE))
        print(f"[+] Write shellcode to file {hex(SHELLCODE_CHUNK)} {hex(SHELLCODE_SIZE)}")
        print("Angryyyyyyyy")
        PNG_SHIT = process_shellcode("solve.bin")
        with open(LMAO_PNG, "ab") as out:
            out.write(PNG_SHIT)
        print("Current RIP", hex(mu.reg_read(UC_X86_REG_RIP)))
        DEC_ADDRESS = 0
        SHELLCODE_CHUNK = 0
        SHELLCODE_SIZE = 0
        # RIP now points into the new chunk; the instruction at the old
        # address must not be pattern-matched below, or it could overwrite
        # the redirect.
        return

    code = mu.mem_read(address, size)
    insn = next(iter(md.disasm(code, address)), None)
    if insn is None:
        # Capstone could not decode these bytes; nothing to match against.
        return
    # print(f"0x{address:016x}: {insn.mnemonic} {insn.op_str}")

    if insn.mnemonic == "mov" and "rbx" in insn.op_str:
        OFFSET_INP = mu.reg_read(UC_X86_REG_RDX)
        code_to_disasm = mu.mem_read(RIP_value, 0x100)
        after_reads = None
        call_rbx_count = 0
        for i in md.disasm(code_to_disasm, RIP_value):
            if i.mnemonic == "call" and i.op_str == "rbx":
                call_rbx_count += 1
                if call_rbx_count == 8:
                    after_reads = i.address + i.size
                    break

        # Only treat this as the read sequence when all eight calls were
        # found; any other `mov` touching rbx is left to execute normally.
        if after_reads is not None:
            print("READ HANDLER")
            print("FOUND BUFFER ADD", hex(OFFSET_INP))
            mu.reg_write(UC_X86_REG_RIP, after_reads)
            mu.mem_write(OFFSET_INP, PNG_SHIT)

    elif insn.mnemonic == "mov" and ", 0x293a" in insn.op_str: # if it correct XD
        print("CORRECT KEY")
        mu.reg_write(UC_X86_REG_RIP, RIP_value + 0x17)

    elif insn.mnemonic == "call" and "qword ptr [rip" in insn.op_str:
        DEC_ADDRESS = mu.reg_read(UC_X86_REG_RIP) + 0x35 # prevent second call
        print(hex(DEC_ADDRESS))
        RSP = mu.reg_read(UC_X86_REG_RSP)
        data = mu.mem_read(RSP + 0x30, 16)
        print("FOUND KEY", data)
        RDX = mu.reg_read(UC_X86_REG_RDX)
        print("size shellcode", hex(RDX))
        SHELLCODE_SIZE = RDX
        RCX = mu.reg_read(UC_X86_REG_RCX)
        SHELLCODE_CHUNK = RCX
        print("shellcode address", hex(RCX))
        # Skip the 6-byte indirect call instead of emulating the import.
        mu.reg_write(UC_X86_REG_RIP, RIP_value + 0x6)
 
 
# Emulate inside the full .text section so that checks which call helper
# functions living in the original binary can run.
md = Cs(CS_ARCH_X86, CS_MODE_64)
mu = Uc(UC_ARCH_X86, UC_MODE_64)
 
BASE = 0x140001000
STACK_ADDR = 0x800000  
STACK_SIZE = 2 * 1024 * 1024  
# Round both values down to a 4 KiB boundary (they are already aligned).
STACK_ADDR = (STACK_ADDR // 0x1000) * 0x1000
STACK_SIZE = (STACK_SIZE // 0x1000) * 0x1000
 
# section_text = read("nfuncs.exe")
# section_text = section_text[0x400:0x400 + 0x6703a000] 
# "section_text" is a pre-extracted dump; the commented lines above show the
# equivalent slice of nfuncs.exe.
section_text = read("section_text")
print("[+] Done reading section text")
 
mu.mem_map(BASE, 0x6703a000)  
mu.mem_map(STACK_ADDR, STACK_SIZE)  
mu.mem_write(BASE, section_text)  
 
# RSP starts 0x100 bytes below the top of the stack mapping.
mu.reg_write(UC_X86_REG_RSP, STACK_ADDR + STACK_SIZE - 0x100)
print("[+] Done mapping memory")
 
# UC_HOOK_CODE invokes hook_code for every emulated instruction.
mu.hook_add(UC_HOOK_CODE, hook_code)
 
# Per the write-up, 0x14003A3BE is the address of check5; PNG_SHIT holds the
# input written into its buffer.
start_addr = 0x000000014003A3BE
end_addr = start_addr + 0x200
 
try:
    mu.emu_start(start_addr, end_addr)
except UcError as e:
    print(f"Unicorn Error: {e}")

solutions

Running it on a 16-core, 64 GB RAM server takes about 3 hours to fully decrypt the first flag. My script can still solve some layers of nfuncs2, but since that challenge uses a lot of ciphers (RC4, ChaCha, plus, etc.) and no longer uses cmp, it would take a long time to solve each layer.

A solve is a solve? Nah, to be honest my script is a whole mess: when I finished it, the contest was ending in 7 hours; my laptop ran it for 3 hours and got killed, and my teammate used his VPS to run it for the rest of the time. Luckily I still solved it in time (40 minutes before the contest ended).

How to optimize this script:

  • Don't use capstone in the hook; iced_x86 is faster
  • Don't use iced_x86 in the hook; a regex is better
  • Don't use regex; just be a better reverser (basically, don't be me - stupid ass)

UPDATE: new script but I dont know if its faster xD solve.py

Flag: flag{kitty_kitty_on_the_wall_439xb8q@}