R3CTF - Thief Writeup

Author: Ernesto Martínez García

Tags: r3ctf ctf misc

This was an easy Computer Vision challenge. The server ran the following script:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import torch
from torch import nn
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
import torch.nn.functional as F
import pandas as pd
import random
import os

# Print the banner text and the challenge statement.
print("""
...
""")
print(r"The model effect of Cain's is always not as good as that of rec.")
print(r"A little bird told me that rec only used a portion of the training set.")
print(r"Can you tell me the data below, which ones are in this model's training set")

# Indices that were used for training are read from column 0 of the CSV.
# NOTE(review): .iloc[1:] drops the first row that read_csv parsed as data --
# confirm this is intended and not a double header skip.
train_indices = pd.read_csv('/app/train_indices.csv').iloc[1:, 0].tolist()
all_numbers = list(range(0, 10000))
# Build the quiz: 250 indices drawn from the training list and 250 drawn from
# the remaining indices in 0..9999, shuffled together (500 questions total).
trained = random.sample(train_indices, 250)
untrained = random.sample(list(set(all_numbers) - set(train_indices)), 250)
num = trained + untrained
random.shuffle(num)

# Preprocessing applied to every queried image: convert to a tensor, then
# normalize each of the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
# Despite its name, `trainset` is created with train=False, which in
# torchvision selects the CIFAR-100 test split.
trainset = torchvision.datasets.CIFAR100(root="/app/data", train=False, transform=transform)

# ResNet-18 without pretrained weights, with the final layer replaced by a
# 100-output linear layer; the weights come from the saved state dict and are
# mapped to the CPU. eval() switches the model to inference mode.
rec_model = models.resnet18(weights=None)
rec_model.fc = nn.Linear(in_features=rec_model.fc.in_features, out_features=100)
rec_model.load_state_dict(torch.load(r"/app/rec's model.pth", map_location='cpu'))
rec_model.eval()

# Accuracy threshold the player has to exceed (compared with `>` below).
score = 0.75 + 0.1
cnt = 0
for idx in num:
    # Run the model on a batch containing only this image and turn the
    # outputs into probabilities over the class dimension.
    output = rec_model(trainset[idx][0].unsqueeze(0))
    prob = F.softmax(output, dim=1) 
    # Only the 10 largest probability values are kept; the class indices
    # returned by topk are discarded.
    top_10, _ = torch.topk(prob, 10)

    print(f"idx : {idx}")
    print(f"top_10_pred : {top_10.tolist()}")
    inp = input('Is this picture in the training set?')
    # Ground truth: 1 when the index appears in the training list, else 0.
    ismember = 1 if idx in train_indices else 0
    # int(inp) raises ValueError when the answer is not an integer literal.
    if ismember == int(inp):
        cnt+=1

# The flag is read from the GZCTF_FLAG environment variable and printed only
# when the fraction of correct answers is strictly above `score`.
if cnt/len(num) > score:
    print(os.environ.get("GZCTF_FLAG"))
    exit()

print("No No No.You can't be the THEIF!!!")

You had to predict, for each image the server showed, whether or not it had been used to train the image recognition model.

I ran a few tests and quickly saw that there was a big gap in the top prediction scores, so I decided that all the images whose highest score was > 0.9 (and whose second-highest score was < 0.2) were in the training set:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# This exploit template was generated via:
# $ pwn template --host ctf2024-entry.r3kapig.com --port 31759
from pwn import *

# Tell pwntools which architecture to assume, and where the local binary is
context.update(arch='i386')
exe = './path/to/binary'

# pwntools exposes command-line settings through "args".  For example, to
# dump all data sent/received, and disable ASLR for all created processes:
#   ./exploit.py DEBUG NOASLR
#   ./exploit.py GDB HOST=example.com PORT=4141 EXE=/tmp/executable
# HOST and PORT, when supplied, replace the challenge endpoint used below.
host = args.HOST if args.HOST else 'ctf2024-entry.r3kapig.com'
port = int(args.PORT) if args.PORT else 31759

def start_local(argv=None, *a, **kw):
    '''Execute the target binary locally.

    argv -- extra command-line arguments placed after ``exe``; ``None``
            (the default) means no extra arguments.
    *a, **kw -- forwarded unchanged to ``gdb.debug`` or ``process``.

    Returns the object produced by ``gdb.debug`` when GDB is requested via
    ``args.GDB``, otherwise the one produced by ``process``.
    '''
    # Defaulting to None instead of [] avoids a list object shared by every
    # call of this function.
    command = [exe] + ([] if argv is None else list(argv))
    if args.GDB:
        return gdb.debug(command, *a, gdbscript=gdbscript, **kw)
    return process(command, *a, **kw)

def start_remote(argv=None, *a, **kw):
    '''Connect to the process on the remote host.

    Opens a connection to ``host``:``port`` and returns it.  When GDB is
    requested via ``args.GDB``, ``gdb.attach`` is called on the connection
    with ``gdbscript`` first.

    ``argv``, ``*a`` and ``**kw`` are accepted but not used here; ``argv``
    defaults to None rather than a shared mutable list.
    '''
    io = connect(host, port)
    if args.GDB:
        gdb.attach(io, gdbscript=gdbscript)
    return io

def start(argv=None, *a, **kw):
    '''Start the exploit against the target.

    Dispatches to ``start_local`` when LOCAL is set in ``args`` and to
    ``start_remote`` otherwise, forwarding ``argv``, ``*a`` and ``**kw``.

    argv -- extra command-line arguments for the target; ``None`` (the
            default) is turned into a fresh empty list before forwarding.

    Returns whatever the selected starter returns.
    '''
    # A new list per call instead of one default list shared across calls.
    argv = [] if argv is None else argv
    launcher = start_local if args.LOCAL else start_remote
    return launcher(argv, *a, **kw)

# GDB commands passed as gdbscript= to gdb.debug / gdb.attach.
# GDB is launched when the exploit is run with the GDB argument, e.g.
# ./exploit.py GDB
# The text has no replacement fields, so .format() returns it unchanged.
gdbscript = '\ncontinue\n'.format(**locals())

#===========================================================
#                    EXPLOIT GOES HERE
#===========================================================

# Number of questions the server asks (250 trained + 250 untrained indices).
ROUNDS = 500
# Decision rule: answer "in the training set" when the highest probability is
# above TOP1_MIN and the second highest is below TOP2_MAX.
TOP1_MIN = 0.90
TOP2_MAX = 0.20
# Thresholds reported in the summary at the end of the run.
REPORT_THRESHOLDS = (0.90, 0.95, 0.99)


def parse_top_probs(line):
    """Turn one ``top_10_pred`` payload into a flat list of floats.

    ``line`` is the text after ``top_10_pred : `` as bytes or str, e.g.
    ``b"[[0.97, 0.01, ...]]\\n"``.  Brackets and spaces are stripped, so the
    nested list printed by the server becomes a single flat list.
    """
    text = line.decode() if isinstance(line, bytes) else line
    for junk in ('[', ']', ' '):
        text = text.replace(junk, '')
    return [float(token) for token in text.strip().split(',')]


def looks_trained(probs, top1_min=TOP1_MIN, top2_max=TOP2_MAX):
    """Return True when ``probs`` matches the "seen during training" rule.

    The rule is ``probs[0] > top1_min and probs[1] < top2_max``; ``probs``
    must therefore hold at least two values.
    """
    return probs[0] > top1_min and probs[1] < top2_max


def count_above(values, thresholds=REPORT_THRESHOLDS):
    """Map each threshold to how many ``values`` are strictly above it.

    Counts are cumulative: a value of 0.995 is counted for 0.90, 0.95 and
    0.99, so every "More than X" figure means exactly that.
    """
    return {limit: sum(value > limit for value in values) for limit in thresholds}


if __name__ == "__main__":
    io = start()

    top1_seen = []
    for round_no in range(ROUNDS):
        log.info(f"Iteration {round_no}")
        io.recvuntil(b"top_10_pred : ")
        probs = parse_top_probs(io.recvline())
        log.info(f"  - [0] {round(probs[0], 4)}")
        log.info(f"  - [1] {round(probs[1], 4)}")
        top1_seen.append(probs[0])

        if looks_trained(probs):
            log.info("  - Yes")
            answer = b"1"
        else:
            log.info("  - No")
            answer = b"0"
        io.sendlineafter(b"Is this picture in the training set?", answer)

    for limit, hits in count_above(top1_seen).items():
        log.info(f"More than {limit:.2f}: {hits} ({hits/ROUNDS})")

    io.interactive()