#include "syscall.h"
#include "interrupts.h"
#include "fs.h"
#include "process.h"
#include "spinlock.h"
#include "uaccess.h"
#include "console.h"
#include "utils.h"
#include "heap.h"
#include "tty.h"
#include "pty.h"
#include "tmpfs.h"
#include "errno.h"
#include "shm.h"
#include "socket.h"
#include "elf.h"
#include "stat.h"
#include "timer.h"
#include "vmm.h"
#include "pmm.h"
#include "hal/mm.h"
#include "hal/cpu.h"
#include "arch_signal.h"
#include "arch_syscall.h"
#include "arch_process.h"
#include "rtc.h"
#include <stddef.h>
/* File status / descriptor-creation flag bits used by the syscalls in this
 * file. NOTE(review): the numeric values look like the Linux i386 ones —
 * confirm they match the userland headers. */
enum {
O_APPEND = 0x400,
O_NONBLOCK = 0x800,
O_CLOEXEC = 0x80000,
};
/* Kernel-side itimerval for setitimer/getitimer (matches userland layout) */
/* Seconds + microseconds pair; both fields are 32-bit unsigned. */
struct k_timeval {
uint32_t tv_sec;
uint32_t tv_usec;
};
/* Interval timer description: reload interval followed by current value. */
struct k_itimerval {
struct k_timeval it_interval;
struct k_timeval it_value;
};
/* Timer selectors for setitimer/getitimer. */
#define ITIMER_REAL 0
#define ITIMER_VIRTUAL 1
#define ITIMER_PROF 2
/* Tick rate is taken from the timer driver; USEC_PER_TICK is the truncated
 * number of microseconds per tick. */
#define TICKS_PER_SEC TIMER_HZ
#define USEC_PER_TICK (1000000U / TICKS_PER_SEC)
/*
 * Convert a seconds/microseconds pair into timer ticks.
 * Whole seconds are scaled by TICKS_PER_SEC; the microsecond part is divided
 * by USEC_PER_TICK (truncating). Arithmetic is 32-bit and wraps on overflow.
 */
static uint32_t timeval_to_ticks(const struct k_timeval* tv) {
    uint32_t from_sec = tv->tv_sec * TICKS_PER_SEC;
    uint32_t from_usec = tv->tv_usec / USEC_PER_TICK;
    return from_sec + from_usec;
}
/*
 * Convert a tick count into a seconds/microseconds pair written to *tv.
 * The sub-second remainder is expressed in multiples of USEC_PER_TICK.
 */
static void ticks_to_timeval(uint32_t ticks, struct k_timeval* tv) {
    uint32_t whole = ticks / TICKS_PER_SEC;
    uint32_t leftover = ticks % TICKS_PER_SEC;

    tv->tv_sec = whole;
    tv->tv_usec = leftover * USEC_PER_TICK;
}
/* Per-descriptor flag bit (as opposed to the O_* file status flags). */
enum {
FD_CLOEXEC = 1,
};
/* --- POSIX message queues --- */
/* Static limits: number of queues, messages per queue, bytes per message. */
#define MQ_MAX_QUEUES 8
#define MQ_MAX_MSGS 16
#define MQ_MSG_SIZE 256
/* One queued message: fixed-size payload buffer plus its used length and
 * the priority supplied by the sender. */
struct mq_msg {
uint8_t data[MQ_MSG_SIZE];
uint32_t len;
uint32_t prio;
};
/* One named queue, stored as a ring of mq_msg slots. */
struct mq_queue {
int active; /* non-zero while the slot holds a live queue */
char name[32]; /* NUL-terminated queue name */
struct mq_msg msgs[MQ_MAX_MSGS];
uint32_t head; /* index of the next message to receive */
uint32_t tail; /* index of the next slot to fill */
uint32_t count; /* messages currently queued */
uint32_t maxmsg; /* ring capacity; set to MQ_MAX_MSGS on creation */
uint32_t msgsize; /* set to MQ_MSG_SIZE on creation */
};
/* Global queue table; the descriptor returned to callers is the index into
 * this table. mq_lock guards the table. */
static struct mq_queue mq_table[MQ_MAX_QUEUES];
static spinlock_t mq_lock = {0};
static int mq_find_by_name(const char* name) {
for (int i = 0; i < MQ_MAX_QUEUES; i++) {
if (mq_table[i].active && strcmp(mq_table[i].name, name) == 0)
return i;
}
return -1;
}
static int syscall_mq_open_impl(const char* user_name, uint32_t oflag) {
char name[32];
if (copy_from_user(name, user_name, 31) < 0) return -EFAULT;
name[31] = 0;
uintptr_t fl = spin_lock_irqsave(&mq_lock);
int idx = mq_find_by_name(name);
if (idx >= 0) {
spin_unlock_irqrestore(&mq_lock, fl);
return idx;
}
if (!(oflag & 0x40U)) { /* O_CREAT */
spin_unlock_irqrestore(&mq_lock, fl);
return -ENOENT;
}
for (int i = 0; i < MQ_MAX_QUEUES; i++) {
if (!mq_table[i].active) {
memset(&mq_table[i], 0, sizeof(mq_table[i]));
mq_table[i].active = 1;
strcpy(mq_table[i].name, name);
mq_table[i].maxmsg = MQ_MAX_MSGS;
mq_table[i].msgsize = MQ_MSG_SIZE;
spin_unlock_irqrestore(&mq_lock, fl);
return i;
}
}
spin_unlock_irqrestore(&mq_lock, fl);
return -ENOSPC;
}
/*
 * Close a message-queue descriptor.
 * Descriptors handed out by mq_open are plain table indices with no
 * per-open state, so there is nothing to release; always returns 0.
 */
static int syscall_mq_close_impl(int mqd) {
    (void)mqd; /* intentionally unused */
    return 0;
}
/*
 * Append a message to queue `mqd`.
 *
 * mqd:      queue index returned by mq_open.
 * user_buf: user pointer to the payload.
 * len:      payload length, at most MQ_MSG_SIZE.
 * prio:     stored with the message (delivery order is still FIFO).
 *
 * Returns 0 on success, -EBADF for an invalid/inactive queue, -EMSGSIZE if
 * len is too large, -EFAULT if the payload cannot be read, or -EAGAIN if the
 * queue is full.
 *
 * The payload is first staged in a kernel buffer and only then enqueued
 * inside a single critical section. Copying straight into the ring slot with
 * the lock dropped (as was done before) let two senders fill the same slot
 * and let a receiver observe a half-written message. Consequently the user
 * buffer is now validated before the queue state is examined.
 */
static int syscall_mq_send_impl(int mqd, const void* user_buf, uint32_t len, uint32_t prio) {
    if (mqd < 0 || mqd >= MQ_MAX_QUEUES) return -EBADF;
    if (len > MQ_MSG_SIZE) return -EMSGSIZE;

    /* User memory must not be touched while holding the spinlock. */
    uint8_t staged[MQ_MSG_SIZE];
    if (copy_from_user(staged, user_buf, len) < 0) return -EFAULT;

    int rc = 0;
    uintptr_t fl = spin_lock_irqsave(&mq_lock);
    struct mq_queue* q = &mq_table[mqd];
    if (!q->active) {
        rc = -EBADF;
    } else if (q->count >= q->maxmsg) {
        rc = -EAGAIN;
    } else {
        struct mq_msg* m = &q->msgs[q->tail];
        memcpy(m->data, staged, len);
        m->len = len;
        m->prio = prio;
        q->tail = (q->tail + 1) % q->maxmsg;
        q->count++;
    }
    spin_unlock_irqrestore(&mq_lock, fl);
    return rc;
}
static int syscall_mq_receive_impl(int mqd, void* user_buf, uint32_t len, uint32_t* user_prio) {
if (mqd < 0 || mqd >= MQ_MAX_QUEUES) return -EBADF;
uintptr_t fl = spin_lock_irqsave(&mq_lock);
struct mq_queue* q = &mq_table[mqd];
if (!q->active) { spin_unlock_irqrestore(&mq_lock, fl); return -EBADF; }
if (q->count == 0) { spin_unlock_irqrestore(&mq_lock, fl); return -EAGAIN; }
struct mq_msg* m = &q->msgs[q->head];
uint32_t mlen = m->len;
uint32_t mprio = m->prio;
if (mlen > len) mlen = len;
q->head = (q->head + 1) % q->maxmsg;
q->count--;
spin_unlock_irqrestore(&mq_lock, fl);
if (copy_to_user(user_buf, m->data, mlen) < 0) return -EFAULT;
if (user_prio) {
if (user_range_ok(user_prio, 4))
(void)copy_to_user(user_prio, &mprio, 4);
}
return (int)mlen;
}
/*
 * Remove a named message queue by marking its table slot inactive.
 * Returns 0 on success, -EFAULT if the name cannot be copied from user
 * space, or -ENOENT if no active queue has that name.
 */
static int syscall_mq_unlink_impl(const char* user_name) {
    char kname[32];
    if (copy_from_user(kname, user_name, 31) < 0) return -EFAULT;
    kname[31] = 0;

    int rc = -ENOENT;
    uintptr_t irq = spin_lock_irqsave(&mq_lock);
    int found = mq_find_by_name(kname);
    if (found >= 0) {
        mq_table[found].active = 0;
        rc = 0;
    }
    spin_unlock_irqrestore(&mq_lock, irq);
    return rc;
}
/* --- POSIX named semaphores --- */
#define SEM_MAX 16
/* One named semaphore slot. */
struct ksem_named {
int active; /* non-zero while the slot is in use */
char name[32]; /* NUL-terminated semaphore name */
int32_t value; /* current count */
spinlock_t lock; /* taken by wait/post/getvalue around `value` */
};
/* Global semaphore table; the id returned to callers is the table index.
 * sem_table_lock is taken by open/unlink while scanning or claiming slots. */
static struct ksem_named sem_table[SEM_MAX];
static spinlock_t sem_table_lock = {0};
static int syscall_sem_open_impl(const char* user_name, uint32_t oflag, uint32_t init_val) {
char name[32];
if (copy_from_user(name, user_name, 31) < 0) return -EFAULT;
name[31] = 0;
uintptr_t fl = spin_lock_irqsave(&sem_table_lock);
for (int i = 0; i < SEM_MAX; i++) {
if (sem_table[i].active && strcmp(sem_table[i].name, name) == 0) {
spin_unlock_irqrestore(&sem_table_lock, fl);
return i;
}
}
if (!(oflag & 0x40U)) { /* O_CREAT */
spin_unlock_irqrestore(&sem_table_lock, fl);
return -ENOENT;
}
for (int i = 0; i < SEM_MAX; i++) {
if (!sem_table[i].active) {
memset(&sem_table[i], 0, sizeof(sem_table[i]));
sem_table[i].active = 1;
strcpy(sem_table[i].name, name);
sem_table[i].value = (int32_t)init_val;
spin_unlock_irqrestore(&sem_table_lock, fl);
return i;
}
}
spin_unlock_irqrestore(&sem_table_lock, fl);
return -ENOSPC;
}
/*
 * Close a semaphore id.
 * Ids are plain table indices with no per-open state, so this is a no-op
 * that always returns 0.
 */
static int syscall_sem_close_impl(int sid) {
    (void)sid; /* intentionally unused */
    return 0;
}
static int syscall_sem_wait_impl(int sid) {
if (sid < 0 || sid >= SEM_MAX) return -EINVAL;
extern void process_sleep(uint32_t ticks);
for (;;) {
uintptr_t fl = spin_lock_irqsave(&sem_table[sid].lock);
if (!sem_table[sid].active) {
spin_unlock_irqrestore(&sem_table[sid].lock, fl);
return -EINVAL;
}
if (sem_table[sid].value > 0) {
sem_table[sid].value--;
spin_unlock_irqrestore(&sem_table[sid].lock, fl);
return 0;
}
spin_unlock_irqrestore(&sem_table[sid].lock, fl);
process_sleep(1);
}
}
static int syscall_sem_post_impl(int sid) {
if (sid < 0 || sid >= SEM_MAX) return -EINVAL;
uintptr_t fl = spin_lock_irqsave(&sem_table[sid].lock);
if (!sem_table[sid].active) {
spin_unlock_irqrestore(&sem_table[sid].lock, fl);
return -EINVAL;
}
sem_table[sid].value++;
spin_unlock_irqrestore(&sem_table[sid].lock, fl);
return 0;
}
static int syscall_sem_unlink_impl(const char* user_name) {
char name[32];
if (copy_from_user(name, user_name, 31) < 0) return -EFAULT;
name[31] = 0;
uintptr_t fl = spin_lock_irqsave(&sem_table_lock);
for (int i = 0; i < SEM_MAX; i++) {
if (sem_table[i].active && strcmp(sem_table[i].name, name) == 0) {
sem_table[i].active = 0;
spin_unlock_irqrestore(&sem_table_lock, fl);
return 0;
}
}
spin_unlock_irqrestore(&sem_table_lock, fl);
return -ENOENT;
}
static int syscall_sem_getvalue_impl(int sid, int* user_val) {
if (sid < 0 || sid >= SEM_MAX) return -EINVAL;
if (!user_val || user_range_ok(user_val, 4) == 0) return -EFAULT;
uintptr_t fl = spin_lock_irqsave(&sem_table[sid].lock);
if (!sem_table[sid].active) {
spin_unlock_irqrestore(&sem_table[sid].lock, fl);
return -EINVAL;
}
int32_t v = sem_table[sid].value;
spin_unlock_irqrestore(&sem_table[sid].lock, fl);
if (copy_to_user(user_val, &v, 4) < 0) return -EFAULT;
return 0;
}
/* --- Shared library loading (dlopen/dlsym/dlclose) --- */
/* Table limits and address layout: library slot N is loaded at
 * DLOPEN_BASE + N * DLOPEN_STRIDE. */
#define DLOPEN_MAX_LIBS 8
#define DLOPEN_MAX_SYMS 64
#define DLOPEN_BASE 0x30000000U
#define DLOPEN_STRIDE 0x00400000U /* 4 MB per library */
/* One exported symbol: name (truncated to 63 chars) and its absolute
 * address (symbol value plus the library load base). */
struct dl_sym {
char name[64];
uint32_t value;
};
/* One loaded library slot. */
struct dl_lib {
int active;
char path[128];
uint32_t base; /* load base address */
struct dl_sym syms[DLOPEN_MAX_SYMS];
uint32_t nsyms;
};
/* Global library table guarded by dl_lock; handles returned to callers are
 * the table index plus one. */
static struct dl_lib dl_table[DLOPEN_MAX_LIBS];
static spinlock_t dl_lock = {0};
/* Read a 16-bit value in host byte order without alignment assumptions. */
static uint16_t dl_rd16(const void* p) {
    uint16_t v;
    memcpy(&v, p, sizeof(v));
    return v;
}

/* Read a 32-bit value in host byte order without alignment assumptions. */
static uint32_t dl_rd32(const void* p) {
    uint32_t v;
    memcpy(&v, p, sizeof(v));
    return v;
}

/* Kernel copy of the ELF file plus its (already validated) program header
 * table geometry. */
struct dl_image {
    const uint8_t* data;
    uint32_t size;
    uint32_t phoff;
    uint16_t phnum;
    uint16_t phentsize;
};

/* Return a pointer to program header `i` inside the file image. */
static const uint8_t* dl_phdr(const struct dl_image* img, uint16_t i) {
    return img->data + img->phoff + (uint32_t)i * img->phentsize;
}

/*
 * Check every non-empty PT_LOAD segment before anything is mapped:
 * it must fit inside one DLOPEN_STRIDE window, its file part must not exceed
 * its memory size, and the file part must lie inside the image.
 * Returns 0 if all segments are acceptable, -EINVAL otherwise.
 */
static int dl_check_load_segments(const struct dl_image* img) {
    for (uint16_t i = 0; i < img->phnum; i++) {
        const uint8_t* ph = dl_phdr(img, i);
        if (dl_rd32(ph + 0) != 1) continue; /* PT_LOAD = 1 */
        uint32_t p_offset = dl_rd32(ph + 4);
        uint32_t p_vaddr = dl_rd32(ph + 8);
        uint32_t p_filesz = dl_rd32(ph + 16);
        uint32_t p_memsz = dl_rd32(ph + 20);
        if (p_memsz == 0) continue;
        if (p_vaddr >= DLOPEN_STRIDE || p_memsz > DLOPEN_STRIDE - p_vaddr) return -EINVAL;
        if (p_filesz > p_memsz) return -EINVAL;
        if (p_offset > img->size || p_filesz > img->size - p_offset) return -EINVAL;
    }
    return 0;
}

/*
 * Number of bytes available from library-relative address `rel` up to the
 * end of the PT_LOAD segment that contains it; 0 if `rel` is not inside any
 * loaded segment. Used to bound reads of tables named by PT_DYNAMIC.
 */
static uint32_t dl_loaded_span(const struct dl_image* img, uint32_t rel) {
    for (uint16_t i = 0; i < img->phnum; i++) {
        const uint8_t* ph = dl_phdr(img, i);
        if (dl_rd32(ph + 0) != 1) continue; /* PT_LOAD = 1 */
        uint32_t p_vaddr = dl_rd32(ph + 8);
        uint32_t p_memsz = dl_rd32(ph + 20);
        if (p_memsz == 0) continue;
        if (rel >= p_vaddr && rel - p_vaddr < p_memsz)
            return p_memsz - (rel - p_vaddr);
    }
    return 0;
}

/*
 * Load a 32-bit ELF shared object into the current address space and record
 * its exported symbols.
 *
 * user_path: user pointer to the library path (127 bytes are copied and the
 *            kernel copy is NUL-terminated).
 *
 * Returns a 1-based handle on success (the existing handle if the path is
 * already in the table), or a negative errno: -EFAULT (bad path pointer),
 * -ENOMEM (table full, or allocation failure), -ENOENT (file not found),
 * -EIO (short read), -EINVAL (malformed or out-of-window ELF).
 *
 * Every offset and size taken from the file is treated as untrusted:
 *  - bounds checks are written so they cannot wrap;
 *  - all PT_LOAD segments are validated before the first page is mapped,
 *    so a segment can neither spill into another library's window nor reach
 *    kernel addresses, and the file copy never exceeds the mapped size;
 *  - the hash, symbol and string tables named by PT_DYNAMIC are only read
 *    when they lie inside a loaded segment, and symbol/name scans are
 *    clamped to that segment.
 *
 * NOTE(review): the slot picked under the first lock is not reserved while
 * the file is loaded, so two concurrent dlopen calls can still choose the
 * same slot. Pages mapped before a mid-load failure are not unmapped. Both
 * limitations predate this change.
 */
static int syscall_dlopen_impl(const char* user_path) {
    char path[128];
    if (copy_from_user(path, user_path, 127) < 0) return -EFAULT;
    path[127] = 0;

    /* Reuse an existing entry, otherwise remember the first free slot. */
    int slot = -1;
    uintptr_t fl = spin_lock_irqsave(&dl_lock);
    for (int i = 0; i < DLOPEN_MAX_LIBS; i++) {
        if (dl_table[i].active) {
            if (strcmp(dl_table[i].path, path) == 0) {
                spin_unlock_irqrestore(&dl_lock, fl);
                return i + 1; /* handle = 1-based index */
            }
        } else if (slot < 0) {
            slot = i;
        }
    }
    spin_unlock_irqrestore(&dl_lock, fl);
    if (slot < 0) return -ENOMEM;

    /* Read the whole file into kernel memory. */
    extern fs_node_t* vfs_lookup(const char* path);
    extern void* kmalloc(size_t);
    extern void kfree(void*);
    extern uint32_t vfs_read(fs_node_t*, uint32_t, uint32_t, uint8_t*);
    extern void* pmm_alloc_page(void);

    fs_node_t* node = vfs_lookup(path);
    if (!node) return -ENOENT;
    uint32_t flen = node->length;
    if (flen < 52) return -EINVAL; /* minimum ELF header */

    uint8_t* fbuf = (uint8_t*)kmalloc(flen);
    if (!fbuf) return -ENOMEM;

    int rc = -EINVAL;
    if (vfs_read(node, 0, flen, fbuf) != flen) {
        rc = -EIO;
        goto out_free;
    }

    /* Basic ELF validation */
    if (fbuf[0] != 0x7F || fbuf[1] != 'E' || fbuf[2] != 'L' || fbuf[3] != 'F')
        goto out_free;

    struct dl_image img;
    img.data = fbuf;
    img.size = flen;
    img.phoff = dl_rd32(fbuf + 28);
    img.phentsize = dl_rd16(fbuf + 42);
    img.phnum = dl_rd16(fbuf + 44);

    /* Program header table must lie inside the file (overflow-safe form). */
    if (img.phentsize < 32 || img.phoff > flen ||
        (uint32_t)img.phnum * img.phentsize > flen - img.phoff)
        goto out_free;

    if (dl_check_load_segments(&img) < 0)
        goto out_free;

    /* Map and populate the PT_LOAD segments inside this slot's window. */
    uint32_t base = DLOPEN_BASE + (uint32_t)slot * DLOPEN_STRIDE;
    for (uint16_t i = 0; i < img.phnum; i++) {
        const uint8_t* ph = dl_phdr(&img, i);
        if (dl_rd32(ph + 0) != 1) continue; /* PT_LOAD = 1 */
        uint32_t p_offset = dl_rd32(ph + 4);
        uint32_t p_vaddr = dl_rd32(ph + 8);
        uint32_t p_filesz = dl_rd32(ph + 16);
        uint32_t p_memsz = dl_rd32(ph + 20);
        if (p_memsz == 0) continue;

        /* Cannot wrap: the segment was checked to fit in the window. */
        uint32_t vaddr = base + p_vaddr;
        uint32_t first_page = vaddr & ~0xFFFU;
        uint32_t last_page = (vaddr + p_memsz - 1) & ~0xFFFU;
        for (uint32_t va = first_page; va <= last_page; va += 0x1000) {
            void* frame = pmm_alloc_page();
            if (!frame) {
                rc = -ENOMEM;
                goto out_free;
            }
            vmm_map_page((uint64_t)(uintptr_t)frame, (uint64_t)va,
                         VMM_FLAG_PRESENT | VMM_FLAG_RW | VMM_FLAG_USER);
        }
        if (p_filesz)
            memcpy((void*)(uintptr_t)vaddr, fbuf + p_offset, p_filesz);
        if (p_memsz > p_filesz)
            memset((void*)(uintptr_t)(vaddr + p_filesz), 0, p_memsz - p_filesz);
    }

    /* Parse the first PT_DYNAMIC to find SYMTAB, STRTAB, STRSZ and HASH.
     * Values are kept library-relative until they have been validated. */
    uint32_t symtab_rel = 0, strtab_rel = 0, hash_rel = 0, strsz = 0;
    int have_symtab = 0, have_strtab = 0, have_hash = 0;
    for (uint16_t i = 0; i < img.phnum; i++) {
        const uint8_t* ph = dl_phdr(&img, i);
        if (dl_rd32(ph + 0) != 2) continue; /* PT_DYNAMIC = 2 */
        uint32_t p_offset = dl_rd32(ph + 4);
        uint32_t p_filesz = dl_rd32(ph + 16);
        if (p_offset > flen || p_filesz > flen - p_offset) break;
        const uint8_t* dyn = fbuf + p_offset;
        uint32_t dyn_entries = p_filesz / 8;
        for (uint32_t d = 0; d < dyn_entries; d++) {
            int32_t tag = (int32_t)dl_rd32(dyn + d * 8);
            uint32_t val = dl_rd32(dyn + d * 8 + 4);
            if (tag == 0) break; /* DT_NULL */
            if (tag == 6) { symtab_rel = val; have_symtab = 1; } /* DT_SYMTAB */
            if (tag == 5) { strtab_rel = val; have_strtab = 1; } /* DT_STRTAB */
            if (tag == 10) strsz = val; /* DT_STRSZ */
            if (tag == 4) { hash_rel = val; have_hash = 1; } /* DT_HASH */
        }
        break;
    }

    /* Bound each table by the loaded segment that contains it. */
    uint32_t sym_span = have_symtab ? dl_loaded_span(&img, symtab_rel) : 0;
    uint32_t str_span = have_strtab ? dl_loaded_span(&img, strtab_rel) : 0;
    uint32_t nsyms = 0;
    if (have_hash && dl_loaded_span(&img, hash_rel) >= 8) {
        /* hash[1] = nchain = number of symbol table entries */
        nsyms = dl_rd32((const void*)(uintptr_t)(base + hash_rel + 4));
    }
    if (nsyms > sym_span / 16) nsyms = sym_span / 16;
    if (strsz > str_span) strsz = str_span;

    /* Publish the slot and record its exported symbols. */
    fl = spin_lock_irqsave(&dl_lock);
    memset(&dl_table[slot], 0, sizeof(dl_table[slot]));
    dl_table[slot].active = 1;
    strcpy(dl_table[slot].path, path);
    dl_table[slot].base = base;

    uint32_t cnt = 0;
    for (uint32_t s = 1; s < nsyms && cnt < DLOPEN_MAX_SYMS; s++) {
        /* Elf32_Sym: st_name@0, st_value@4, st_info@12, st_shndx@14 */
        const uint8_t* sym = (const uint8_t*)(uintptr_t)(base + symtab_rel + s * 16);
        uint32_t st_name = dl_rd32(sym + 0);
        uint32_t st_value = dl_rd32(sym + 4);
        uint8_t st_info = sym[12];
        uint16_t st_shndx = dl_rd16(sym + 14);

        /* Only global/weak defined symbols */
        uint8_t bind = st_info >> 4;
        if ((bind != 1 && bind != 2) || st_shndx == 0) continue;
        if (st_name >= strsz) continue;

        const char* name = (const char*)(uintptr_t)(base + strtab_rel + st_name);
        if (name[0] == 0) continue;

        /* Never scan past the string table or the 63-char name limit. */
        uint32_t limit = strsz - st_name;
        if (limit > 63) limit = 63;
        uint32_t nlen = 0;
        while (nlen < limit && name[nlen]) nlen++;

        memcpy(dl_table[slot].syms[cnt].name, name, nlen);
        dl_table[slot].syms[cnt].name[nlen] = 0;
        dl_table[slot].syms[cnt].value = st_value + base;
        cnt++;
    }
    dl_table[slot].nsyms = cnt;
    spin_unlock_irqrestore(&dl_lock, fl);

    rc = slot + 1; /* 1-based handle */

out_free:
    kfree(fbuf);
    return rc;
}
static int syscall_dlsym_impl(int handle, const char* user_name, uint32_t* user_addr) {
if (handle < 1 || handle > DLOPEN_MAX_LIBS) return -EINVAL;
if (!user_name || !user_addr) return -EFAULT;
if (user_range_ok(user_addr, 4) == 0) return -EFAULT;
char name[64];
if (copy_from_user(name, user_name, 63) < 0) return -EFAULT;
name[63] = 0;
int slot = handle - 1;
uintptr_t fl = spin_lock_irqsave(&dl_lock);
if (!dl_table[slot].active) {
spin_unlock_irqrestore(&dl_lock, fl);
return -EINVAL;
}
for (uint32_t i = 0; i < dl_table[slot].nsyms; i++) {
if (strcmp(dl_table[slot].syms[i].name, name) == 0) {
uint32_t addr = dl_table[slot].syms[i].value;
spin_unlock_irqrestore(&dl_lock, fl);
if (copy_to_user(user_addr, &addr, 4) < 0) return -EFAULT;
return 0;
}
}
spin_unlock_irqrestore(&dl_lock, fl);
return -ENOENT;
}
static int syscall_dlclose_impl(int handle) {
if (handle < 1 || handle > DLOPEN_MAX_LIBS) return -EINVAL;
int slot = handle - 1;
uintptr_t fl = spin_lock_irqsave(&dl_lock);
if (!dl_table[slot].active) {
spin_unlock_irqrestore(&dl_lock, fl);
return -EINVAL;
}
dl_table[slot].active = 0;
spin_unlock_irqrestore(&dl_lock, fl);
return 0;
}
/* --- Advisory file locking (flock) --- */
/* Operation bits accepted by flock_do(). */
enum {
FLOCK_SH = 1,
FLOCK_EX = 2,
FLOCK_NB = 4,
FLOCK_UN = 8,
};
#define FLOCK_TABLE_SIZE 64
/* One whole-file lock, keyed by (inode, pid). */
struct flock_entry {
uint32_t inode;
uint32_t pid;
int type; /* FLOCK_SH or FLOCK_EX */
int active;
};
/* Global lock table guarded by flock_lock_g. */
static struct flock_entry flock_table[FLOCK_TABLE_SIZE];
static spinlock_t flock_lock_g = {0};
static int flock_can_acquire(uint32_t inode, uint32_t pid, int type) {
for (int i = 0; i < FLOCK_TABLE_SIZE; i++) {
if (!flock_table[i].active || flock_table[i].inode != inode)
continue;
if (flock_table[i].pid == pid)
continue; /* our own lock — will be upgraded/downgraded */
if (type == FLOCK_EX || flock_table[i].type == FLOCK_EX)
return 0; /* conflict */
}
return 1;
}
/*
 * Apply an flock-style operation for (inode, pid).
 *
 * operation: FLOCK_UN to unlock, or exactly one of FLOCK_SH / FLOCK_EX,
 *            optionally combined with FLOCK_NB.
 *
 * Returns 0 on success, -EINVAL if the request names neither or both lock
 * types, -EWOULDBLOCK if FLOCK_NB is set and the lock conflicts, or -ENOLCK
 * if the lock table is full. Without FLOCK_NB the caller sleeps one tick at
 * a time until the lock can be granted.
 *
 * FLOCK_SH|FLOCK_EX together used to be accepted and stored as type 3,
 * which no conflict check ever treated as exclusive; it is now rejected.
 */
static int flock_do(uint32_t inode, uint32_t pid, int operation) {
    int type = operation & (FLOCK_SH | FLOCK_EX);
    int nonblock = operation & FLOCK_NB;

    if (operation & FLOCK_UN) {
        /* Drop this pid's lock on the inode, if any. */
        uintptr_t fl = spin_lock_irqsave(&flock_lock_g);
        for (int i = 0; i < FLOCK_TABLE_SIZE; i++) {
            if (flock_table[i].active && flock_table[i].inode == inode &&
                flock_table[i].pid == pid) {
                flock_table[i].active = 0;
                break;
            }
        }
        spin_unlock_irqrestore(&flock_lock_g, fl);
        return 0;
    }

    /* Exactly one lock type must be requested. */
    if (type != FLOCK_SH && type != FLOCK_EX) return -EINVAL;

    for (;;) {
        uintptr_t fl = spin_lock_irqsave(&flock_lock_g);
        if (flock_can_acquire(inode, pid, type)) {
            /* Find existing entry for this pid+inode or allocate new */
            int slot = -1;
            int free_slot = -1;
            for (int i = 0; i < FLOCK_TABLE_SIZE; i++) {
                if (flock_table[i].active && flock_table[i].inode == inode &&
                    flock_table[i].pid == pid) {
                    slot = i;
                    break;
                }
                if (!flock_table[i].active && free_slot < 0)
                    free_slot = i;
            }

            int rc = 0;
            if (slot >= 0) {
                flock_table[slot].type = type; /* upgrade/downgrade */
            } else if (free_slot >= 0) {
                flock_table[free_slot].inode = inode;
                flock_table[free_slot].pid = pid;
                flock_table[free_slot].type = type;
                flock_table[free_slot].active = 1;
            } else {
                rc = -ENOLCK;
            }
            spin_unlock_irqrestore(&flock_lock_g, fl);
            return rc;
        }
        spin_unlock_irqrestore(&flock_lock_g, fl);

        if (nonblock) return -EWOULDBLOCK;
        extern void process_sleep(uint32_t ticks);
        process_sleep(1); /* block and retry */
    }
}
static void flock_release_pid(uint32_t pid) {
uintptr_t fl = spin_lock_irqsave(&flock_lock_g);
for (int i = 0; i < FLOCK_TABLE_SIZE; i++) {
if (flock_table[i].active && flock_table[i].pid == pid)
flock_table[i].active = 0;
}
spin_unlock_irqrestore(&flock_lock_g, fl);
}
/* fcntl() command numbers handled by this file. */
enum {
FCNTL_F_DUPFD = 0,
FCNTL_F_GETFD = 1,
FCNTL_F_SETFD = 2,
FCNTL_F_GETFL = 3,
FCNTL_F_SETFL = 4,
FCNTL_F_GETLK = 5,
FCNTL_F_SETLK = 6,
FCNTL_F_SETLKW = 7,
FCNTL_F_DUPFD_CLOEXEC = 1030,
FCNTL_F_GETPIPE_SZ = 1032,
FCNTL_F_SETPIPE_SZ = 1033,
};
/* Record-lock types carried in k_flock.l_type. */
enum {
F_RDLCK = 0,
F_WRLCK = 1,
F_UNLCK = 2,
};
/* Kernel-side record-lock request/report structure.
 * NOTE(review): presumably mirrors the userland struct flock layout —
 * confirm against the libc headers. */
struct k_flock {
int16_t l_type;
int16_t l_whence;
uint32_t l_start;
uint32_t l_len; /* 0 = to EOF */
uint32_t l_pid;
};
#define RLOCK_TABLE_SIZE 64
/* One byte-range lock, keyed by (inode, pid) with an inclusive range. */
struct rlock_entry {
uint32_t inode;
uint32_t pid;
uint32_t start;
uint32_t end; /* 0xFFFFFFFF = to EOF */
int type; /* F_RDLCK or F_WRLCK */
int active;
};
/* Global record-lock table guarded by rlock_lock_g. */
static struct rlock_entry rlock_table[RLOCK_TABLE_SIZE];
static spinlock_t rlock_lock_g = {0};
/*
 * Test whether the inclusive ranges [s1, e1] and [s2, e2] share at least
 * one byte. Returns 1 if they overlap, 0 otherwise.
 */
static int rlock_overlaps(uint32_t s1, uint32_t e1, uint32_t s2, uint32_t e2) {
    if (e2 < s1) return 0; /* second range ends before the first begins */
    if (e1 < s2) return 0; /* first range ends before the second begins */
    return 1;
}
/*
 * Search for a lock held by another pid on `inode` that overlaps
 * [start, end] and is incompatible with `type` (either side is a write
 * lock). On a hit, returns 1 and, if `out` is non-NULL, stores the
 * conflicting entry there; otherwise returns 0 and leaves *out untouched.
 */
static int rlock_conflicts(uint32_t inode, uint32_t pid, int type,
                           uint32_t start, uint32_t end, struct rlock_entry** out) {
    for (int i = 0; i < RLOCK_TABLE_SIZE; i++) {
        struct rlock_entry* held = &rlock_table[i];

        int candidate = held->active && held->inode == inode && held->pid != pid;
        if (!candidate) continue;
        if (!rlock_overlaps(start, end, held->start, held->end)) continue;
        if (type != F_WRLCK && held->type != F_WRLCK) continue; /* read vs read */

        if (out) *out = held;
        return 1;
    }
    return 0;
}
/*
 * Set or clear a byte-range lock for (inode, pid) over [start, end].
 *
 * type:     F_RDLCK / F_WRLCK to lock, F_UNLCK to unlock.
 * blocking: non-zero to sleep (one tick at a time) until the lock can be
 *           granted; zero to fail immediately on conflict.
 *
 * Returns 0 on success, -EAGAIN on conflict in non-blocking mode, or
 * -ENOLCK when the table has no free entry.
 *
 * Ranges are not split: unlocking or re-locking removes every lock of this
 * pid that overlaps the range in its entirety.
 *
 * The insertion pass picks its slot after each entry has been processed, so
 * an entry that was just released is already a candidate. The former second
 * "scan again for a free slot" loop could therefore never find anything and
 * has been removed.
 */
static int rlock_setlk(uint32_t inode, uint32_t pid, int type,
                       uint32_t start, uint32_t end, int blocking) {
    if (type == F_UNLCK) {
        uintptr_t fl = spin_lock_irqsave(&rlock_lock_g);
        for (int i = 0; i < RLOCK_TABLE_SIZE; i++) {
            struct rlock_entry* e = &rlock_table[i];
            if (e->active && e->inode == inode && e->pid == pid &&
                rlock_overlaps(start, end, e->start, e->end)) {
                e->active = 0;
            }
        }
        spin_unlock_irqrestore(&rlock_lock_g, fl);
        return 0;
    }

    for (;;) {
        uintptr_t fl = spin_lock_irqsave(&rlock_lock_g);
        if (!rlock_conflicts(inode, pid, type, start, end, NULL)) {
            /* Remove our own overlapping locks and pick the first free
             * entry (which may be one we just released). */
            int slot = -1;
            for (int i = 0; i < RLOCK_TABLE_SIZE; i++) {
                struct rlock_entry* e = &rlock_table[i];
                if (e->active && e->inode == inode && e->pid == pid &&
                    rlock_overlaps(start, end, e->start, e->end)) {
                    e->active = 0;
                }
                if (!e->active && slot < 0) slot = i;
            }
            if (slot < 0) {
                spin_unlock_irqrestore(&rlock_lock_g, fl);
                return -ENOLCK;
            }

            struct rlock_entry* dst = &rlock_table[slot];
            dst->inode = inode;
            dst->pid = pid;
            dst->start = start;
            dst->end = end;
            dst->type = type;
            dst->active = 1;
            spin_unlock_irqrestore(&rlock_lock_g, fl);
            return 0;
        }
        spin_unlock_irqrestore(&rlock_lock_g, fl);

        if (!blocking) return -EAGAIN;
        extern void process_sleep(uint32_t ticks);
        process_sleep(1);
    }
}
static void rlock_release_pid(uint32_t pid) {
uintptr_t fl = spin_lock_irqsave(&rlock_lock_g);
for (int i = 0; i < RLOCK_TABLE_SIZE; i++) {
if (rlock_table[i].active && rlock_table[i].pid == pid)
rlock_table[i].active = 0;
}
spin_unlock_irqrestore(&rlock_lock_g, fl);
}
/* Special dirfd value. NOTE(review): presumably means "relative to the
 * current working directory" as in the *at() syscalls — its use is outside
 * this part of the file. */
enum {
AT_FDCWD = -100,
};
/* Forward declarations for helpers defined later in this file. */
static int path_resolve_user(const char* user_path, char* out, size_t out_sz);
static int fd_alloc(struct file* f);
static int fd_close(int fd);
static struct file* fd_get(int fd);
static void socket_syscall_dispatch(struct registers* regs, uint32_t syscall_no);
static void posix_ext_syscall_dispatch(struct registers* regs, uint32_t syscall_no);
/* One poll() entry: descriptor, requested events, returned events. */
struct pollfd {
int fd;
int16_t events;
int16_t revents;
};
/* Event bits used in pollfd.events / pollfd.revents. */
enum {
POLLIN = 0x0001,
POLLOUT = 0x0004,
POLLERR = 0x0008,
POLLHUP = 0x0010,
};
/* Shared wait loop used by both select and poll (defined below). */
static int poll_wait_kfds(struct pollfd* kfds, uint32_t nfds, int32_t timeout);
static int syscall_select_impl(uint32_t nfds,
uint64_t* user_readfds,
uint64_t* user_writefds,
uint64_t* user_exceptfds,
int32_t timeout) {
if (nfds > 64U) return -EINVAL;
if (user_exceptfds) return -EINVAL;
uint64_t rmask = 0;
uint64_t wmask = 0;
if (user_readfds) {
if (user_range_ok(user_readfds, sizeof(*user_readfds)) == 0) return -EFAULT;
if (copy_from_user(&rmask, user_readfds, sizeof(rmask)) < 0) return -EFAULT;
}
if (user_writefds) {
if (user_range_ok(user_writefds, sizeof(*user_writefds)) == 0) return -EFAULT;
if (copy_from_user(&wmask, user_writefds, sizeof(wmask)) < 0) return -EFAULT;
}
struct pollfd kfds[64];
uint32_t cnt = 0;
for (uint32_t fd = 0; fd < nfds; fd++) {
int16_t events = 0;
if ((rmask >> fd) & 1U) events |= POLLIN;
if ((wmask >> fd) & 1U) events |= POLLOUT;
if (!events) continue;
kfds[cnt].fd = (int)fd;
kfds[cnt].events = events;
kfds[cnt].revents = 0;
cnt++;
}
if (cnt == 0) {
if (user_readfds && copy_to_user(user_readfds, &rmask, sizeof(rmask)) < 0) return -EFAULT;
if (user_writefds && copy_to_user(user_writefds, &wmask, sizeof(wmask)) < 0) return -EFAULT;
return 0;
}
int rc = poll_wait_kfds(kfds, cnt, timeout);
if (rc < 0) return rc;
uint64_t r_out = 0;
uint64_t w_out = 0;
int ready = 0;
for (uint32_t i = 0; i < cnt; i++) {
uint32_t fd = (uint32_t)kfds[i].fd;
if ((kfds[i].revents & POLLIN) && ((rmask >> fd) & 1U)) {
r_out |= (1ULL << fd);
}
if ((kfds[i].revents & POLLOUT) && ((wmask >> fd) & 1U)) {
w_out |= (1ULL << fd);
}
}
uint64_t any = r_out | w_out;
for (uint32_t fd = 0; fd < nfds; fd++) {
if ((any >> fd) & 1ULL) ready++;
}
if (user_readfds && copy_to_user(user_readfds, &r_out, sizeof(r_out)) < 0) return -EFAULT;
if (user_writefds && copy_to_user(user_writefds, &w_out, sizeof(w_out)) < 0) return -EFAULT;
return ready;
}
/*
 * Copy a NUL-terminated string from user space one byte at a time.
 *
 * Copies at most out_sz bytes into `out`, stopping after the terminator.
 * If no terminator is found within out_sz bytes the result is truncated
 * and out[out_sz - 1] is set to NUL; this still returns 0.
 * Returns -EFAULT for NULL/zero-size arguments or if a byte cannot be read.
 */
static int execve_copy_user_str(char* out, size_t out_sz, const char* user_s) {
    if (!out || out_sz == 0 || !user_s) return -EFAULT;

    size_t pos = 0;
    while (pos < out_sz) {
        if (copy_from_user(out + pos, user_s + pos, 1) < 0) return -EFAULT;
        if (out[pos] == 0) return 0;
        pos++;
    }

    /* Ran out of room: force termination. */
    out[out_sz - 1] = 0;
    return 0;
}
/*
 * Fetch one pointer-sized value from a user-space pointer array slot.
 * A NULL user_p yields *out = 0 and success. Returns 0 on success or
 * -EFAULT if `out` is NULL or the user read fails (in which case *out is
 * left unchanged).
 */
static int execve_copy_user_ptr(const void* const* user_p, uintptr_t* out) {
    if (!out) return -EFAULT;

    uintptr_t fetched = 0;
    if (user_p) {
        if (copy_from_user(&fetched, user_p, sizeof(fetched)) < 0) return -EFAULT;
    }
    *out = fetched;
    return 0;
}
static int syscall_fork_impl(struct registers* regs) {
if (!regs) return -EINVAL;
if (!current_process) return -EINVAL;
uintptr_t src_as = hal_cpu_get_address_space() & ~(uintptr_t)0xFFFU;
if (current_process->addr_space != src_as) {
current_process->addr_space = src_as;
}
uintptr_t child_as = vmm_as_clone_user_cow(src_as);
if (!child_as) return -ENOMEM;
struct registers child_regs = *regs;
arch_regs_set_retval(&child_regs, 0);
struct process* child = process_fork_create(child_as, &child_regs);
if (!child) {
vmm_as_destroy(child_as);
return -ENOMEM;
}
child->heap_start = current_process->heap_start;
child->heap_break = current_process->heap_break;
/* FDs are already copied inside process_fork_create under sched_lock
* to prevent race where child runs before FDs are set up. */
return (int)child->pid;
}
/*
 * clone(): create a new task according to the flags in syscall argument 0.
 *
 * Arguments are read from the register frame: arg0 = flags, arg1 = child
 * stack, arg2 = parent-tid user address, arg3 = TLS base, arg4 = child-tid
 * user address.
 *
 * Returns the new task's pid, -EINVAL if regs or current_process is
 * missing, or -ENOMEM if the task cannot be created.
 */
__attribute__((noinline))
static int syscall_clone_impl(struct registers* regs) {
    if (!regs || !current_process) return -EINVAL;

    const uint32_t flags = sc_arg0(regs);
    const uintptr_t stack_top = (uintptr_t)sc_arg1(regs);
    const uintptr_t tls = (uintptr_t)sc_arg3(regs);

    struct process* task = process_clone_create(flags, stack_top, regs, tls);
    if (!task) return -ENOMEM;

    /* CLONE_PARENT_SETTID: best-effort write of the child tid to the
     * parent's user address; a failed copy is ignored. */
    if ((flags & CLONE_PARENT_SETTID) && sc_arg2(regs)) {
        uint32_t new_tid = task->pid;
        (void)copy_to_user((void*)(uintptr_t)sc_arg2(regs), &new_tid, sizeof(new_tid));
    }

    /* CLONE_CHILD_CLEARTID: remember the address for the child to clear
     * on exit. */
    if ((flags & CLONE_CHILD_CLEARTID) && sc_arg4(regs)) {
        task->clear_child_tid = (uint32_t*)(uintptr_t)sc_arg4(regs);
    }

    return (int)task->pid;
}
/* Shared state of one pipe: a byte buffer with read/write positions, a fill
 * count, and counts of reader and writer ends.
 * NOTE(review): presumably a ring buffer of `cap` bytes — the code that
 * uses it is outside this part of the file. */
struct pipe_state {
uint8_t* buf;
uint32_t cap;
uint32_t rpos;
uint32_t wpos;
uint32_t count;
uint32_t readers;
uint32_t writers;
};
/* VFS node wrapper for one end of a pipe; `ps` points at the shared state
 * and is_read_end distinguishes the two ends. */
struct pipe_node {
fs_node_t node;
struct pipe_state* ps;
uint32_t is_read_end;
};
/*
 * Core wait loop shared by select() and poll().
 *
 * kfds:    kernel array of nfds entries; revents is rewritten on each pass.
 * nfds:    number of entries (at most 64).
 * timeout: 0 = single pass, > 0 = give up once that many units of
 *          get_tick_count() have elapsed, < 0 = wait without limit.
 *
 * Entries with a negative fd are skipped. A descriptor that cannot be
 * resolved reports POLLERR. Nodes without a poll handler are treated as
 * always ready for whatever was requested. Between passes the caller sleeps
 * for one tick.
 *
 * Returns the number of entries with non-zero revents, 0 on timeout, or
 * -EINVAL for a NULL array or too many entries.
 */
static int poll_wait_kfds(struct pollfd* kfds, uint32_t nfds, int32_t timeout) {
    if (!kfds || nfds > 64U) return -EINVAL;

    extern uint32_t get_tick_count(void);
    const uint32_t t0 = get_tick_count();

    for (;;) {
        int hits = 0;

        for (uint32_t i = 0; i < nfds; i++) {
            struct pollfd* p = &kfds[i];
            p->revents = 0;
            if (p->fd < 0) continue;

            struct file* f = fd_get(p->fd);
            if (!f || !f->node) {
                p->revents |= POLLERR;
                hits++;
                continue;
            }

            fs_node_t* n = f->node;
            if (n->f_ops && n->f_ops->poll) {
                /* Translate to VFS bits, ask the node, translate back. */
                int want = 0;
                if (p->events & POLLIN) want |= VFS_POLL_IN;
                if (p->events & POLLOUT) want |= VFS_POLL_OUT;

                int got = n->f_ops->poll(n, want);
                if (got & VFS_POLL_IN) p->revents |= POLLIN;
                if (got & VFS_POLL_OUT) p->revents |= POLLOUT;
                if (got & VFS_POLL_ERR) p->revents |= POLLERR;
                if (got & VFS_POLL_HUP) p->revents |= POLLHUP;
            } else {
                /* No poll handler: report the requested events as ready. */
                if (p->events & POLLIN) p->revents |= POLLIN;
                if (p->events & POLLOUT) p->revents |= POLLOUT;
            }

            if (p->revents) hits++;
        }

        if (hits) return hits;
        if (timeout == 0) return 0;
        if (timeout > 0 && (uint32_t)(get_tick_count() - t0) >= (uint32_t)timeout)
            return 0;

        process_sleep(1);
    }
}
/*
 * poll(): copy the user's pollfd array in, wait, and copy the results back.
 *
 * user_fds: user pointer to an array of nfds entries; may be NULL when
 *           nfds is 0.
 * nfds:     number of entries (at most 64).
 * timeout:  forwarded to poll_wait_kfds().
 *
 * Returns the number of ready entries, 0 on timeout, -EINVAL if nfds is too
 * large, or -EFAULT if the array is missing or inaccessible.
 *
 * With nfds == 0 no user memory is touched, so poll(NULL, 0, timeout) works
 * as a plain timed wait instead of failing with -EFAULT.
 */
static int syscall_poll_impl(struct pollfd* user_fds, uint32_t nfds, int32_t timeout) {
    if (!user_fds && nfds != 0) return -EFAULT;
    if (nfds > 64U) return -EINVAL;

    struct pollfd kfds[64];
    if (nfds == 0) return poll_wait_kfds(kfds, 0, timeout);

    const size_t bytes = sizeof(struct pollfd) * (size_t)nfds;
    if (user_range_ok(user_fds, bytes) == 0) return -EFAULT;
    if (copy_from_user(kfds, user_fds, bytes) < 0) return -EFAULT;

    int rc = poll_wait_kfds(kfds, nfds, timeout);
    if (rc < 0) return rc;

    if (copy_to_user(user_fds, kfds, bytes) < 0) return -EFAULT;
    return rc;
}
/* ------------------------------------------------------------------ */
/* epoll implementation */
/* ------------------------------------------------------------------ */
/* Maximum number of watched descriptors per epoll instance; also the upper
 * bound accepted for maxevents in epoll_wait. */
#define EPOLL_MAX_EVENTS 64
/* epoll_ctl() operations. */
enum {
EPOLL_CTL_ADD = 1,
EPOLL_CTL_DEL = 2,
EPOLL_CTL_MOD = 3,
};
/* Event bits; EPOLLET requests edge-triggered reporting. */
enum {
EPOLLIN = 0x001,
EPOLLOUT = 0x004,
EPOLLERR = 0x008,
EPOLLHUP = 0x010,
EPOLLET = (1U << 31),
};
/* Event record exchanged with user space. */
struct epoll_event {
uint32_t events;
uint64_t data;
};
/* One watched descriptor inside an epoll instance. last_revents holds the
 * events seen on the previous scan and is used for edge-triggered mode. */
struct epoll_interest {
int fd;
uint32_t events;
uint64_t data;
uint32_t last_revents;
};
/* An epoll instance: a fixed array of interests, of which the first `count`
 * are in use. */
struct epoll_instance {
struct epoll_interest items[EPOLL_MAX_EVENTS];
int count;
};
/*
 * Close handler for epoll nodes: frees the epoll_instance whose address is
 * stored in node->inode and clears the field. A NULL node or an already
 * cleared inode is ignored.
 */
static void epoll_close(fs_node_t* node) {
    if (!node || !node->inode) return;

    kfree((void*)(uintptr_t)node->inode);
    node->inode = 0;
}
/*
 * Poll handler for epoll nodes themselves: always reports no events, so an
 * epoll descriptor never shows up as ready when it is polled.
 */
static int epoll_poll(fs_node_t* node, int events) {
    (void)node;   /* unused */
    (void)events; /* unused */
    return 0;
}
/* File operations for epoll nodes: only close and poll are provided. The
 * address of this table is also used by epoll_ctl/epoll_wait to recognise
 * an epoll descriptor. */
static struct file_operations epoll_fops = {
.read = NULL,
.write = NULL,
.open = NULL,
.close = epoll_close,
.ioctl = NULL,
.mmap = NULL,
.poll = epoll_poll,
};
/*
 * epoll_create(): allocate an empty epoll instance, wrap it in a VFS node
 * and file object, and install it in the descriptor table.
 *
 * Returns the new descriptor, -EINVAL without a current process, -ENOMEM
 * on allocation failure, or the negative value from fd_alloc(). Everything
 * allocated here is freed again on any failure.
 */
static int syscall_epoll_create_impl(void) {
    if (!current_process) return -EINVAL;

    int rc = -ENOMEM;

    struct epoll_instance* inst = (struct epoll_instance*)kmalloc(sizeof(*inst));
    if (!inst) return rc;
    memset(inst, 0, sizeof(*inst));

    fs_node_t* vnode = (fs_node_t*)kmalloc(sizeof(fs_node_t));
    if (!vnode) goto free_inst;
    memset(vnode, 0, sizeof(*vnode));
    vnode->flags = FS_FILE;
    vnode->f_ops = &epoll_fops;
    vnode->inode = (uintptr_t)inst; /* instance pointer lives in the inode field */

    struct file* handle = (struct file*)kmalloc(sizeof(struct file));
    if (!handle) goto free_node;
    memset(handle, 0, sizeof(*handle));
    handle->node = vnode;
    handle->refcount = 1;

    rc = fd_alloc(handle);
    if (rc >= 0) return rc;

    kfree(handle);
free_node:
    kfree(vnode);
free_inst:
    kfree(inst);
    return rc;
}
static int syscall_epoll_ctl_impl(int epfd, int op, int fd,
struct epoll_event* user_event) {
if (!current_process) return -EINVAL;
struct file* ef = fd_get(epfd);
if (!ef || !ef->node || ef->node->f_ops != &epoll_fops) return -EBADF;
struct epoll_instance* ep = (struct epoll_instance*)(uintptr_t)ef->node->inode;
if (!ep) return -EBADF;
if (fd < 0 || !fd_get(fd)) return -EBADF;
if (fd == epfd) return -EINVAL;
int idx = -1;
for (int i = 0; i < ep->count; i++) {
if (ep->items[i].fd == fd) { idx = i; break; }
}
if (op == EPOLL_CTL_ADD) {
if (idx >= 0) return -EEXIST;
if (ep->count >= EPOLL_MAX_EVENTS) return -ENOSPC;
if (!user_event) return -EFAULT;
struct epoll_event ev;
if (copy_from_user(&ev, user_event, sizeof(ev)) < 0) return -EFAULT;
ep->items[ep->count].fd = fd;
ep->items[ep->count].events = ev.events;
ep->items[ep->count].data = ev.data;
ep->items[ep->count].last_revents = 0;
ep->count++;
return 0;
}
if (op == EPOLL_CTL_MOD) {
if (idx < 0) return -ENOENT;
if (!user_event) return -EFAULT;
struct epoll_event ev;
if (copy_from_user(&ev, user_event, sizeof(ev)) < 0) return -EFAULT;
ep->items[idx].events = ev.events;
ep->items[idx].data = ev.data;
ep->items[idx].last_revents = 0;
return 0;
}
if (op == EPOLL_CTL_DEL) {
if (idx < 0) return -ENOENT;
ep->items[idx] = ep->items[ep->count - 1];
ep->count--;
return 0;
}
return -EINVAL;
}
__attribute__((noinline))
static int syscall_epoll_wait_impl(int epfd, struct epoll_event* user_events,
int maxevents, int timeout) {
if (!current_process) return -EINVAL;
if (maxevents <= 0 || maxevents > EPOLL_MAX_EVENTS) return -EINVAL;
if (!user_events) return -EFAULT;
if (user_range_ok(user_events, sizeof(struct epoll_event) * (size_t)maxevents) == 0)
return -EFAULT;
struct file* ef = fd_get(epfd);
if (!ef || !ef->node || ef->node->f_ops != &epoll_fops) return -EBADF;
struct epoll_instance* ep = (struct epoll_instance*)(uintptr_t)ef->node->inode;
if (!ep) return -EBADF;
extern uint32_t get_tick_count(void);
uint32_t start_tick = get_tick_count();
for (;;) {
struct epoll_event out[EPOLL_MAX_EVENTS];
int ready = 0;
for (int i = 0; i < ep->count && ready < maxevents; i++) {
int fd = ep->items[i].fd;
struct file* f = fd_get(fd);
if (!f || !f->node) {
out[ready].events = EPOLLERR;
out[ready].data = ep->items[i].data;
ready++;
continue;
}
int vfs_req = 0;
if (ep->items[i].events & EPOLLIN) vfs_req |= VFS_POLL_IN;
if (ep->items[i].events & EPOLLOUT) vfs_req |= VFS_POLL_OUT;
int vfs_rev = 0;
int (*fn_poll)(fs_node_t*, int) = NULL;
if (f->node->f_ops && f->node->f_ops->poll) fn_poll = f->node->f_ops->poll;
if (fn_poll) {
vfs_rev = fn_poll(f->node, vfs_req);
} else {
vfs_rev = vfs_req;
}
uint32_t revents = 0;
if (vfs_rev & VFS_POLL_IN) revents |= EPOLLIN;
if (vfs_rev & VFS_POLL_OUT) revents |= EPOLLOUT;
if (vfs_rev & VFS_POLL_ERR) revents |= EPOLLERR;
if (vfs_rev & VFS_POLL_HUP) revents |= EPOLLHUP;
if (revents) {
int report = 1;
if (ep->items[i].events & EPOLLET) {
uint32_t new_bits = revents & ~ep->items[i].last_revents;
if (!new_bits) report = 0;
}
ep->items[i].last_revents = revents;
if (report) {
out[ready].events = revents;
out[ready].data = ep->items[i].data;
ready++;
}
} else {
ep->items[i].last_revents = 0;
}
}
if (ready > 0) {
if (copy_to_user(user_events, out,
sizeof(struct epoll_event) * (size_t)ready) < 0)
return -EFAULT;
return ready;
}
if (timeout == 0) return 0;
if (timeout > 0) {
uint32_t now = get_tick_count();
uint32_t elapsed = now - start_tick;
if (elapsed >= (uint32_t)timeout) return 0;
}
process_sleep(1);
}
}
/* ------------------------------------------------------------------ */
/* inotify implementation */
/* ------------------------------------------------------------------ */
/* Per-instance limits: number of watches and size of the event ring. */
#define INOTIFY_MAX_WATCHES 32
#define INOTIFY_EVENT_BUF 4096
/* Event mask bits. */
enum {
    IN_ACCESS = 0x00000001,
    IN_MODIFY = 0x00000002,
    IN_ATTRIB = 0x00000004,
    IN_CLOSE_WRITE = 0x00000008,
    IN_CLOSE_NOWRITE = 0x00000010,
    IN_OPEN = 0x00000020,
    IN_MOVED_FROM = 0x00000040,
    IN_MOVED_TO = 0x00000080,
    IN_CREATE = 0x00000100,
    IN_DELETE = 0x00000200,
    IN_DELETE_SELF = 0x00000400,
    IN_MOVE_SELF = 0x00000800,
};
/* Fixed-size header of one queued event; `len` bytes of padded name
 * follow it in the ring. */
struct inotify_event_hdr {
    int wd;
    uint32_t mask;
    uint32_t cookie;
    uint32_t len;
};
/* One watch: descriptor, watched path, event mask, in-use flag. */
struct inotify_watch {
    int wd;
    char path[128];
    uint32_t mask;
    int active;
};
/* One inotify instance: its watches plus a byte ring of pending events.
 * event_count is the number of bytes currently queued. */
struct inotify_instance {
    struct inotify_watch watches[INOTIFY_MAX_WATCHES];
    int next_wd;
    uint8_t event_buf[INOTIFY_EVENT_BUF];
    uint32_t event_rpos;
    uint32_t event_wpos;
    uint32_t event_count;
};
/*
 * Queue one event (header plus optional name) on an inotify instance.
 *
 * in:   target instance; a NULL instance is ignored.
 * wd:   watch descriptor stored in the header.
 * mask: event mask stored in the header.
 * name: optional NUL-terminated name. It is stored including its
 *       terminator and zero-padded to a multiple of 4 bytes; the padded
 *       size goes into the header's len field. NULL means no name (len 0).
 *
 * The event is silently dropped if it does not fit in the free space of
 * the ring.
 *
 * Padding bytes are always written as zero. The previous version kept
 * reading name[i] beyond the terminator while padding, which is an
 * out-of-bounds read and could copy stray bytes into the event.
 */
__attribute__((unused))
static void inotify_push_event(struct inotify_instance* in, int wd,
                               uint32_t mask, const char* name) {
    if (!in) return;

    uint32_t raw_len = 0;    /* characters before the terminator */
    uint32_t padded_len = 0; /* bytes stored after the header */
    if (name) {
        while (name[raw_len]) raw_len++;
        padded_len = (raw_len + 1 + 3) & ~3U; /* NUL, then align to 4 bytes */
    }

    uint32_t total = (uint32_t)sizeof(struct inotify_event_hdr) + padded_len;
    if (in->event_count + total > INOTIFY_EVENT_BUF) return; /* drop if full */

    struct inotify_event_hdr hdr;
    hdr.wd = wd;
    hdr.mask = mask;
    hdr.cookie = 0;
    hdr.len = padded_len;

    /* Write header byte by byte into ring */
    const uint8_t* hp = (const uint8_t*)&hdr;
    for (uint32_t i = 0; i < sizeof(hdr); i++) {
        in->event_buf[in->event_wpos % INOTIFY_EVENT_BUF] = hp[i];
        in->event_wpos++;
    }

    /* Write name followed by zero padding; never index past the NUL. */
    for (uint32_t i = 0; i < padded_len; i++) {
        uint8_t c = (i < raw_len) ? (uint8_t)name[i] : 0;
        in->event_buf[in->event_wpos % INOTIFY_EVENT_BUF] = c;
        in->event_wpos++;
    }

    in->event_count += total;
}
/*
 * read() handler for an inotify descriptor.
 *
 * Copies as many whole event records as fit into `buffer` (at most `size`
 * bytes) and removes them from the ring. A record is never split: copying
 * stops at the first record that does not fit. Returns the number of bytes
 * copied, or 0 when nothing is queued or an argument is missing.
 */
static uint32_t inotify_read(fs_node_t* node, uint32_t offset, uint32_t size, uint8_t* buffer) {
    (void)offset;
    if (node == NULL || buffer == NULL) return 0;

    struct inotify_instance* inst = (struct inotify_instance*)(uintptr_t)node->inode;
    if (inst == NULL || inst->event_count == 0) return 0;

    const uint32_t hdr_sz = (uint32_t)sizeof(struct inotify_event_hdr);
    uint32_t done = 0;

    for (;;) {
        if (done >= size || inst->event_count == 0) break;
        if (inst->event_count < hdr_sz) break;

        /* Peek at the header (it may straddle the end of the ring). */
        struct inotify_event_hdr hdr = {0, 0, 0, 0};
        uint8_t* raw = (uint8_t*)&hdr;
        const uint32_t pos = inst->event_rpos;
        for (uint32_t k = 0; k < hdr_sz; k++) {
            raw[k] = inst->event_buf[(pos + k) % INOTIFY_EVENT_BUF];
        }

        const uint32_t rec_sz = hdr_sz + hdr.len;
        if (inst->event_count < rec_sz) break; /* record not fully queued */
        if (done + rec_sz > size) break;       /* caller's buffer is full */

        for (uint32_t k = 0; k < rec_sz; k++) {
            buffer[done + k] = inst->event_buf[(pos + k) % INOTIFY_EVENT_BUF];
        }

        inst->event_rpos = (pos + rec_sz) % INOTIFY_EVENT_BUF;
        inst->event_count -= rec_sz;
        done += rec_sz;
    }
    return done;
}
/* close() handler: releases the inotify instance stored in node->inode. */
static void inotify_close(fs_node_t* node) {
    if (!node) return;
    if (!node->inode) return;

    void* inst = (void*)(uintptr_t)node->inode;
    kfree(inst);
    node->inode = 0; /* make a repeated close harmless */
}
/*
 * poll() handler: reports VFS_POLL_IN when the caller asked for it and at
 * least one byte of event data is queued; otherwise reports nothing.
 */
static int inotify_poll(fs_node_t* node, int events) {
    if (node == NULL) return 0;

    const struct inotify_instance* inst =
        (const struct inotify_instance*)(uintptr_t)node->inode;
    if (inst == NULL) return 0;

    if ((events & VFS_POLL_IN) == 0) return 0;
    return (inst->event_count > 0) ? VFS_POLL_IN : 0;
}
/*
 * File operations for inotify descriptors. Only read, close and poll are
 * implemented. The address of this table is also used by the inotify
 * syscalls to recognise inotify file descriptors.
 */
static struct file_operations inotify_fops = {
.read = inotify_read,
.write = NULL,
.open = NULL,
.close = inotify_close,
.ioctl = NULL,
.mmap = NULL,
.poll = inotify_poll,
};
/*
 * inotify_init: allocate an inotify instance, wrap it in an anonymous
 * fs node + struct file and install it in the caller's descriptor table.
 *
 * Returns the new descriptor, -EINVAL without a current process, -ENOMEM on
 * allocation failure, or the (negative) result of fd_alloc().
 */
static int syscall_inotify_init_impl(void) {
    if (!current_process) return -EINVAL;

    struct inotify_instance* inst = NULL;
    fs_node_t* node = NULL;
    struct file* file = NULL;
    int fd = -ENOMEM;

    inst = (struct inotify_instance*)kmalloc(sizeof(*inst));
    if (!inst) goto fail;
    memset(inst, 0, sizeof(*inst));
    inst->next_wd = 1; /* watch descriptors start at 1 */

    node = (fs_node_t*)kmalloc(sizeof(fs_node_t));
    if (!node) goto fail;
    memset(node, 0, sizeof(*node));
    node->flags = FS_FILE;
    node->f_ops = &inotify_fops;
    node->inode = (uintptr_t)inst; /* instance pointer travels in the inode field */

    file = (struct file*)kmalloc(sizeof(struct file));
    if (!file) goto fail;
    memset(file, 0, sizeof(*file));
    file->node = node;
    file->refcount = 1;

    fd = fd_alloc(file);
    if (fd >= 0) return fd;

fail:
    /* Unwind in reverse order of allocation; fd holds the error to report. */
    if (file) kfree(file);
    if (node) kfree(node);
    if (inst) kfree(inst);
    return fd;
}
/*
 * inotify_add_watch: start (or update) a watch on a path.
 *
 * infd      - descriptor previously returned by inotify_init
 * user_path - user-space path; resolved against the caller's cwd
 * mask      - IN_* bits to store for the watch
 *
 * Returns the watch descriptor. If the path is already watched, its mask is
 * replaced and the existing descriptor is returned.
 * Errors: -EINVAL (no current process), -EBADF (infd is not an inotify
 * descriptor), the error reported by path_resolve_user(), -ENOENT (path does
 * not exist), -ENOSPC (all INOTIFY_MAX_WATCHES slots are in use).
 */
static int syscall_inotify_add_watch_impl(int infd, const char* user_path, uint32_t mask) {
    if (!current_process) return -EINVAL;

    struct file* ef = fd_get(infd);
    if (!ef || !ef->node || ef->node->f_ops != &inotify_fops) return -EBADF;
    struct inotify_instance* in = (struct inotify_instance*)(uintptr_t)ef->node->inode;
    if (!in) return -EBADF;

    /* Propagate the resolver's own error code instead of folding every
     * failure into -EFAULT, as the other path-based syscalls do. */
    char kpath[128];
    int prc = path_resolve_user(user_path, kpath, sizeof(kpath));
    if (prc < 0) return prc;

    /* The lookup is only an existence check; the node itself is not kept. */
    fs_node_t* target = vfs_lookup(kpath);
    if (!target) return -ENOENT;

    /* One pass: update an existing watch, remembering the first free slot. */
    int slot = -1;
    for (int i = 0; i < INOTIFY_MAX_WATCHES; i++) {
        if (!in->watches[i].active) {
            if (slot < 0) slot = i;
            continue;
        }
        if (strcmp(in->watches[i].path, kpath) == 0) {
            in->watches[i].mask = mask;
            return in->watches[i].wd;
        }
    }
    if (slot < 0) return -ENOSPC;

    int wd = in->next_wd++;
    in->watches[slot].wd = wd;
    strncpy(in->watches[slot].path, kpath, sizeof(in->watches[slot].path) - 1);
    in->watches[slot].path[sizeof(in->watches[slot].path) - 1] = 0;
    in->watches[slot].mask = mask;
    in->watches[slot].active = 1;
    return wd;
}
static int syscall_inotify_rm_watch_impl(int infd, int wd) {
if (!current_process) return -EINVAL;
struct file* ef = fd_get(infd);
if (!ef || !ef->node || ef->node->f_ops != &inotify_fops) return -EBADF;
struct inotify_instance* in = (struct inotify_instance*)(uintptr_t)ef->node->inode;
if (!in) return -EBADF;
for (int i = 0; i < INOTIFY_MAX_WATCHES; i++) {
if (in->watches[i].active && in->watches[i].wd == wd) {
in->watches[i].active = 0;
return 0;
}
}
return -EINVAL;
}
/* ------------------------------------------------------------------ */
/* aio_* — POSIX asynchronous I/O (synchronous implementation) */
/* ------------------------------------------------------------------ */
/*
 * Control block for the aio_* syscalls. It is copied in from user space,
 * filled in by syscall_aio_rw_impl and copied back; aio_error/aio_return
 * simply read the stored fields.
 */
struct aiocb {
int aio_fildes; /* descriptor to operate on */
void* aio_buf; /* user buffer for the transfer */
uint32_t aio_nbytes; /* number of bytes requested */
uint32_t aio_offset; /* file offset passed to the read/write handler */
int32_t aio_error; /* 0 = done, EINPROGRESS = pending */
int32_t aio_return; /* bytes transferred or -errno */
};
/*
 * aio_read / aio_write, executed synchronously.
 *
 * The control block is copied in, the transfer is performed immediately via
 * the node's read/write handler, and the outcome is written back into the
 * user's aiocb (aio_error = positive errno or 0, aio_return = byte count or
 * -errno). Once the control block has been read successfully the syscall
 * itself returns 0; failures of the operation are reported only through the
 * control block.
 *
 * Returns -EINVAL (no process / NULL cb) or -EFAULT (cb not readable).
 *
 * NOTE(review): the user buffer pointer is handed straight to the node
 * handler after a range check, unlike the read path which bounces through a
 * kernel buffer; confirm the handlers are safe with user addresses.
 */
__attribute__((noinline))
static int syscall_aio_rw_impl(void* user_cb, int is_write) {
    if (!current_process || !user_cb) return -EINVAL;
    if (user_range_ok(user_cb, sizeof(struct aiocb)) == 0) return -EFAULT;

    struct aiocb cb;
    if (copy_from_user(&cb, user_cb, sizeof(cb)) < 0) return -EFAULT;

    int32_t status = 0; /* value for aio_error */
    int32_t xfer = 0;   /* value for aio_return */

    struct file* f = fd_get(cb.aio_fildes);
    if (!f || !f->node) {
        status = EBADF;
        xfer = -EBADF;
        goto complete;
    }
    if (!cb.aio_buf || cb.aio_nbytes == 0) {
        goto complete; /* nothing to transfer: success with 0 bytes */
    }
    if (user_range_ok(cb.aio_buf, cb.aio_nbytes) == 0) {
        status = EFAULT;
        xfer = -EFAULT;
        goto complete;
    }

    if (is_write) {
        if (f->node->f_ops && f->node->f_ops->write) {
            xfer = (int32_t)f->node->f_ops->write(f->node, cb.aio_offset, cb.aio_nbytes,
                                                  (const uint8_t*)cb.aio_buf);
        } else {
            xfer = -ENOSYS;
        }
    } else {
        if (f->node->f_ops && f->node->f_ops->read) {
            xfer = (int32_t)f->node->f_ops->read(f->node, cb.aio_offset, cb.aio_nbytes,
                                                 (uint8_t*)cb.aio_buf);
        } else {
            xfer = -ENOSYS;
        }
    }
    status = (xfer < 0) ? -xfer : 0;

complete:
    cb.aio_error = status;
    cb.aio_return = xfer;
    (void)copy_to_user(user_cb, &cb, sizeof(cb));
    return 0;
}
__attribute__((noinline))
static int syscall_aio_error_impl(void* user_cb) {
if (!user_cb) return -EINVAL;
if (user_range_ok(user_cb, sizeof(struct aiocb)) == 0) return -EFAULT;
struct aiocb cb;
if (copy_from_user(&cb, user_cb, sizeof(cb)) < 0) return -EFAULT;
return cb.aio_error;
}
__attribute__((noinline))
static int syscall_aio_return_impl(void* user_cb) {
if (!user_cb) return -EINVAL;
if (user_range_ok(user_cb, sizeof(struct aiocb)) == 0) return -EFAULT;
struct aiocb cb;
if (copy_from_user(&cb, user_cb, sizeof(cb)) < 0) return -EFAULT;
return cb.aio_return;
}
/*
 * Read handler for the read end of a pipe.
 *
 * Drains up to `size` bytes from the pipe's ring buffer into `buffer` and
 * returns how many were copied. Never blocks: an empty pipe yields 0, as do
 * invalid arguments or a call on the write end.
 */
static uint32_t pipe_read(fs_node_t* n, uint32_t offset, uint32_t size, uint8_t* buffer) {
    (void)offset;

    struct pipe_node* end = (struct pipe_node*)n;
    if (end == NULL || end->ps == NULL || buffer == NULL) return 0;
    if (!end->is_read_end || size == 0) return 0;

    struct pipe_state* st = end->ps;
    const uint32_t take = (size > st->count) ? st->count : size;

    for (uint32_t k = 0; k != take; k++) {
        buffer[k] = st->buf[st->rpos];
        st->rpos++;
        if (st->rpos == st->cap) st->rpos = 0; /* wrap around the ring */
    }
    st->count -= take;
    return take;
}
/*
 * Write handler for the write end of a pipe.
 *
 * Appends up to `size` bytes to the ring buffer, limited by the free space,
 * and returns the number stored. Never blocks. Returns 0 for invalid
 * arguments, for a call on the read end, and when no reader is left.
 */
static uint32_t pipe_write(fs_node_t* n, uint32_t offset, uint32_t size, const uint8_t* buffer) {
    (void)offset;

    struct pipe_node* end = (struct pipe_node*)n;
    if (end == NULL || end->ps == NULL || buffer == NULL) return 0;
    if (end->is_read_end || size == 0) return 0;

    struct pipe_state* st = end->ps;
    if (st->readers == 0) return 0;

    const uint32_t room = st->cap - st->count;
    const uint32_t put = (size > room) ? room : size;

    for (uint32_t k = 0; k != put; k++) {
        st->buf[st->wpos] = buffer[k];
        st->wpos++;
        if (st->wpos == st->cap) st->wpos = 0; /* wrap around the ring */
    }
    st->count += put;
    return put;
}
/*
 * Close handler shared by both pipe ends.
 *
 * Drops this end's reader/writer count, frees the end's node, and frees the
 * shared pipe state (and its buffer) once neither readers nor writers remain.
 */
static void pipe_close(fs_node_t* n) {
    struct pipe_node* end = (struct pipe_node*)n;
    if (end == NULL) return;

    struct pipe_state* st = end->ps;
    if (st == NULL) {
        kfree(end);
        return;
    }

    if (end->is_read_end) {
        if (st->readers != 0) st->readers--;
    } else {
        if (st->writers != 0) st->writers--;
    }
    kfree(end);

    if (st->readers != 0 || st->writers != 0) return; /* other end still open */

    if (st->buf) kfree(st->buf);
    kfree(st);
}
/*
 * Poll handler shared by both pipe ends.
 *
 * Read end (only when VFS_POLL_IN is requested): IN when data is buffered;
 * IN|HUP when every writer has gone away.
 * Write end (only when VFS_POLL_OUT is requested): ERR when no reader is
 * left, otherwise OUT when the buffer has free space.
 * A node without pipe state reports VFS_POLL_ERR.
 */
static int pipe_poll(fs_node_t* n, int events) {
    struct pipe_node* end = (struct pipe_node*)n;
    if (end == NULL || end->ps == NULL) return VFS_POLL_ERR;

    const struct pipe_state* st = end->ps;

    if (end->is_read_end) {
        if (!(events & VFS_POLL_IN)) return 0;
        if (st->writers == 0) return VFS_POLL_IN | VFS_POLL_HUP;
        return (st->count > 0) ? VFS_POLL_IN : 0;
    }

    if (!(events & VFS_POLL_OUT)) return 0;
    if (st->readers == 0) return VFS_POLL_ERR;

    const uint32_t room = st->cap - st->count;
    return (room > 0) ? VFS_POLL_OUT : 0;
}
/*
 * ioctl handler for pipes. Only FIONREAD (0x541B) is supported: it stores
 * the number of buffered bytes as an int at user_arg.
 * Returns 0, -EBADF (no pipe state), -EFAULT (bad user pointer) or -ENOTTY
 * (any other command).
 */
static int pipe_ioctl(fs_node_t* n, uint32_t cmd, void* user_arg) {
    struct pipe_node* end = (struct pipe_node*)n;
    if (end == NULL || end->ps == NULL) return -EBADF;

    if (cmd != 0x541B /* FIONREAD */) return -ENOTTY;

    if (user_arg == NULL) return -EFAULT;
    if (user_range_ok(user_arg, sizeof(int)) == 0) return -EFAULT;

    int pending = (int)end->ps->count;
    if (copy_to_user(user_arg, &pending, sizeof(pending)) < 0) return -EFAULT;
    return 0;
}
/*
 * Operations for the read end of a pipe (no write handler). The table's
 * address also identifies pipe nodes in the F_GETPIPE_SZ/F_SETPIPE_SZ
 * fcntl commands.
 */
static const struct file_operations pipe_read_fops = {
.read = pipe_read,
.close = pipe_close,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
};
/*
 * Operations for the write end of a pipe (no read handler). The table's
 * address also identifies pipe nodes in the F_GETPIPE_SZ/F_SETPIPE_SZ
 * fcntl commands.
 */
static const struct file_operations pipe_write_fops = {
.write = pipe_write,
.close = pipe_close,
.poll = pipe_poll,
.ioctl = pipe_ioctl,
};
/*
 * Allocate the fs node for one end of a pipe and register it with the
 * shared state (bumping readers or writers).
 *
 * On success *out_node points at the embedded fs_node_t and 0 is returned.
 * Returns -EINVAL for NULL arguments and -ENOMEM on allocation failure.
 */
static int pipe_node_create(struct pipe_state* ps, int is_read_end, fs_node_t** out_node) {
    if (ps == NULL || out_node == NULL) return -EINVAL;

    struct pipe_node* end = (struct pipe_node*)kmalloc(sizeof(*end));
    if (end == NULL) return -ENOMEM;
    memset(end, 0, sizeof(*end));

    end->ps = ps;
    end->is_read_end = is_read_end ? 1U : 0U;
    end->node.flags = FS_FILE;
    end->node.length = 0;

    if (end->is_read_end) {
        strcpy(end->node.name, "pipe:r");
        end->node.f_ops = &pipe_read_fops;
        ps->readers++;
    } else {
        strcpy(end->node.name, "pipe:w");
        end->node.f_ops = &pipe_write_fops;
        ps->writers++;
    }

    *out_node = &end->node;
    return 0;
}
/*
 * Create a pipe and install both ends in the caller's descriptor table.
 *
 * kfds[0] receives the read descriptor and kfds[1] the write descriptor.
 * The pipe starts with a 512-byte buffer.
 * Returns 0, -EINVAL (NULL kfds), -ENOMEM, or -EMFILE when a descriptor
 * cannot be allocated. Every failure path releases what was built so far.
 */
static int pipe_create_kfds(int kfds[2]) {
    if (kfds == NULL) return -EINVAL;

    /* Shared state and ring buffer. */
    struct pipe_state* st = (struct pipe_state*)kmalloc(sizeof(*st));
    if (st == NULL) return -ENOMEM;
    memset(st, 0, sizeof(*st));
    st->cap = 512;
    st->buf = (uint8_t*)kmalloc(st->cap);
    if (st->buf == NULL) {
        kfree(st);
        return -ENOMEM;
    }

    /* One node per end. */
    fs_node_t* rd_node = NULL;
    fs_node_t* wr_node = NULL;
    if (pipe_node_create(st, 1, &rd_node) < 0) {
        kfree(st->buf);
        kfree(st);
        return -ENOMEM;
    }
    if (pipe_node_create(st, 0, &wr_node) < 0) {
        vfs_close(rd_node);
        return -ENOMEM;
    }

    /* One open-file object per end. */
    struct file* rd_file = (struct file*)kmalloc(sizeof(*rd_file));
    struct file* wr_file = (struct file*)kmalloc(sizeof(*wr_file));
    if (rd_file == NULL || wr_file == NULL) {
        if (rd_file) kfree(rd_file);
        if (wr_file) kfree(wr_file);
        vfs_close(rd_node);
        vfs_close(wr_node);
        return -ENOMEM;
    }
    memset(rd_file, 0, sizeof(*rd_file));
    memset(wr_file, 0, sizeof(*wr_file));
    rd_file->node = rd_node;
    rd_file->refcount = 1;
    wr_file->node = wr_node;
    wr_file->refcount = 1;

    /* Descriptors. */
    const int rd_fd = fd_alloc(rd_file);
    if (rd_fd < 0) {
        kfree(rd_file);
        kfree(wr_file);
        vfs_close(rd_node);
        vfs_close(wr_node);
        return -EMFILE;
    }
    const int wr_fd = fd_alloc(wr_file);
    if (wr_fd < 0) {
        (void)fd_close(rd_fd); /* read end is already installed; close it via its fd */
        kfree(wr_file);
        vfs_close(wr_node);
        return -EMFILE;
    }

    kfds[0] = rd_fd;
    kfds[1] = wr_fd;
    return 0;
}
/*
 * pipe: create a pipe and store {read fd, write fd} at user_fds.
 * Returns 0, -EFAULT for an unusable user pointer, or the error from
 * pipe_create_kfds(). If the result cannot be copied out, both descriptors
 * are closed again.
 */
static int syscall_pipe_impl(int* user_fds) {
    if (user_fds == NULL) return -EFAULT;
    if (!user_range_ok(user_fds, sizeof(int) * 2)) return -EFAULT;

    int pair[2];
    const int rc = pipe_create_kfds(pair);
    if (rc < 0) return rc;

    if (copy_to_user(user_fds, pair, sizeof(pair)) >= 0) return 0;

    (void)fd_close(pair[0]);
    (void)fd_close(pair[1]);
    return -EFAULT;
}
/*
 * pipe2: like pipe, with flags.
 *
 * All flag bits except O_CLOEXEC are stored as the open-file flags of both
 * ends; O_CLOEXEC marks both descriptors close-on-exec instead.
 * Returns 0, -EFAULT, -ECHILD (no current process after creation) or the
 * error from pipe_create_kfds().
 */
static int syscall_pipe2_impl(int* user_fds, uint32_t flags) {
    if (user_fds == NULL) return -EFAULT;
    if (!user_range_ok(user_fds, sizeof(int) * 2)) return -EFAULT;

    int pair[2];
    const int rc = pipe_create_kfds(pair);
    if (rc < 0) return rc;
    if (!current_process) return -ECHILD;

    const uint32_t file_flags = flags & ~O_CLOEXEC;
    const int cloexec = (flags & O_CLOEXEC) != 0;

    for (int k = 0; k < 2; k++) {
        const int fd = pair[k];
        if (fd < 0 || fd >= PROCESS_MAX_FILES) continue;
        if (current_process->files[fd]) {
            current_process->files[fd]->flags = file_flags;
        }
        if (cloexec) {
            current_process->fd_flags[fd] = FD_CLOEXEC;
        }
    }

    if (copy_to_user(user_fds, pair, sizeof(pair)) >= 0) return 0;

    (void)fd_close(pair[0]);
    (void)fd_close(pair[1]);
    return -EFAULT;
}
static int stat_from_node(const fs_node_t* node, struct stat* st) {
if (!node || !st) return -EFAULT;
st->st_ino = node->inode;
st->st_nlink = 1;
st->st_size = node->length;
st->st_uid = node->uid;
st->st_gid = node->gid;
uint32_t mode = node->mode & 07777;
if (node->flags == FS_DIRECTORY) mode |= S_IFDIR;
else if (node->flags == FS_CHARDEVICE) mode |= S_IFCHR;
else if (node->flags == FS_SYMLINK) mode |= S_IFLNK;
else mode |= S_IFREG;
if ((mode & 07777) == 0) mode |= 0755;
st->st_mode = mode;
return 0;
}
/*
 * Install `f` in the lowest free descriptor slot at or above start_fd
 * (negative values are treated as 0).
 * Returns the descriptor, -EINVAL (no process, NULL file, or start_fd beyond
 * the table) or -EMFILE when no slot is free.
 */
static int fd_alloc_from(int start_fd, struct file* f) {
    if (!current_process || !f) return -EINVAL;

    int fd = (start_fd < 0) ? 0 : start_fd;
    if (fd >= PROCESS_MAX_FILES) return -EINVAL;

    while (fd < PROCESS_MAX_FILES && current_process->files[fd] != NULL) {
        fd++;
    }
    if (fd >= PROCESS_MAX_FILES) return -EMFILE;

    current_process->files[fd] = f;
    return fd;
}
/*
 * Install `f` in the lowest free descriptor slot, skipping 0-2 (the standard
 * streams). Returns the descriptor, -EINVAL (no process or NULL file) or
 * -EMFILE when the table is full.
 */
static int fd_alloc(struct file* f) {
    if (!current_process || !f) return -EINVAL;

    int fd = 3;
    while (fd < PROCESS_MAX_FILES) {
        if (current_process->files[fd] == NULL) {
            current_process->files[fd] = f;
            return fd;
        }
        fd++;
    }
    return -EMFILE;
}
/*
 * Look up the open file behind a descriptor of the current process.
 * Returns NULL when there is no current process, the descriptor is out of
 * range, or the slot is empty.
 */
static struct file* fd_get(int fd) {
    const int in_range = (fd >= 0) && (fd < PROCESS_MAX_FILES);
    if (!current_process || !in_range) return NULL;
    return current_process->files[fd];
}
/*
 * Close a descriptor of the current process.
 *
 * The slot is emptied and its per-descriptor flags are reset, so a later
 * descriptor that reuses the number does not inherit a stale FD_CLOEXEC.
 * The underlying file is released (node closed, struct freed) when this was
 * the last reference.
 *
 * Returns 0, or -EBADF when there is no process, the descriptor is out of
 * range, or it is not open.
 */
static int fd_close(int fd) {
    if (!current_process) return -EBADF;
    if (fd < 0 || fd >= PROCESS_MAX_FILES) return -EBADF;

    struct file* f = current_process->files[fd];
    if (!f) return -EBADF;

    current_process->files[fd] = NULL;
    current_process->fd_flags[fd] = 0; /* descriptor flags die with the descriptor */

    if (__sync_sub_and_fetch(&f->refcount, 1) == 0) {
        if (f->node) {
            vfs_close(f->node);
        }
        kfree(f);
    }
    return 0;
}
static int syscall_dup_impl(int oldfd) {
struct file* f = fd_get(oldfd);
if (!f) return -EBADF;
__sync_fetch_and_add(&f->refcount, 1);
int newfd = fd_alloc_from(0, f);
if (newfd < 0) {
__sync_sub_and_fetch(&f->refcount, 1);
return -EMFILE;
}
return newfd;
}
/*
 * execve: replace the current process image with the ELF named by user_path.
 *
 * Steps, in order:
 *   1. copy the path (at most 127 characters are kept) and up to
 *      EXECVE_MAX_ARGC / EXECVE_MAX_ENVC strings into kernel buffers;
 *      entries beyond those limits are ignored without an error;
 *   2. check that the path exists and load the image into a new address space;
 *   3. switch to the new address space and build the initial user stack:
 *      strings, optional auxv, envp[], argv[], argc (lowest address);
 *   4. close FD_CLOEXEC descriptors, destroy the old address space and point
 *      the saved register frame at the new entry point and stack.
 *
 * Returns 0 on success or a negative errno: -EFAULT (NULL regs/path or a
 * failed path copy), -ENOMEM, -ENOENT, -EINVAL (loader failure or no current
 * process), -E2BIG, or the error reported by the execve_copy_user_* helpers.
 *
 * NOTE(review): the -E2BIG rollback inside the string-copy loops restores
 * only addr_space; heap_start, heap_break and cmdline have already been
 * overwritten with the new image's values at that point.
 */
static int syscall_execve_impl(struct registers* regs, const char* user_path, const char* const* user_argv, const char* const* user_envp) {
if (!regs || !user_path) return -EFAULT;
enum {
EXECVE_MAX_ARGC = 32,
EXECVE_MAX_ENVC = 32,
EXECVE_MAX_STR = 128,
};
/* Copy the path one byte at a time; a path that fills the buffer is cut
 * to 127 characters rather than rejected. */
char path[128];
for (size_t i = 0; i < sizeof(path); i++) {
if (copy_from_user(&path[i], &user_path[i], 1) < 0) {
return -EFAULT;
}
if (path[i] == 0) break;
if (i + 1 == sizeof(path)) {
path[sizeof(path) - 1] = 0;
break;
}
}
// Snapshot argv/envp into kernel buffers (before switching addr_space).
char (*kargv)[EXECVE_MAX_STR] = (char(*)[EXECVE_MAX_STR])kmalloc((size_t)EXECVE_MAX_ARGC * (size_t)EXECVE_MAX_STR);
char (*kenvp)[EXECVE_MAX_STR] = (char(*)[EXECVE_MAX_STR])kmalloc((size_t)EXECVE_MAX_ENVC * (size_t)EXECVE_MAX_STR);
int argc = 0;
int envc = 0;
int ret = 0;
if (!kargv || !kenvp) {
ret = -ENOMEM;
goto out;
}
if (user_argv) {
for (int i = 0; i < EXECVE_MAX_ARGC; i++) {
uintptr_t up = 0;
int rc = execve_copy_user_ptr((const void* const*)&user_argv[i], &up);
if (rc < 0) { ret = rc; goto out; }
if (up == 0) break;
rc = execve_copy_user_str(kargv[i], sizeof(kargv[i]), (const char*)up);
if (rc < 0) { ret = rc; goto out; }
argc++;
}
}
if (user_envp) {
for (int i = 0; i < EXECVE_MAX_ENVC; i++) {
uintptr_t up = 0;
int rc = execve_copy_user_ptr((const void* const*)&user_envp[i], &up);
if (rc < 0) { ret = rc; goto out; }
if (up == 0) break;
rc = execve_copy_user_str(kenvp[i], sizeof(kenvp[i]), (const char*)up);
if (rc < 0) { ret = rc; goto out; }
envc++;
}
}
// Distinguish ENOENT early.
/* The node is used only as an existence check; loading goes by path. */
fs_node_t* node = vfs_lookup(path);
if (!node) { ret = -ENOENT; goto out; }
uintptr_t entry = 0;
uintptr_t user_sp = 0;
uintptr_t new_as = 0;
uintptr_t heap_brk = 0;
if (elf32_load_user_from_initrd(path, &entry, &user_sp, &new_as, &heap_brk) != 0) {
ret = -EINVAL;
goto out;
}
/* Worst-case size check: every string is counted at EXECVE_MAX_STR bytes,
 * plus both pointer arrays and 64 bytes of slack, against a 4 KiB stack. */
const size_t user_stack_size = 0x1000U;
if ((size_t)((argc + 1) + (envc + 1)) * sizeof(uintptr_t) + (size_t)argc * EXECVE_MAX_STR + (size_t)envc * EXECVE_MAX_STR + 64U > user_stack_size) { vmm_as_destroy(new_as); ret = -E2BIG; goto out; }
uintptr_t old_as = current_process ? current_process->addr_space : 0;
if (!current_process) {
vmm_as_destroy(new_as);
ret = -EINVAL;
goto out;
}
/* From here on the process runs in the new address space. */
current_process->addr_space = new_as;
current_process->heap_start = heap_brk;
current_process->heap_break = heap_brk;
strncpy(current_process->cmdline, path, sizeof(current_process->cmdline) - 1);
current_process->cmdline[sizeof(current_process->cmdline) - 1] = '\0';
vmm_as_activate(new_as);
// Build a minimal initial user stack: argc, argv pointers, envp pointers, strings.
// The loader returns a fresh stack top (user_sp). We'll pack strings below it.
uintptr_t sp = user_sp;
sp &= ~(uintptr_t)0xF;
const uintptr_t sp_base = user_sp - user_stack_size;
uintptr_t argv_ptrs_va[EXECVE_MAX_ARGC + 1];
uintptr_t envp_ptrs_va[EXECVE_MAX_ENVC + 1];
/* Strings are copied last-to-first; each user-space address is recorded
 * for the pointer arrays pushed below. */
for (int i = envc - 1; i >= 0; i--) {
size_t len = strlen(kenvp[i]) + 1;
if (sp - len < sp_base) { vmm_as_activate(old_as); current_process->addr_space = old_as; vmm_as_destroy(new_as); ret = -E2BIG; goto out; }
sp -= len;
memcpy((void*)sp, kenvp[i], len);
envp_ptrs_va[i] = sp;
}
envp_ptrs_va[envc] = 0;
for (int i = argc - 1; i >= 0; i--) {
size_t len = strlen(kargv[i]) + 1;
if (sp - len < sp_base) { vmm_as_activate(old_as); current_process->addr_space = old_as; vmm_as_destroy(new_as); ret = -E2BIG; goto out; }
sp -= len;
memcpy((void*)sp, kargv[i], len);
argv_ptrs_va[i] = sp;
}
argv_ptrs_va[argc] = 0;
sp &= ~(uintptr_t)0xF;
// Push auxv entries (if interpreter present) — must come right after envp[]
// so ld.so can find them by walking: argc → argv[] → envp[] → auxv.
{
elf32_auxv_t auxv_buf[8];
int auxv_n = elf32_pop_pending_auxv(auxv_buf, 8);
if (auxv_n > 0) {
sp -= (uintptr_t)(auxv_n * sizeof(elf32_auxv_t));
memcpy((void*)sp, auxv_buf, (size_t)auxv_n * sizeof(elf32_auxv_t));
}
}
// Push envp[] pointers
sp -= (uintptr_t)(sizeof(uintptr_t) * (envc + 1));
memcpy((void*)sp, envp_ptrs_va, sizeof(uintptr_t) * (envc + 1));
uintptr_t envp_va = sp;
// Push argv[] pointers
sp -= (uintptr_t)(sizeof(uintptr_t) * (argc + 1));
memcpy((void*)sp, argv_ptrs_va, sizeof(uintptr_t) * (argc + 1));
uintptr_t argv_va = sp;
// Push argc
sp -= sizeof(uint32_t);
*(uint32_t*)sp = (uint32_t)argc;
(void)argv_va;
(void)envp_va;
/* Close every descriptor marked close-on-exec. */
for (int i = 0; i < PROCESS_MAX_FILES; i++) {
if (current_process->fd_flags[i] & FD_CLOEXEC) {
(void)fd_close(i);
current_process->fd_flags[i] = 0;
}
}
if (old_as && old_as != new_as) {
vmm_as_destroy(old_as);
}
/* Redirect the return to user mode to the new image. */
sc_ip(regs) = (uint32_t)entry;
sc_usp(regs) = (uint32_t)sp;
sc_ret(regs) = 0;
ret = 0;
goto out;
out:
/* Single exit: the kernel-side argv/envp snapshots are always released. */
if (kargv) kfree(kargv);
if (kenvp) kfree(kenvp);
return ret;
}
/*
 * dup2: make newfd refer to the same open file as oldfd.
 *
 * If newfd is open it is closed first. The duplicate never carries
 * close-on-exec, so the descriptor flags of newfd are cleared. When both
 * numbers are equal (and oldfd is valid) nothing is changed.
 *
 * Returns newfd, or -EBADF when newfd is out of range or oldfd is not open.
 */
static int syscall_dup2_impl(int oldfd, int newfd) {
    if (newfd < 0 || newfd >= PROCESS_MAX_FILES) return -EBADF;

    struct file* f = fd_get(oldfd);
    if (!f) return -EBADF;
    if (oldfd == newfd) return newfd;

    /* Hold our reference before dropping whatever newfd pointed at, so the
     * file can never hit zero in between even if both slots shared it.
     * fd_get() succeeded, so current_process is non-NULL from here on. */
    __sync_fetch_and_add(&f->refcount, 1);
    if (current_process->files[newfd]) {
        (void)fd_close(newfd);
    }
    current_process->files[newfd] = f;
    current_process->fd_flags[newfd] = 0;
    return newfd;
}
static int syscall_dup3_impl(int oldfd, int newfd, uint32_t flags) {
// Minimal: accept only flags==0 for now.
if (flags != 0) return -EINVAL;
if (newfd < 0 || newfd >= PROCESS_MAX_FILES) return -EBADF;
if (oldfd == newfd) return -EINVAL;
struct file* f = fd_get(oldfd);
if (!f) return -EBADF;
if (current_process && current_process->files[newfd]) {
(void)fd_close(newfd);
}
__sync_fetch_and_add(&f->refcount, 1);
current_process->files[newfd] = f;
return newfd;
}
static int syscall_stat_impl(const char* user_path, struct stat* user_st) {
if (!user_path || !user_st) return -EFAULT;
if (user_range_ok(user_st, sizeof(*user_st)) == 0) return -EFAULT;
char path[128];
int prc = path_resolve_user(user_path, path, sizeof(path));
if (prc < 0) return prc;
fs_node_t* node = vfs_lookup(path);
if (!node) return -ENOENT;
struct stat st;
int rc = stat_from_node(node, &st);
if (rc < 0) return rc;
if (copy_to_user(user_st, &st, sizeof(st)) < 0) return -EFAULT;
return 0;
}
/*
 * fstatat: only dirfd == AT_FDCWD is supported (anything else is -ENOSYS);
 * flags are ignored and the call is forwarded to stat.
 */
static int syscall_fstatat_impl(int dirfd, const char* user_path, struct stat* user_st, uint32_t flags) {
    (void)flags;
    if (dirfd == AT_FDCWD) {
        return syscall_stat_impl(user_path, user_st);
    }
    return -ENOSYS;
}
static int syscall_fstat_impl(int fd, struct stat* user_st) {
if (!user_st) return -EFAULT;
if (user_range_ok(user_st, sizeof(*user_st)) == 0) return -EFAULT;
struct file* f = fd_get(fd);
if (!f || !f->node) return -EBADF;
struct stat st;
int rc = stat_from_node(f->node, &st);
if (rc < 0) return rc;
if (copy_to_user(user_st, &st, sizeof(st)) < 0) return -EFAULT;
return 0;
}
/*
 * lseek: reposition the offset of a regular file.
 *
 * whence: 0 = from start, 1 = from current offset, 2 = from end of file.
 * The resulting offset must lie within [0, file length]; seeking past the
 * end is rejected. Descriptors 0-2 and non-regular nodes yield -ESPIPE.
 * Returns the new offset, -EBADF, -ESPIPE or -EINVAL.
 */
static int syscall_lseek_impl(int fd, int32_t offset, int whence) {
    if (fd >= 0 && fd <= 2) return -ESPIPE;

    struct file* file = fd_get(fd);
    if (file == NULL || file->node == NULL) return -EBADF;
    if (file->node->flags != FS_FILE) return -ESPIPE;

    int64_t origin;
    switch (whence) {
    case 0:
        origin = 0;
        break;
    case 1:
        origin = (int64_t)file->offset;
        break;
    case 2:
        origin = (int64_t)file->node->length;
        break;
    default:
        return -EINVAL;
    }

    /* 64-bit arithmetic so a 32-bit offset cannot wrap. */
    const int64_t target = origin + (int64_t)offset;
    if (target < 0 || target > (int64_t)file->node->length) return -EINVAL;

    file->offset = (uint32_t)target;
    return (int)file->offset;
}
/*
 * Decide whether the current process may access `node`.
 *
 * want: bitmask of 4 (read), 2 (write), 1 (execute).
 * Access is always granted in kernel context (no current process), to
 * effective uid 0, and on nodes whose mode is 0 (never set). Otherwise the
 * owner, group or other permission triplet is selected by comparing the
 * effective uid/gid with the node's, and every requested bit must be present.
 * Returns 0 if allowed, -EACCES if denied.
 */
static int vfs_check_permission(fs_node_t* node, int want) {
    if (!current_process || current_process->euid == 0) return 0;
    if (node->mode == 0) return 0;

    const uint32_t mode = node->mode;
    unsigned shift;
    if (current_process->euid == node->uid) {
        shift = 6; /* owner bits */
    } else if (current_process->egid == node->gid) {
        shift = 3; /* group bits */
    } else {
        shift = 0; /* other bits */
    }

    const uint32_t granted = (mode >> shift) & 7;
    return ((want & granted) == (uint32_t)want) ? 0 : -EACCES;
}
/*
 * open: resolve a path and install a new descriptor for it.
 *
 * flags - access mode in the low two bits (0 read, 1 write, 2 read/write),
 *         0x40 = O_CREAT, 0x200 = O_TRUNC, plus O_CLOEXEC and whatever else
 *         is stored verbatim as the open-file flags.
 *
 * The permission check runs BEFORE an existing file is truncated, and
 * truncation additionally requires write permission, so a caller that is
 * denied access can no longer destroy the file's contents.
 *
 * Returns the descriptor, -EFAULT, the error from path_resolve_user() or
 * vfs_create(), -ENOENT, -EACCES, -ENOMEM or -EMFILE.
 */
static int syscall_open_impl(const char* user_path, uint32_t flags) {
    if (!user_path) return -EFAULT;

    char path[128];
    int prc = path_resolve_user(user_path, path, sizeof(path));
    if (prc < 0) return prc;

    fs_node_t* node = vfs_lookup(path);
    int truncate = 0;
    if (!node) {
        if ((flags & 0x40U) == 0U) return -ENOENT;
        /* O_CREAT: create file through VFS */
        int rc = vfs_create(path, flags, &node);
        if (rc < 0) return rc;
    } else if ((flags & 0x200U) != 0U && node->flags == FS_FILE) {
        /* O_TRUNC on an existing regular file: deferred until access is granted */
        truncate = 1;
    }

    /* Permission check based on open flags */
    int want = 4;              /* default: read */
    uint32_t acc = flags & 3U; /* O_RDONLY=0, O_WRONLY=1, O_RDWR=2 */
    if (acc == 1) want = 2;      /* write only */
    else if (acc == 2) want = 6; /* read + write */
    if (truncate) want |= 2;     /* truncation modifies the file */
    int perm_rc = vfs_check_permission(node, want);
    if (perm_rc < 0) return perm_rc;

    if (truncate && node->i_ops && node->i_ops->truncate) {
        node->i_ops->truncate(node, 0);
        node->length = 0;
    }

    struct file* f = (struct file*)kmalloc(sizeof(*f));
    if (!f) return -ENOMEM;
    memset(f, 0, sizeof(*f)); /* no field is left holding heap garbage */
    f->node = node;
    f->offset = 0;
    f->flags = flags;
    f->refcount = 1;

    int fd = fd_alloc(f);
    if (fd < 0) {
        kfree(f);
        return -EMFILE;
    }

    /* Always write the descriptor flags so a stale FD_CLOEXEC left in a
     * reused slot cannot leak onto this descriptor. */
    if (current_process) {
        current_process->fd_flags[fd] = (flags & O_CLOEXEC) ? FD_CLOEXEC : 0;
    }
    return fd;
}
/*
 * openat: only dirfd == AT_FDCWD is supported (anything else is -ENOSYS);
 * `mode` is ignored and the call is forwarded to open.
 */
static int syscall_openat_impl(int dirfd, const char* user_path, uint32_t flags, uint32_t mode) {
    (void)mode;
    if (dirfd == AT_FDCWD) {
        return syscall_open_impl(user_path, flags);
    }
    return -ENOSYS;
}
/*
 * fcntl: descriptor and open-file control.
 *
 * Supported commands:
 *   F_GETFD / F_SETFD      - per-descriptor flags (only FD_CLOEXEC is kept)
 *   F_GETFL / F_SETFL      - open-file flags (only O_NONBLOCK and O_APPEND
 *                            can be changed)
 *   F_GETLK / F_SETLK / F_SETLKW - byte-range locks keyed by inode; `arg` is
 *                            a user pointer to struct k_flock
 *   F_GETPIPE_SZ / F_SETPIPE_SZ  - pipe buffer capacity, clamped to
 *                            512..65536 bytes
 *   F_DUPFD_CLOEXEC        - duplicate onto the lowest free descriptor
 *                            >= arg and mark it close-on-exec
 *
 * Returns a command-specific non-negative value or a negative errno;
 * unknown commands yield -EINVAL.
 */
static int syscall_fcntl_impl(int fd, int cmd, uint32_t arg) {
    struct file* f = fd_get(fd);
    if (!f) return -EBADF;

    if (cmd == FCNTL_F_GETFD) {
        if (!current_process) return 0;
        return (int)current_process->fd_flags[fd];
    }
    if (cmd == FCNTL_F_SETFD) {
        if (!current_process) return -EINVAL;
        current_process->fd_flags[fd] = (uint8_t)(arg & FD_CLOEXEC);
        return 0;
    }
    if (cmd == FCNTL_F_GETFL) {
        return (int)f->flags;
    }
    if (cmd == FCNTL_F_SETFL) {
        uint32_t keep = f->flags & ~(O_NONBLOCK | O_APPEND);
        uint32_t set = arg & (O_NONBLOCK | O_APPEND);
        f->flags = keep | set;
        return 0;
    }

    if (cmd == FCNTL_F_GETLK || cmd == FCNTL_F_SETLK || cmd == FCNTL_F_SETLKW) {
        if (!current_process || !f->node) return -EINVAL;
        void* user_fl = (void*)(uintptr_t)arg;
        if (!user_fl || user_range_ok(user_fl, sizeof(struct k_flock)) == 0)
            return -EFAULT;
        struct k_flock kfl;
        if (copy_from_user(&kfl, user_fl, sizeof(kfl)) < 0) return -EFAULT;

        /* l_len == 0 means "to end of file", encoded as end = 0xFFFFFFFF. */
        uint32_t ino = f->node->inode;
        uint32_t start = kfl.l_start;
        uint32_t end = (kfl.l_len == 0) ? 0xFFFFFFFFU : start + kfl.l_len - 1;

        if (cmd == FCNTL_F_GETLK) {
            /* Report the first conflicting lock, or F_UNLCK if none. */
            uintptr_t fl = spin_lock_irqsave(&rlock_lock_g);
            struct rlock_entry* conflict = NULL;
            int has = rlock_conflicts(ino, current_process->pid,
                                      kfl.l_type, start, end, &conflict);
            if (has && conflict) {
                kfl.l_type = (int16_t)conflict->type;
                kfl.l_whence = 0; /* SEEK_SET */
                kfl.l_start = conflict->start;
                kfl.l_len = (conflict->end == 0xFFFFFFFFU) ? 0
                            : conflict->end - conflict->start + 1;
                kfl.l_pid = conflict->pid;
            } else {
                kfl.l_type = F_UNLCK;
            }
            spin_unlock_irqrestore(&rlock_lock_g, fl);
            if (copy_to_user(user_fl, &kfl, sizeof(kfl)) < 0) return -EFAULT;
            return 0;
        }
        return rlock_setlk(ino, current_process->pid, kfl.l_type,
                           start, end, cmd == FCNTL_F_SETLKW);
    }

    if (cmd == FCNTL_F_GETPIPE_SZ) {
        if (!f->node) return -EBADF;
        if (f->node->f_ops != &pipe_read_fops && f->node->f_ops != &pipe_write_fops)
            return -ENOTTY;
        struct pipe_node* pn = (struct pipe_node*)f->node;
        return (int)pn->ps->cap;
    }
    if (cmd == FCNTL_F_SETPIPE_SZ) {
        if (!f->node) return -EBADF;
        if (f->node->f_ops != &pipe_read_fops && f->node->f_ops != &pipe_write_fops)
            return -ENOTTY;
        struct pipe_node* pn = (struct pipe_node*)f->node;
        struct pipe_state* ps = pn->ps;
        uint32_t new_cap = arg;
        if (new_cap < 512) new_cap = 512;
        if (new_cap > 65536) new_cap = 65536;
        if (new_cap == ps->cap) return (int)ps->cap;
        if (new_cap < ps->count) return -EBUSY; /* would lose buffered data */
        uint8_t* new_buf = (uint8_t*)kmalloc(new_cap);
        if (!new_buf) return -ENOMEM;
        /* Linearise the ring contents at the start of the new buffer. */
        for (uint32_t i = 0; i < ps->count; i++) {
            new_buf[i] = ps->buf[(ps->rpos + i) % ps->cap];
        }
        kfree(ps->buf);
        ps->buf = new_buf;
        ps->rpos = 0;
        ps->wpos = ps->count;
        ps->cap = new_cap;
        return (int)ps->cap;
    }

    if (cmd == FCNTL_F_DUPFD_CLOEXEC) {
        if (!current_process) return -EINVAL;
        /* arg comes straight from user space: reject anything outside the
         * table before it is used as an index (a value with the top bit set
         * would otherwise become a negative array index). */
        if (arg >= (uint32_t)PROCESS_MAX_FILES) return -EINVAL;
        int new_fd = -1;
        for (int i = (int)arg; i < PROCESS_MAX_FILES; i++) {
            if (!current_process->files[i]) { new_fd = i; break; }
        }
        if (new_fd < 0) return -EMFILE;
        /* Atomic, like every other refcount update in this file. */
        __sync_fetch_and_add(&f->refcount, 1);
        current_process->files[new_fd] = f;
        current_process->fd_flags[new_fd] = FD_CLOEXEC;
        return new_fd;
    }
    return -EINVAL;
}
/* Returns 1 when `p` is non-NULL and starts with '/', otherwise 0. */
static int path_is_absolute(const char* p) {
    if (p == NULL) return 0;
    return (*p == '/') ? 1 : 0;
}
/*
 * Normalise a path in place: collapse repeated '/', drop "." components,
 * resolve ".." against the preceding component and strip a trailing '/'.
 *
 * - An empty string, or a path that normalises to nothing, becomes "/".
 * - ".." at the root of an absolute path (or with nothing left to pop in a
 *   relative one) is ignored.
 * - The result is built in a 128-byte scratch buffer, so at most 127
 *   characters are produced; `s` must have room for at least two bytes.
 *
 * ".." is resolved by scanning the output back to the previous separator
 * rather than through a fixed-depth stack of component offsets, so paths
 * with any number of components are handled correctly.
 */
static void path_normalize_inplace(char* s) {
    if (!s) return;
    if (s[0] == 0) {
        strcpy(s, "/");
        return;
    }

    char tmp[128];
    size_t w = 0;
    const char* p = s;

    const int absolute = (*p == '/');
    if (absolute) {
        tmp[w++] = '/';
        while (*p == '/') p++;
    }
    /* Length of the part that ".." may never remove ("/" or nothing). */
    const size_t root_len = w;

    while (*p != 0) {
        /* Extract the next component and skip the separators after it. */
        const char* seg = p;
        while (*p != 0 && *p != '/') p++;
        size_t seg_len = (size_t)(p - seg);
        while (*p == '/') p++;

        if (seg_len == 1 && seg[0] == '.') {
            continue; /* "." refers to the current directory */
        }
        if (seg_len == 2 && seg[0] == '.' && seg[1] == '.') {
            /* Drop the last component, then the separator in front of it. */
            while (w > root_len && tmp[w - 1] != '/') w--;
            if (w > root_len) w--;
            continue;
        }

        /* A separator is needed whenever a component is already present. */
        if (w > root_len && w + 1 < sizeof(tmp)) tmp[w++] = '/';
        for (size_t i = 0; i < seg_len && w + 1 < sizeof(tmp); i++) {
            tmp[w++] = seg[i];
        }
    }

    if (w == 0) {
        tmp[w++] = '/';
    }
    /* Remove trailing slash (except root) */
    while (w > 1 && tmp[w - 1] == '/') {
        w--;
    }
    tmp[w] = 0;
    strcpy(s, tmp);
}
/*
 * Copy a path from user space and turn it into a normalised absolute path.
 *
 * user_path - NUL-terminated user string, at most 127 characters
 * out       - kernel buffer receiving the result
 * out_sz    - size of `out` in bytes
 *
 * Relative paths are resolved against the current process's cwd ("/" when
 * there is no process or no cwd). The result is passed through
 * path_normalize_inplace().
 *
 * Returns 0 on success, -EINVAL (no output buffer), -EFAULT (NULL or
 * unreadable user pointer) or -ENAMETOOLONG when the user string or the
 * combined path does not fit. A path that does not fit is rejected instead
 * of being silently truncated, which would make the caller operate on a
 * different file than the one named. On failure `out` is an empty string.
 */
static int path_resolve_user(const char* user_path, char* out, size_t out_sz) {
    if (!out || out_sz == 0) return -EINVAL;
    out[0] = 0;
    if (!user_path) return -EFAULT;

    /* Byte-wise copy so we never read past the user's terminator. */
    char in[128];
    size_t in_len = 0;
    for (;;) {
        if (in_len == sizeof(in)) return -ENAMETOOLONG;
        if (copy_from_user(&in[in_len], &user_path[in_len], 1) < 0) {
            return -EFAULT;
        }
        if (in[in_len] == 0) break;
        in_len++;
    }

    size_t w = 0;
    if (!path_is_absolute(in)) {
        /* Prefix with the working directory followed by a separator. */
        const char* base = (current_process && current_process->cwd[0]) ? current_process->cwd : "/";
        if (strcmp(base, "/") == 0) {
            if (out_sz < 2) goto too_long;
            out[w++] = '/';
        } else {
            for (size_t i = 0; base[i] != 0; i++) {
                if (w + 1 >= out_sz) goto too_long;
                out[w++] = base[i];
            }
            if (w + 1 >= out_sz) goto too_long;
            out[w++] = '/';
        }
    }

    for (size_t i = 0; in[i] != 0; i++) {
        if (w + 1 >= out_sz) goto too_long;
        out[w++] = in[i];
    }
    out[w] = 0;

    path_normalize_inplace(out);
    return 0;

too_long:
    out[0] = 0;
    return -ENAMETOOLONG;
}
/*
 * chdir: set the current process's working directory to the resolved path.
 * Returns 0, -EINVAL (no current process), the error from
 * path_resolve_user(), -ENOENT or -ENOTDIR.
 */
static int syscall_chdir_impl(const char* user_path) {
    if (!current_process) return -EINVAL;

    char kpath[128];
    const int rc = path_resolve_user(user_path, kpath, sizeof(kpath));
    if (rc < 0) return rc;

    const fs_node_t* dir = vfs_lookup(kpath);
    if (dir == NULL) return -ENOENT;
    if (dir->flags != FS_DIRECTORY) return -ENOTDIR;

    strcpy(current_process->cwd, kpath);
    return 0;
}
/*
 * getcwd: copy the working directory (or "/" when none is set), including
 * its terminator, into user_buf.
 * Returns 0, -EINVAL (no process or size 0), -EFAULT (bad buffer) or -ERANGE
 * when the buffer is too small.
 */
static int syscall_getcwd_impl(char* user_buf, uint32_t size) {
    if (!current_process) return -EINVAL;
    if (user_buf == NULL) return -EFAULT;
    if (size == 0) return -EINVAL;
    if (!user_range_ok(user_buf, (size_t)size)) return -EFAULT;

    const char* cwd = current_process->cwd;
    if (cwd[0] == 0) cwd = "/";

    const uint32_t bytes = (uint32_t)strlen(cwd) + 1U; /* include the NUL */
    if (bytes > size) return -ERANGE;

    return (copy_to_user(user_buf, cwd, bytes) < 0) ? -EFAULT : 0;
}
/*
 * mkdir: resolve the user path and hand it to vfs_mkdir().
 * Returns -EFAULT for a NULL path, the resolver's error, or vfs_mkdir()'s
 * result.
 */
static int syscall_mkdir_impl(const char* user_path) {
    if (user_path == NULL) return -EFAULT;

    char kpath[128];
    const int rc = path_resolve_user(user_path, kpath, sizeof(kpath));
    return (rc < 0) ? rc : vfs_mkdir(kpath);
}
/*
 * getdents: read directory entries from an open directory.
 *
 * Entries are produced by the node's readdir handler into a 256-byte kernel
 * buffer (so at most 256 bytes are returned per call) and then copied to
 * user_buf. The file offset is used as the directory cursor and is advanced
 * only after a successful copy.
 * Returns the number of bytes written, 0 at end of directory (or len == 0),
 * -EFAULT, -EBADF, -ENOTDIR, -ENOSYS (no readdir handler) or the handler's
 * error.
 */
static int syscall_getdents_impl(int fd, void* user_buf, uint32_t len) {
    if (len == 0) return 0;
    if (user_buf == NULL) return -EFAULT;
    if (!user_range_ok(user_buf, (size_t)len)) return -EFAULT;

    struct file* dirf = fd_get(fd);
    if (dirf == NULL || dirf->node == NULL) return -EBADF;
    if (dirf->node->flags != FS_DIRECTORY) return -ENOTDIR;

    if (!dirf->node->i_ops || !dirf->node->i_ops->readdir) return -ENOSYS;
    int (*readdir_fn)(struct fs_node*, uint32_t*, void*, uint32_t) = dirf->node->i_ops->readdir;

    uint8_t kbuf[256];
    const uint32_t klen = (len > (uint32_t)sizeof(kbuf)) ? (uint32_t)sizeof(kbuf) : len;

    uint32_t cursor = dirf->offset;
    const int produced = readdir_fn(dirf->node, &cursor, kbuf, klen);
    if (produced <= 0) return produced; /* error or end of directory */

    if (copy_to_user(user_buf, kbuf, (uint32_t)produced) < 0) return -EFAULT;
    dirf->offset = cursor;
    return produced;
}
/*
 * unlink: resolve the user path and hand it to vfs_unlink().
 * Returns -EFAULT for a NULL path, the resolver's error, or vfs_unlink()'s
 * result.
 */
static int syscall_unlink_impl(const char* user_path) {
    if (user_path == NULL) return -EFAULT;

    char kpath[128];
    const int rc = path_resolve_user(user_path, kpath, sizeof(kpath));
    return (rc < 0) ? rc : vfs_unlink(kpath);
}
/*
 * unlinkat: only dirfd == AT_FDCWD is supported (anything else is -ENOSYS);
 * flags are ignored and the call is forwarded to unlink.
 */
static int syscall_unlinkat_impl(int dirfd, const char* user_path, uint32_t flags) {
    (void)flags;
    if (dirfd == AT_FDCWD) {
        return syscall_unlink_impl(user_path);
    }
    return -ENOSYS;
}
/*
 * rmdir: resolve the user path and hand it to vfs_rmdir().
 * Returns -EFAULT for a NULL path, the resolver's error, or vfs_rmdir()'s
 * result.
 */
static int syscall_rmdir_impl(const char* user_path) {
    if (user_path == NULL) return -EFAULT;

    char kpath[128];
    const int rc = path_resolve_user(user_path, kpath, sizeof(kpath));
    return (rc < 0) ? rc : vfs_rmdir(kpath);
}
/*
 * rename: resolve both user paths (old first, then new) and forward them to
 * vfs_rename(). Returns -EFAULT if either pointer is NULL, the first negative
 * path_resolve_user() result encountered, otherwise vfs_rename()'s result.
 */
static int syscall_rename_impl(const char* user_old, const char* user_new) {
    char src[128];
    char dst[128];
    int status;

    if (user_old == NULL || user_new == NULL) {
        return -EFAULT;
    }

    status = path_resolve_user(user_old, src, sizeof(src));
    if (status >= 0) {
        status = path_resolve_user(user_new, dst, sizeof(dst));
    }
    if (status >= 0) {
        status = vfs_rename(src, dst);
    }
    return status;
}
static int syscall_read_impl(int fd, void* user_buf, uint32_t len) {
if (len > 1024 * 1024) return -EINVAL;
if (user_range_ok(user_buf, (size_t)len) == 0) return -EFAULT;
if (fd == 0 && (!current_process || !current_process->files[0])) {
return tty_read(user_buf, len);
}
if ((fd == 1 || fd == 2) && (!current_process || !current_process->files[fd])) return -EBADF;
struct file* f = fd_get(fd);
if (!f || !f->node) return -EBADF;
int nonblock = (f->flags & O_NONBLOCK) ? 1 : 0;
{
int (*fn_poll)(fs_node_t*, int) = NULL;
if (f->node->f_ops && f->node->f_ops->poll) fn_poll = f->node->f_ops->poll;
if (nonblock && fn_poll) {
int rev = fn_poll(f->node, VFS_POLL_IN);
if (!(rev & (VFS_POLL_IN | VFS_POLL_ERR | VFS_POLL_HUP)))
return -EAGAIN;
}
}
if (f->node->flags == FS_CHARDEVICE) {
uint8_t kbuf[256];
uint32_t total = 0;
while (total < len) {
uint32_t chunk = len - total;
if (chunk > sizeof(kbuf)) chunk = (uint32_t)sizeof(kbuf);
uint32_t rd = vfs_read(f->node, 0, chunk, kbuf);
if (rd == 0) break;
if (copy_to_user((uint8_t*)user_buf + total, kbuf, rd) < 0) {
return -EFAULT;
}
total += rd;
if (rd < chunk) break;
}
return (int)total;
}
if (!(f->node->f_ops && f->node->f_ops->read)) return -ESPIPE;
uint8_t kbuf[256];
uint32_t total = 0;
while (total < len) {
uint32_t chunk = len - total;
if (chunk > sizeof(kbuf)) chunk = (uint32_t)sizeof(kbuf);
uint32_t rd = vfs_read(f->node, f->offset, chunk, kbuf);
if (rd == 0) break;
if (copy_to_user((uint8_t*)user_buf + total, kbuf, rd) < 0) {
return -EFAULT;
}
f->offset += rd;
total += rd;
if (rd < chunk) break;
}
return (int)total;
}
/*
 * write: copy up to `len` bytes from user memory to descriptor `fd`.
 *
 * - Requests larger than 1 MiB are rejected with -EINVAL.
 * - fd 1/2 with no file installed (or no current process) go straight to
 *   tty_write() on the user buffer.
 * - fd 0 is never writable here (-EBADF).
 * - For O_NONBLOCK descriptors whose node has a poll callback, -EAGAIN is
 *   returned unless poll reports OUT or ERR.
 * - The node must provide f_ops->write and be a regular file, character
 *   device or socket; otherwise -ESPIPE.
 * - Only regular files use and advance f->offset (and honour O_APPEND by
 *   seeking to the node length first); other node types are written at
 *   offset 0.
 *
 * Data travels through a 1 KiB kernel bounce buffer. The loop stops at the
 * first zero-length or short write. Returns the number of bytes written, or
 * -EFAULT if the user range is invalid or a copy from user space fails.
 */
static int syscall_write_impl(int fd, const void* user_buf, uint32_t len);
static int syscall_write_impl(int fd, const void* user_buf, uint32_t len) {
    if (len > 1024 * 1024) return -EINVAL;
    if (user_range_ok(user_buf, (size_t)len) == 0) return -EFAULT;

    if ((fd == 1 || fd == 2) && (!current_process || !current_process->files[fd])) {
        return tty_write((const char*)user_buf, len);
    }
    if (fd == 0) return -EBADF;

    struct file* f = fd_get(fd);
    if (!f || !f->node) return -EBADF;

    if ((f->flags & O_NONBLOCK) && f->node->f_ops && f->node->f_ops->poll) {
        int rev = f->node->f_ops->poll(f->node, VFS_POLL_OUT);
        if (!(rev & (VFS_POLL_OUT | VFS_POLL_ERR))) return -EAGAIN;
    }

    if (!(f->node->f_ops && f->node->f_ops->write)) return -ESPIPE;
    if (f->node->flags != FS_FILE &&
        f->node->flags != FS_CHARDEVICE &&
        f->node->flags != FS_SOCKET) {
        return -ESPIPE;
    }

    /*
     * Node types are compared by equality everywhere else in this file, so
     * test for a regular file the same way. A bitwise `flags & FS_FILE` would
     * also match any other node type whose constant shares a bit with
     * FS_FILE, making devices/sockets use and advance the file offset.
     */
    const int is_regular = (f->node->flags == FS_FILE);

    if ((f->flags & O_APPEND) && is_regular) {
        f->offset = f->node->length;
    }

    uint8_t kbuf[1024];
    uint32_t total = 0;
    while (total < len) {
        uint32_t chunk = len - total;
        if (chunk > sizeof(kbuf)) chunk = (uint32_t)sizeof(kbuf);

        if (copy_from_user(kbuf, (const uint8_t*)user_buf + total, chunk) < 0) {
            return -EFAULT;
        }

        uint32_t wr = vfs_write(f->node, is_regular ? f->offset : 0, chunk, kbuf);
        if (wr == 0) break;

        if (is_regular) f->offset += wr;
        total += wr;
        if (wr < chunk) break; /* short write: sink accepted less than offered */
    }
    return (int)total;
}
/*
 * ioctl: forward `cmd` and the raw user argument pointer to the node's
 * f_ops->ioctl callback. Returns -EBADF for an invalid descriptor and
 * -ENOTTY when the node has no ioctl callback. The argument pointer is not
 * validated here; it is passed through unchanged.
 */
static int syscall_ioctl_impl(int fd, uint32_t cmd, void* user_arg) {
    struct file* file = fd_get(fd);
    if (file == NULL || file->node == NULL) {
        return -EBADF;
    }

    fs_node_t* node = file->node;
    if (node->f_ops == NULL || node->f_ops->ioctl == NULL) {
        return -ENOTTY;
    }
    return node->f_ops->ioctl(node, cmd, user_arg);
}
/*
 * setsid: make the caller the leader of a new session and process group,
 * both identified by its own pid. A process that already leads its process
 * group (pgrp_id == pid) is refused with -EPERM, except for pid 0.
 * Returns the new session id, or -EINVAL when there is no current process.
 */
static int syscall_setsid_impl(void) {
    if (current_process == NULL) {
        return -EINVAL;
    }

    const int leads_group = (current_process->pgrp_id == current_process->pid);
    if (leads_group && current_process->pid != 0) {
        return -EPERM;
    }

    current_process->pgrp_id = current_process->pid;
    current_process->session_id = current_process->pid;
    return (int)current_process->session_id;
}
/*
 * setpgid: change the caller's own process group. `pid` must be 0 or the
 * caller's pid (any other target is -EINVAL). A `pgid` of 0 means "use my
 * own pid"; a negative group id is -EINVAL. Returns 0 on success.
 */
static int syscall_setpgid_impl(int pid, int pgid) {
    if (current_process == NULL) {
        return -EINVAL;
    }

    const int self = (int)current_process->pid;
    if (pid != 0 && pid != self) {
        return -EINVAL;
    }

    const int group = (pgid == 0) ? self : pgid;
    if (group < 0) {
        return -EINVAL;
    }

    current_process->pgrp_id = (uint32_t)group;
    return 0;
}
/* getpgrp: the caller's process group id, or 0 when there is no current process. */
static int syscall_getpgrp_impl(void) {
    return current_process ? (int)current_process->pgrp_id : 0;
}
static int syscall_sigaction_impl(int sig, const struct sigaction* user_act, struct sigaction* user_oldact) {
if (!current_process) return -EINVAL;
if (sig <= 0 || sig >= PROCESS_MAX_SIG) return -EINVAL;
if (user_oldact) {
if (user_range_ok(user_oldact, sizeof(*user_oldact)) == 0) return -EFAULT;
struct sigaction old = current_process->sigactions[sig];
if (copy_to_user(user_oldact, &old, sizeof(old)) < 0) return -EFAULT;
}
if (!user_act) {
return 0;
}
if (user_range_ok(user_act, sizeof(*user_act)) == 0) return -EFAULT;
struct sigaction act;
if (copy_from_user(&act, user_act, sizeof(act)) < 0) return -EFAULT;
current_process->sigactions[sig] = act;
return 0;
}
/*
 * sigprocmask: read and/or modify the caller's blocked-signal bitmask.
 *
 * When `old_out` is non-NULL the current mask is copied out first (this
 * happens even if `how` later turns out to be invalid). `how` selects the
 * update:
 *   0 - replace the blocked mask with `mask`
 *   1 - add the bits of `mask` to the blocked mask
 *   2 - remove the bits of `mask` from the blocked mask
 * Any other value returns -EINVAL. Also returns -EINVAL without a current
 * process and -EFAULT for an invalid `old_out`.
 */
static int syscall_sigprocmask_impl(uint32_t how, uint32_t mask, uint32_t* old_out) {
    if (current_process == NULL) {
        return -EINVAL;
    }

    if (old_out != NULL) {
        if (!user_range_ok(old_out, sizeof(*old_out))) {
            return -EFAULT;
        }
        uint32_t previous = current_process->sig_blocked_mask;
        if (copy_to_user(old_out, &previous, sizeof(previous)) < 0) {
            return -EFAULT;
        }
    }

    switch (how) {
    case 0U:
        current_process->sig_blocked_mask = mask;
        break;
    case 1U:
        current_process->sig_blocked_mask |= mask;
        break;
    case 2U:
        current_process->sig_blocked_mask &= ~mask;
        break;
    default:
        return -EINVAL;
    }
    return 0;
}
/*
 * Kernel-side timespec exchanged with user space by the nanosleep and
 * clock_gettime implementations in this file. Both fields are 32-bit.
 */
struct timespec {
uint32_t tv_sec; /* whole seconds */
uint32_t tv_nsec; /* nanoseconds; nanosleep rejects values >= 1000000000 */
};
/* Clock ids accepted by syscall_clock_gettime_impl; any other id gets -EINVAL. */
enum {
CLOCK_REALTIME = 0, /* answered from rtc_unix_timestamp(), tv_nsec always 0 */
CLOCK_MONOTONIC = 1, /* answered from clock_gettime_ns() */
};
/*
 * nanosleep: put the caller to sleep for at least the requested interval.
 *
 * The request is converted to milliseconds and then rounded up to timer
 * ticks; any non-zero request sleeps for at least one tick. The conversion is
 * done in 64-bit arithmetic because tv_sec * 1000 no longer fits in 32 bits
 * once tv_sec exceeds roughly 4.29 million seconds, which previously wrapped
 * and silently shortened long sleeps. Tick counts that do not fit the 32-bit
 * argument of process_sleep() are clamped to the maximum.
 *
 * `user_rem`, when non-NULL and valid, is always filled with {0, 0}; a bad
 * `user_rem` is ignored on purpose (best effort).
 *
 * Returns 0 on success, -EFAULT for a NULL/invalid request pointer or failed
 * copy, -EINVAL when tv_nsec is not below one second.
 */
static int syscall_nanosleep_impl(const struct timespec* user_req, struct timespec* user_rem) {
    if (!user_req) return -EFAULT;
    if (user_range_ok(user_req, sizeof(struct timespec)) == 0) return -EFAULT;

    struct timespec req;
    if (copy_from_user(&req, user_req, sizeof(req)) < 0) return -EFAULT;
    if (req.tv_nsec >= 1000000000U) return -EINVAL;

    /* 64-bit intermediates: neither the multiply nor the round-up can wrap. */
    uint64_t ms = (uint64_t)req.tv_sec * 1000U + req.tv_nsec / 1000000U;
    uint64_t wide_ticks = (ms + TIMER_MS_PER_TICK - 1) / TIMER_MS_PER_TICK;

    /* Sub-millisecond requests truncate to 0 ms; still sleep one tick. */
    if (wide_ticks == 0 && (req.tv_sec > 0 || req.tv_nsec > 0)) wide_ticks = 1;

    uint32_t ticks = (wide_ticks > 0xFFFFFFFFULL) ? 0xFFFFFFFFU : (uint32_t)wide_ticks;
    if (ticks > 0) {
        process_sleep(ticks);
    }

    if (user_rem) {
        if (user_range_ok(user_rem, sizeof(struct timespec)) != 0) {
            struct timespec rem = {0, 0};
            (void)copy_to_user(user_rem, &rem, sizeof(rem));
        }
    }
    return 0;
}
static int syscall_clock_gettime_impl(uint32_t clk_id, struct timespec* user_tp) {
if (!user_tp) return -EFAULT;
if (user_range_ok(user_tp, sizeof(struct timespec)) == 0) return -EFAULT;
if (clk_id != CLOCK_REALTIME && clk_id != CLOCK_MONOTONIC) return -EINVAL;
struct timespec tp;
if (clk_id == CLOCK_REALTIME) {
tp.tv_sec = rtc_unix_timestamp();
tp.tv_nsec = 0;
} else {
uint64_t ns = clock_gettime_ns();
tp.tv_sec = (uint32_t)(ns / 1000000000ULL);
tp.tv_nsec = (uint32_t)(ns % 1000000000ULL);
}
if (copy_to_user(user_tp, &tp, sizeof(tp)) < 0) return -EFAULT;
return 0;
}
/*
 * Protection bits for the mmap `prot` argument. syscall_mmap_impl itself only
 * inspects PROT_WRITE (to make anonymous pages writable); for fd-backed
 * mappings the whole value is passed through to the node's mmap callback.
 */
enum {
PROT_NONE = 0x0,
PROT_READ = 0x1,
PROT_WRITE = 0x2,
PROT_EXEC = 0x4,
};
/*
 * Flag bits for the mmap `flags` argument. syscall_mmap_impl acts on
 * MAP_FIXED and MAP_ANONYMOUS only; it does not test MAP_SHARED or
 * MAP_PRIVATE.
 */
enum {
MAP_SHARED = 0x01,
MAP_PRIVATE = 0x02,
MAP_FIXED = 0x10, /* use `addr` exactly instead of searching for a free area */
MAP_ANONYMOUS = 0x20, /* no backing fd; pages are freshly allocated and zeroed */
};
/*
 * Pick a base address for a non-fixed mapping of `length` bytes by asking
 * vmm_find_free_area() to search the window [0x40000000, 0x7FF00000).
 * Returns 0 when there is no current process; otherwise returns whatever the
 * VMM search returns (callers treat 0 as "no space").
 */
static uintptr_t mmap_find_free(uint32_t length) {
    const uintptr_t window_lo = 0x40000000U;
    const uintptr_t window_hi = 0x7FF00000U;

    if (current_process == NULL) {
        return 0;
    }
    return vmm_find_free_area(window_lo, window_hi, length);
}
/*
 * mmap: create a new mapping in the calling process.
 *
 * Anonymous mappings (MAP_ANONYMOUS) get freshly allocated, zero-filled
 * pages, writable only when PROT_WRITE is requested. Otherwise `fd` must
 * refer to a node providing an f_ops->mmap callback, which performs the
 * mapping itself and returns the final base (0 is treated as failure).
 *
 * With MAP_FIXED the page-aligned, non-zero `addr` is used as is; otherwise a
 * free area is chosen by mmap_find_free(). The mapping is recorded in the
 * first free entry of current_process->mmaps (an entry with length 0).
 *
 * Hardening relative to the earlier version of this function:
 *  - a `length` whose page round-up would wrap to 0 is rejected (-ENOMEM);
 *  - a MAP_FIXED range must lie entirely below the kernel base and must not
 *    wrap the address space; previously only the start address was checked;
 *  - if a page allocation fails part-way through an anonymous mapping, the
 *    pages mapped so far are unmapped again instead of being left behind
 *    untracked.
 *
 * Returns the mapping base on success or a negated errno cast to uintptr_t:
 * -EINVAL (no process, zero length, bad fixed address/range), -EBADF,
 * -ENOSYS (node cannot be mapped), -ENOMEM (no address space, no slot, no
 * memory, or callback failure).
 */
__attribute__((noinline))
static uintptr_t syscall_mmap_impl(uintptr_t addr, uint32_t length, uint32_t prot,
                                   uint32_t flags, int fd, uint32_t offset) {
    if (!current_process) return (uintptr_t)-EINVAL;
    if (length == 0) return (uintptr_t)-EINVAL;

    int is_anon = (flags & MAP_ANONYMOUS) != 0;

    /* fd-backed mmap: the file's node must provide a mmap callback */
    fs_node_t* mmap_node = NULL;
    if (!is_anon) {
        if (fd < 0) return (uintptr_t)-EBADF;
        struct file* f = fd_get(fd);
        if (!f || !f->node) return (uintptr_t)-EBADF;
        if (!(f->node->f_ops && f->node->f_ops->mmap)) return (uintptr_t)-ENOSYS;
        mmap_node = f->node;
    }

    /* Rounding up to a whole page must not wrap the 32-bit length to 0. */
    if (length > 0xFFFFF000U) return (uintptr_t)-ENOMEM;
    uint32_t aligned_len = (length + 0xFFFU) & ~(uint32_t)0xFFFU;

    uintptr_t base;
    if (flags & MAP_FIXED) {
        if (addr == 0 || (addr & 0xFFF)) return (uintptr_t)-EINVAL;

        /* The whole range, not just its first byte, must stay in user space. */
        uintptr_t kbase = hal_mm_kernel_virt_base();
        if (kbase && (addr >= kbase || aligned_len > kbase - addr)) return (uintptr_t)-EINVAL;
        if (addr + aligned_len < addr) return (uintptr_t)-EINVAL;

        base = addr;
    } else {
        base = mmap_find_free(aligned_len);
        if (!base) return (uintptr_t)-ENOMEM;
    }

    /* A table entry with length 0 is free. */
    int slot = -1;
    for (int i = 0; i < PROCESS_MAX_MMAPS; i++) {
        if (current_process->mmaps[i].length == 0) { slot = i; break; }
    }
    if (slot < 0) return (uintptr_t)-ENOMEM;

    if (mmap_node) {
        /* Device-backed mmap: delegate to the node's mmap callback */
        uintptr_t (*fn_mmap)(fs_node_t*, uintptr_t, uint32_t, uint32_t, uint32_t) = NULL;
        if (mmap_node->f_ops && mmap_node->f_ops->mmap) fn_mmap = mmap_node->f_ops->mmap;
        if (!fn_mmap) return (uintptr_t)-ENOSYS;

        uintptr_t result = fn_mmap(mmap_node, base, aligned_len, prot, offset);
        if (!result) return (uintptr_t)-ENOMEM;
        base = result;
    } else {
        /* Anonymous mmap: allocate fresh zeroed pages */
        uint32_t vmm_flags = VMM_FLAG_PRESENT | VMM_FLAG_USER;
        if (prot & PROT_WRITE) vmm_flags |= VMM_FLAG_RW;

        for (uintptr_t va = base; va < base + aligned_len; va += 0x1000U) {
            void* frame = pmm_alloc_page();
            if (!frame) {
                /*
                 * Undo the pages mapped so far so no untracked mappings are
                 * left behind. This uses the same call munmap relies on.
                 * NOTE(review): whether vmm_unmap_page() also releases the
                 * physical frame is not visible here - confirm.
                 */
                for (uintptr_t undo = base; undo < va; undo += 0x1000U) {
                    vmm_unmap_page((uint64_t)undo);
                }
                return (uintptr_t)-ENOMEM;
            }
            vmm_map_page((uint64_t)(uintptr_t)frame, (uint64_t)va, vmm_flags);
            memset((void*)va, 0, 0x1000U);
        }
    }

    current_process->mmaps[slot].base = base;
    current_process->mmaps[slot].length = aligned_len;
    current_process->mmaps[slot].shmid = -1;
    return base;
}
/*
 * munmap: remove a mapping previously recorded in current_process->mmaps.
 *
 * Only whole mappings can be removed: `addr` must be the page-aligned base
 * of a recorded entry and the page-rounded `length` must equal the recorded
 * length exactly. Every page in the range is unmapped and the table entry is
 * reset (base 0, length 0, shmid -1).
 *
 * Returns 0 on success, -EINVAL for no current process, a zero/unaligned
 * address, a zero length, or when no matching entry exists.
 */
static int syscall_munmap_impl(uintptr_t addr, uint32_t length) {
    if (current_process == NULL) {
        return -EINVAL;
    }
    if (addr == 0 || (addr & 0xFFF) != 0) {
        return -EINVAL;
    }
    if (length == 0) {
        return -EINVAL;
    }

    const uint32_t span = (length + 0xFFFU) & ~(uint32_t)0xFFFU;

    /* Locate the entry that matches both base and rounded length. */
    int idx = 0;
    while (idx < PROCESS_MAX_MMAPS &&
           !(current_process->mmaps[idx].base == addr &&
             current_process->mmaps[idx].length == span)) {
        idx++;
    }
    if (idx >= PROCESS_MAX_MMAPS) {
        return -EINVAL;
    }

    const uintptr_t end = addr + span;
    for (uintptr_t page = addr; page < end; page += 0x1000U) {
        vmm_unmap_page((uint64_t)page);
    }

    current_process->mmaps[idx].shmid = -1;
    current_process->mmaps[idx].length = 0;
    current_process->mmaps[idx].base = 0;
    return 0;
}
/*
 * brk: query or move the end of the calling process's heap.
 *
 * addr == 0 queries the current break. Otherwise the request is refused (the
 * current break is returned unchanged) when `addr` lies below heap_start, at
 * or above 0x40000000 (the address where mmap_find_free starts searching),
 * or at or above the kernel base. Growing maps fresh zeroed, writable user
 * pages between the old and new page-rounded break; shrinking unmaps the
 * pages that fall above the new break.
 *
 * If a page allocation fails while growing, the pages mapped during this
 * call are unmapped again before returning the unchanged break. Previously
 * they stayed mapped above the recorded break, so a later grow would map new
 * frames over them.
 *
 * Returns the new break on success, the unchanged break on refusal or
 * allocation failure, and 0 when there is no current process.
 */
static uintptr_t syscall_brk_impl(uintptr_t addr) {
    if (!current_process) return 0;
    if (addr == 0) {
        return current_process->heap_break;
    }

    const uintptr_t KERN_BASE = hal_mm_kernel_virt_base();
    const uintptr_t USER_STACK_BASE = 0x40000000U;

    if (addr < current_process->heap_start) return current_process->heap_break;
    if (addr >= USER_STACK_BASE) return current_process->heap_break;
    if (KERN_BASE && addr >= KERN_BASE) return current_process->heap_break;

    uintptr_t old_brk = current_process->heap_break;
    uintptr_t new_brk = (addr + 0xFFFU) & ~(uintptr_t)0xFFFU;
    uintptr_t old_brk_page = (old_brk + 0xFFFU) & ~(uintptr_t)0xFFFU;

    if (new_brk > old_brk_page) {
        for (uintptr_t va = old_brk_page; va < new_brk; va += 0x1000U) {
            void* frame = pmm_alloc_page();
            if (!frame) {
                /*
                 * Roll back this call's partial growth with the same unmap
                 * call the shrink path uses.
                 * NOTE(review): whether vmm_unmap_page() also releases the
                 * physical frame is not visible here - confirm.
                 */
                for (uintptr_t undo = old_brk_page; undo < va; undo += 0x1000U) {
                    vmm_unmap_page((uint64_t)undo);
                }
                return current_process->heap_break;
            }
            vmm_as_map_page(current_process->addr_space,
                            (uint64_t)(uintptr_t)frame, (uint64_t)va,
                            VMM_FLAG_PRESENT | VMM_FLAG_RW | VMM_FLAG_USER);
            memset((void*)va, 0, 0x1000U);
        }
    } else if (new_brk < old_brk_page) {
        for (uintptr_t va = new_brk; va < old_brk_page; va += 0x1000U) {
            vmm_unmap_page((uint64_t)va);
        }
    }

    /* The break itself is byte-granular; only the mappings are page-granular. */
    current_process->heap_break = addr;
    return addr;
}
/*
 * symlink: create a symbolic link at `user_linkpath` pointing to
 * `user_target`.
 *
 * The target is taken verbatim: a fixed 128 bytes are copied from user space
 * and the last byte is forced to NUL. The link path is resolved, then split
 * at its last '/' into a parent directory and a leaf name; the parent is
 * looked up and the link is created there via tmpfs_create_symlink().
 *
 * Returns the tmpfs_create_symlink() result, -EFAULT for NULL pointers or a
 * failed copy, the negative path_resolve_user() result, -EINVAL when the
 * resolved path has no '/' or an empty leaf, and -ENOENT when the parent is
 * missing or not a directory.
 */
static int syscall_symlink_impl(const char* user_target, const char* user_linkpath) {
    if (user_target == NULL || user_linkpath == NULL) {
        return -EFAULT;
    }

    char target[128];
    char linkpath[128];
    if (copy_from_user(target, user_target, sizeof(target)) < 0) {
        return -EFAULT;
    }
    target[sizeof(target) - 1] = 0;

    int status = path_resolve_user(user_linkpath, linkpath, sizeof(linkpath));
    if (status < 0) {
        return status;
    }

    /* Index of the last '/' in the resolved path, or -1 if there is none. */
    int cut = -1;
    for (int i = 0; linkpath[i] != '\0'; i++) {
        if (linkpath[i] == '/') {
            cut = i;
        }
    }
    if (cut < 0) {
        return -EINVAL;
    }

    /* Parent is everything before the last '/', or "/" when that is index 0. */
    char parent[128];
    char leaf[128];
    strcpy(parent, linkpath);
    parent[(cut == 0) ? 1 : cut] = 0;
    strcpy(leaf, linkpath + cut + 1);

    if (leaf[0] == 0) {
        return -EINVAL;
    }

    fs_node_t* dir = vfs_lookup(parent);
    if (dir == NULL || dir->flags != FS_DIRECTORY) {
        return -ENOENT;
    }
    return tmpfs_create_symlink(dir, leaf, target);
}
/*
 * readlink: copy the target of a symbolic link into a user buffer.
 *
 * The final path component is looked up through the parent directory's
 * i_ops->lookup callback rather than through vfs_lookup() on the full path,
 * because readlink must not follow the final symlink. At most `bufsz` bytes
 * of the target are copied and no NUL terminator is written.
 *
 * Returns the number of bytes copied, -EFAULT for NULL/invalid pointers or a
 * failed copy, -EINVAL for bufsz == 0, a resolved path without '/', or a
 * node that is not a symlink, the negative path_resolve_user() result, and
 * -ENOENT when the parent, its lookup callback, or the leaf is missing.
 */
static int syscall_readlink_impl(const char* user_path, char* user_buf, uint32_t bufsz) {
    if (user_path == NULL || user_buf == NULL) {
        return -EFAULT;
    }
    if (bufsz == 0) {
        return -EINVAL;
    }
    if (!user_range_ok(user_buf, (size_t)bufsz)) {
        return -EFAULT;
    }

    char path[128];
    int status = path_resolve_user(user_path, path, sizeof(path));
    if (status < 0) {
        return status;
    }

    /* Index of the last '/' in the resolved path, or -1 if there is none. */
    int cut = -1;
    for (int i = 0; path[i] != '\0'; i++) {
        if (path[i] == '/') {
            cut = i;
        }
    }
    if (cut < 0) {
        return -EINVAL;
    }

    /* Parent is everything before the last '/', or "/" when that is index 0. */
    char parent[128];
    char leaf[128];
    strcpy(parent, path);
    parent[(cut == 0) ? 1 : cut] = 0;
    strcpy(leaf, path + cut + 1);

    fs_node_t* dir = vfs_lookup(parent);
    if (dir == NULL) {
        return -ENOENT;
    }
    if (!(dir->i_ops && dir->i_ops->lookup)) {
        return -ENOENT;
    }

    fs_node_t* link = dir->i_ops->lookup(dir, leaf);
    if (link == NULL) {
        return -ENOENT;
    }
    if (link->flags != FS_SYMLINK) {
        return -EINVAL;
    }

    uint32_t target_len = (uint32_t)strlen(link->symlink_target);
    uint32_t out_len = (target_len > bufsz) ? bufsz : target_len;
    if (copy_to_user(user_buf, link->symlink_target, out_len) < 0) {
        return -EFAULT;
    }
    return (int)out_len;
}
/*
 * link: resolve both user paths (existing path first, then the new name) and
 * forward them to vfs_link(). Returns -EFAULT if either pointer is NULL, the
 * first negative path_resolve_user() result encountered, otherwise
 * vfs_link()'s result.
 */
static int syscall_link_impl(const char* user_oldpath, const char* user_newpath) {
    char existing[128];
    char created[128];
    int status;

    if (user_oldpath == NULL || user_newpath == NULL) {
        return -EFAULT;
    }

    status = path_resolve_user(user_oldpath, existing, sizeof(existing));
    if (status >= 0) {
        status = path_resolve_user(user_newpath, created, sizeof(created));
    }
    if (status >= 0) {
        status = vfs_link(existing, created);
    }
    return status;
}
/*
 * chmod: set the mode bits of the node at `user_path` to `mode & 07777`.
 *
 * Only a caller whose effective uid is 0 or equals the node's uid may change
 * the mode; the check is skipped when there is no current process.
 *
 * Returns 0 on success, -EFAULT for a NULL path, the negative
 * path_resolve_user() result, -ENOENT when the path does not exist, and
 * -EPERM when the caller is neither root nor the owner.
 */
static int syscall_chmod_impl(const char* user_path, uint32_t mode) {
    if (user_path == NULL) {
        return -EFAULT;
    }

    char resolved[128];
    int status = path_resolve_user(user_path, resolved, sizeof(resolved));
    if (status < 0) {
        return status;
    }

    fs_node_t* target = vfs_lookup(resolved);
    if (target == NULL) {
        return -ENOENT;
    }

    if (current_process != NULL) {
        const int is_root = (current_process->euid == 0);
        const int is_owner = (current_process->euid == target->uid);
        if (!is_root && !is_owner) {
            return -EPERM;
        }
    }

    target->mode = mode & 07777;
    return 0;
}
/*
 * chown: set the owner and group of the node at `user_path`.
 *
 * Only a caller with effective uid 0 may do this; the check is skipped when
 * there is no current process. The path is looked up before the permission
 * check, so a missing path reports -ENOENT even to unprivileged callers.
 *
 * Returns 0 on success, -EFAULT for a NULL path, the negative
 * path_resolve_user() result, -ENOENT when the path does not exist, and
 * -EPERM for a non-root caller.
 */
static int syscall_chown_impl(const char* user_path, uint32_t uid, uint32_t gid) {
    if (user_path == NULL) {
        return -EFAULT;
    }

    char resolved[128];
    int status = path_resolve_user(user_path, resolved, sizeof(resolved));
    if (status < 0) {
        return status;
    }

    fs_node_t* target = vfs_lookup(resolved);
    if (target == NULL) {
        return -ENOENT;
    }

    const int privileged = (current_process == NULL) || (current_process->euid == 0);
    if (!privileged) {
        return -EPERM;
    }

    target->gid = gid;
    target->uid = uid;
    return 0;
}
void syscall_handler(struct registers* regs) {
uint32_t syscall_no = sc_num(regs);
if (syscall_no == SYSCALL_WRITE) {
uint32_t fd = sc_arg0(regs);
const char* buf = (const char*)sc_arg1(regs);
uint32_t len = sc_arg2(regs);
sc_ret(regs) = (uint32_t)syscall_write_impl((int)fd, buf, len);
return;
}
if (syscall_no == SYSCALL_GETPID) {
sc_ret(regs) = current_process ? current_process->pid : 0;
return;
}
if (syscall_no == SYSCALL_GETPPID) {
sc_ret(regs) = current_process ? current_process->parent_pid : 0;
return;
}
if (syscall_no == SYSCALL_OPEN) {
const char* path = (const char*)sc_arg0(regs);
uint32_t flags = sc_arg1(regs);
sc_ret(regs) = (uint32_t)syscall_open_impl(path, flags);
return;
}
if (syscall_no == SYSCALL_OPENAT) {
int dirfd = (int)sc_arg0(regs);
const char* path = (const char*)sc_arg1(regs);
uint32_t flags = (uint32_t)sc_arg2(regs);
uint32_t mode = (uint32_t)sc_arg3(regs);
sc_ret(regs) = (uint32_t)syscall_openat_impl(dirfd, path, flags, mode);
return;
}
if (syscall_no == SYSCALL_CHDIR) {
const char* path = (const char*)sc_arg0(regs);
sc_ret(regs) = (uint32_t)syscall_chdir_impl(path);
return;
}
if (syscall_no == SYSCALL_GETCWD) {
char* buf = (char*)sc_arg0(regs);
uint32_t size = (uint32_t)sc_arg1(regs);
sc_ret(regs) = (uint32_t)syscall_getcwd_impl(buf, size);
return;
}
if (syscall_no == SYSCALL_READ) {
int fd = (int)sc_arg0(regs);
void* buf = (void*)sc_arg1(regs);
uint32_t len = sc_arg2(regs);
sc_ret(regs) = (uint32_t)syscall_read_impl(fd, buf, len);
return;
}
if (syscall_no == SYSCALL_CLOSE) {
int fd = (int)sc_arg0(regs);
sc_ret(regs) = (uint32_t)fd_close(fd);
return;
}
if (syscall_no == SYSCALL_EXIT) {
int status = (int)sc_arg0(regs);
if (current_process) {
flock_release_pid(current_process->pid);
rlock_release_pid(current_process->pid);
}
for (int fd = 0; fd < PROCESS_MAX_FILES; fd++) {
if (current_process && current_process->files[fd]) {
(void)fd_close(fd);
}
}
process_exit_notify(status);
hal_cpu_enable_interrupts();
schedule();
for(;;) {
hal_cpu_disable_interrupts();
hal_cpu_idle();
}
}
if (syscall_no == SYSCALL_WAITPID) {
int pid = (int)sc_arg0(regs);
int* user_status = (int*)sc_arg1(regs);
uint32_t options = sc_arg2(regs);
if (user_status && user_range_ok(user_status, sizeof(int)) == 0) {
sc_ret(regs) = (uint32_t)-EFAULT;
return;
}
int status = 0;
int retpid = process_waitpid(pid, &status, options);
if (retpid < 0) {
sc_ret(regs) = (uint32_t)retpid;
return;
}
if (retpid == 0) {
sc_ret(regs) = 0;
return;
}
if (user_status) {
if (copy_to_user(user_status, &status, sizeof(status)) < 0) {
sc_ret(regs) = (uint32_t)-EFAULT;
return;
}
}
sc_ret(regs) = (uint32_t)retpid;
return;
}
if (syscall_no == SYSCALL_WAITID) {
/* waitid(idtype, id, siginfo_t* infop, options)
* idtype: 0=P_ALL, 1=P_PID, 2=P_PGID */
uint32_t idtype = sc_arg0(regs);
uint32_t id = sc_arg1(regs);
void* user_infop = (void*)sc_arg2(regs);
uint32_t options = (uint32_t)((int32_t)sc_arg3(regs));
int wait_pid_arg;
if (idtype == 0) wait_pid_arg = -1; /* P_ALL */
else if (idtype == 1) wait_pid_arg = (int)id; /* P_PID */
else { sc_ret(regs) = (uint32_t)-EINVAL; return; }
if (user_infop && user_range_ok(user_infop, 16) == 0) {
sc_ret(regs) = (uint32_t)-EFAULT;
return;
}
int status = 0;
int retpid = process_waitpid(wait_pid_arg, &status, options);
if (retpid < 0) {
sc_ret(regs) = (uint32_t)retpid;
return;
}
if (retpid == 0) {
/* WNOHANG, no child changed state yet */
if (user_infop) {
uint32_t zero[4] = {0, 0, 0, 0};
(void)copy_to_user(user_infop, zero, 16);
}
sc_ret(regs) = 0;
return;
}
if (user_infop) {
/* Fill minimal siginfo: si_signo=SIGCHLD(17), si_code=CLD_EXITED(1),
* si_pid, si_status */
uint32_t info[4];
info[0] = 17; /* si_signo = SIGCHLD */
info[1] = 1; /* si_code = CLD_EXITED */
info[2] = (uint32_t)retpid; /* si_pid */
info[3] = (uint32_t)status; /* si_status */
if (copy_to_user(user_infop, info, 16) < 0) {
sc_ret(regs) = (uint32_t)-EFAULT;
return;
}
}
sc_ret(regs) = 0;
return;
}
if (syscall_no == SYSCALL_LSEEK) {
int fd = (int)sc_arg0(regs);
int32_t off = (int32_t)sc_arg1(regs);
int whence = (int)sc_arg2(regs);
sc_ret(regs) = (uint32_t)syscall_lseek_impl(fd, off, whence);
return;
}
if (syscall_no == SYSCALL_FSTAT) {
int fd = (int)sc_arg0(regs);
struct stat* st = (struct stat*)sc_arg1(regs);
sc_ret(regs) = (uint32_t)syscall_fstat_impl(fd, st);
return;
}
if (syscall_no == SYSCALL_STAT) {
const char* path = (const char*)sc_arg0(regs);
struct stat* user_st = (struct stat*)sc_arg1(regs);
sc_ret(regs) = (uint32_t)syscall_stat_impl(path, user_st);
return;
}
if (syscall_no == SYSCALL_FSTATAT) {
int dirfd = (int)sc_arg0(regs);
const char* path = (const char*)sc_arg1(regs);
struct stat* user_st = (struct stat*)sc_arg2(regs);
uint32_t flags = (uint32_t)sc_arg3(regs);
sc_ret(regs) = (uint32_t)syscall_fstatat_impl(dirfd, path, user_st, flags);
return;
}
if (syscall_no == SYSCALL_DUP) {
int oldfd = (int)sc_arg0(regs);
sc_ret(regs) = (uint32_t)syscall_dup_impl(oldfd);
return;
}
if (syscall_no == SYSCALL_DUP2) {
int oldfd = (int)sc_arg0(regs);
int newfd = (int)sc_arg1(regs);
sc_ret(regs) = (uint32_t)syscall_dup2_impl(oldfd, newfd);
return;
}
if (syscall_no == SYSCALL_DUP3) {
int oldfd = (int)sc_arg0(regs);
int newfd = (int)sc_arg1(regs);
uint32_t flags = (uint32_t)sc_arg2(regs);
sc_ret(regs) = (uint32_t)syscall_dup3_impl(oldfd, newfd, flags);
return;
}
if (syscall_no == SYSCALL_PIPE) {
int* user_fds = (int*)sc_arg0(regs);
sc_ret(regs) = (uint32_t)syscall_pipe_impl(user_fds);
return;
}
if (syscall_no == SYSCALL_PIPE2) {
int* user_fds = (int*)sc_arg0(regs);
uint32_t flags = (uint32_t)sc_arg1(regs);
sc_ret(regs) = (uint32_t)syscall_pipe2_impl(user_fds, flags);
return;
}
if (syscall_no == SYSCALL_EXECVE) {
const char* path = (const char*)sc_arg0(regs);
const char* const* argv = (const char* const*)sc_arg1(regs);
const char* const* envp = (const char* const*)sc_arg2(regs);
sc_ret(regs) = (uint32_t)syscall_execve_impl(regs, path, argv, envp);
return;
}
if (syscall_no == SYSCALL_FORK) {
sc_ret(regs) = (uint32_t)syscall_fork_impl(regs);
return;
}
if (syscall_no == SYSCALL_POSIX_SPAWN) {
/* posix_spawn(pid_t* pid_out, path, argv, envp)
* Combines fork+execve atomically. Returns 0 on success and stores
* child pid in *pid_out. The child immediately execs path. */
uint32_t* user_pid = (uint32_t*)sc_arg0(regs);
const char* path = (const char*)sc_arg1(regs);
const char* const* argv = (const char* const*)sc_arg2(regs);
const char* const* envp = (const char* const*)sc_arg3(regs);
if (user_pid && user_range_ok(user_pid, 4) == 0) {
sc_ret(regs) = (uint32_t)-EFAULT; return;
}
/* Fork: creates child with copy of parent's regs */
int child_pid = syscall_fork_impl(regs);
if (child_pid < 0) {
sc_ret(regs) = (uint32_t)child_pid; return;
}
if (child_pid == 0) {
/* We are in the child — exec immediately */
int rc = syscall_execve_impl(regs, path, argv, envp);
if (rc < 0) {
/* execve failed — close FDs and exit child */
for (int _fd = 0; _fd < PROCESS_MAX_FILES; _fd++) {
if (current_process && current_process->files[_fd])
(void)fd_close(_fd);
}
process_exit_notify(127);
hal_cpu_enable_interrupts();
schedule();
for (;;) hal_cpu_idle();
}
return; /* execve rewrote regs, return to new program */
}
/* Parent: store child PID */
if (user_pid) {
uint32_t cpid = (uint32_t)child_pid;
(void)copy_to_user(user_pid, &cpid, 4);
}
sc_ret(regs) = 0;
return;
}
if (syscall_no == SYSCALL_POLL) {
struct pollfd* fds = (struct pollfd*)sc_arg0(regs);
uint32_t nfds = sc_arg1(regs);
int32_t timeout = (int32_t)sc_arg2(regs);
sc_ret(regs) = (uint32_t)syscall_poll_impl(fds, nfds, timeout);
return;
}
if (syscall_no == SYSCALL_KILL) {
uint32_t pid = sc_arg0(regs);
int sig = (int)sc_arg1(regs);
sc_ret(regs) = (uint32_t)process_kill(pid, sig);
return;
}
if (syscall_no == SYSCALL_SIGQUEUE) {
uint32_t pid = sc_arg0(regs);
int sig = (int)sc_arg1(regs);
/* arg2 = si_value (union sigval — int or pointer) — stored but
* not yet delivered via siginfo because AdrOS uses a bitmask for
* pending signals, not a queue. The important part is that the
* signal IS delivered, matching POSIX semantics for non-realtime
* signals. */
(void)sc_arg2(regs);
sc_ret(regs) = (uint32_t)process_kill(pid, sig);
return;
}
if (syscall_no == SYSCALL_SELECT) {
uint32_t nfds = sc_arg0(regs);
uint64_t* readfds = (uint64_t*)sc_arg1(regs);
uint64_t* writefds = (uint64_t*)sc_arg2(regs);
uint64_t* exceptfds = (uint64_t*)sc_arg3(regs);
int32_t timeout = (int32_t)sc_arg4(regs);
sc_ret(regs) = (uint32_t)syscall_select_impl(nfds, readfds, writefds, exceptfds, timeout);
return;
}
if (syscall_no == SYSCALL_IOCTL) {
int fd = (int)sc_arg0(regs);
uint32_t cmd = (uint32_t)sc_arg1(regs);
void* arg = (void*)sc_arg2(regs);
sc_ret(regs) = (uint32_t)syscall_ioctl_impl(fd, cmd, arg);
return;
}
if (syscall_no == SYSCALL_SETSID) {
sc_ret(regs) = (uint32_t)syscall_setsid_impl();
return;
}
if (syscall_no == SYSCALL_SETPGID) {
int pid = (int)sc_arg0(regs);
int pgid = (int)sc_arg1(regs);
sc_ret(regs) = (uint32_t)syscall_setpgid_impl(pid, pgid);
return;
}
if (syscall_no == SYSCALL_GETPGRP) {
sc_ret(regs) = (uint32_t)syscall_getpgrp_impl();
return;
}
if (syscall_no == SYSCALL_SIGACTION) {
int sig = (int)sc_arg0(regs);
const struct sigaction* act = (const struct sigaction*)sc_arg1(regs);
struct sigaction* oldact = (struct sigaction*)sc_arg2(regs);
sc_ret(regs) = (uint32_t)syscall_sigaction_impl(sig, act, oldact);
return;
}
if (syscall_no == SYSCALL_SIGPROCMASK) {
uint32_t how = sc_arg0(regs);
uint32_t mask = sc_arg1(regs);
uint32_t* old_out = (uint32_t*)sc_arg2(regs);
sc_ret(regs) = (uint32_t)syscall_sigprocmask_impl(how, mask, old_out);
return;
}
if (syscall_no == SYSCALL_SIGRETURN) {
const void* user_frame = (const void*)(uintptr_t)sc_arg0(regs);
sc_ret(regs) = (uint32_t)arch_sigreturn(regs, user_frame);
return;
}
if (syscall_no == SYSCALL_FCNTL) {
int fd = (int)sc_arg0(regs);
int cmd = (int)sc_arg1(regs);
uint32_t arg = (uint32_t)sc_arg2(regs);
sc_ret(regs) = (uint32_t)syscall_fcntl_impl(fd, cmd, arg);
return;
}
if (syscall_no == SYSCALL_MKDIR) {
const char* path = (const char*)sc_arg0(regs);
sc_ret(regs) = (uint32_t)syscall_mkdir_impl(path);
return;
}
if (syscall_no == SYSCALL_UNLINK) {
const char* path = (const char*)sc_arg0(regs);
sc_ret(regs) = (uint32_t)syscall_unlink_impl(path);
return;
}
if (syscall_no == SYSCALL_UNLINKAT) {
int dirfd = (int)sc_arg0(regs);
const char* path = (const char*)sc_arg1(regs);
uint32_t flags = (uint32_t)sc_arg2(regs);
sc_ret(regs) = (uint32_t)syscall_unlinkat_impl(dirfd, path, flags);
return;
}
if (syscall_no == SYSCALL_GETDENTS) {
int fd = (int)sc_arg0(regs);
void* buf = (void*)sc_arg1(regs);
uint32_t len = (uint32_t)sc_arg2(regs);
sc_ret(regs) = (uint32_t)syscall_getdents_impl(fd, buf, len);
return;
}
if (syscall_no == SYSCALL_RENAME) {
const char* oldpath = (const char*)sc_arg0(regs);
const char* newpath = (const char*)sc_arg1(regs);
sc_ret(regs) = (uint32_t)syscall_rename_impl(oldpath, newpath);
return;
}
if (syscall_no == SYSCALL_RMDIR) {
const char* path = (const char*)sc_arg0(regs);
sc_ret(regs) = (uint32_t)syscall_rmdir_impl(path);
return;
}
if (syscall_no == SYSCALL_BRK) {
uintptr_t addr = (uintptr_t)sc_arg0(regs);
sc_ret(regs) = (uint32_t)syscall_brk_impl(addr);
return;
}
if (syscall_no == SYSCALL_NANOSLEEP) {
const struct timespec* req = (const struct timespec*)sc_arg0(regs);
struct timespec* rem = (struct timespec*)sc_arg1(regs);
sc_ret(regs) = (uint32_t)syscall_nanosleep_impl(req, rem);
return;
}
if (syscall_no == SYSCALL_CLOCK_GETTIME) {
uint32_t clk_id = sc_arg0(regs);
struct timespec* tp = (struct timespec*)sc_arg1(regs);
sc_ret(regs) = (uint32_t)syscall_clock_gettime_impl(clk_id, tp);
return;
}
if (syscall_no == SYSCALL_MMAP) {
uintptr_t addr = (uintptr_t)sc_arg0(regs);
uint32_t length = sc_arg1(regs);
uint32_t prot = sc_arg2(regs);
uint32_t mflags = sc_arg3(regs);
int fd = (int)sc_arg4(regs);
sc_ret(regs) = (uint32_t)syscall_mmap_impl(addr, length, prot, mflags, fd, 0);
return;
}
if (syscall_no == SYSCALL_MUNMAP) {
uintptr_t addr = (uintptr_t)sc_arg0(regs);
uint32_t length = sc_arg1(regs);
sc_ret(regs) = (uint32_t)syscall_munmap_impl(addr, length);
return;
}
if (syscall_no == SYSCALL_SHMGET) {
uint32_t key = sc_arg0(regs);
uint32_t size = sc_arg1(regs);
int flags = (int)sc_arg2(regs);
sc_ret(regs) = (uint32_t)shm_get(key, size, flags);
return;
}
if (syscall_no == SYSCALL_SHMAT) {
int shmid = (int)sc_arg0(regs);
uintptr_t shmaddr = (uintptr_t)sc_arg1(regs);
sc_ret(regs) = (uint32_t)(uintptr_t)shm_at(shmid, shmaddr);
return;
}
if (syscall_no == SYSCALL_SHMDT) {
const void* shmaddr = (const void*)sc_arg0(regs);
sc_ret(regs) = (uint32_t)shm_dt(shmaddr);
return;
}
if (syscall_no == SYSCALL_SHMCTL) {
int shmid = (int)sc_arg0(regs);
int cmd = (int)sc_arg1(regs);
struct shmid_ds* buf = (struct shmid_ds*)sc_arg2(regs);
sc_ret(regs) = (uint32_t)shm_ctl(shmid, cmd, buf);
return;
}
if (syscall_no == SYSCALL_LINK) {
const char* oldpath = (const char*)sc_arg0(regs);
const char* newpath = (const char*)sc_arg1(regs);
sc_ret(regs) = (uint32_t)syscall_link_impl(oldpath, newpath);
return;
}
if (syscall_no == SYSCALL_SYMLINK) {
const char* target = (const char*)sc_arg0(regs);
const char* linkpath = (const char*)sc_arg1(regs);
sc_ret(regs) = (uint32_t)syscall_symlink_impl(target, linkpath);
return;
}
if (syscall_no == SYSCALL_READLINK) {
const char* path = (const char*)sc_arg0(regs);
char* buf = (char*)sc_arg1(regs);
uint32_t bufsz = sc_arg2(regs);
sc_ret(regs) = (uint32_t)syscall_readlink_impl(path, buf, bufsz);
return;
}
if (syscall_no == SYSCALL_CHMOD) {
const char* path = (const char*)sc_arg0(regs);
uint32_t mode = sc_arg1(regs);
sc_ret(regs) = (uint32_t)syscall_chmod_impl(path, mode);
return;
}
if (syscall_no == SYSCALL_CHOWN) {
const char* path = (const char*)sc_arg0(regs);
uint32_t uid = sc_arg1(regs);
uint32_t gid = sc_arg2(regs);
sc_ret(regs) = (uint32_t)syscall_chown_impl(path, uid, gid);
return;
}
if (syscall_no == SYSCALL_GETUID) {
sc_ret(regs) = current_process ? current_process->uid : 0;
return;
}
if (syscall_no == SYSCALL_GETGID) {
sc_ret(regs) = current_process ? current_process->gid : 0;
return;
}
if (syscall_no == SYSCALL_SET_THREAD_AREA) {
uintptr_t base = (uintptr_t)sc_arg0(regs);
if (!current_process) { sc_ret(regs) = (uint32_t)-EINVAL; return; }
current_process->tls_base = base;
hal_cpu_set_tls(base);
sc_ret(regs) = 0;
return;
}
if (syscall_no == SYSCALL_GETTID) {
sc_ret(regs) = current_process ? current_process->pid : 0;
return;
}
if (syscall_no == SYSCALL_CLONE) {
sc_ret(regs) = (uint32_t)syscall_clone_impl(regs);
return;
}
if (syscall_no == SYSCALL_SIGPENDING) {
uint32_t* user_set = (uint32_t*)sc_arg0(regs);
if (!current_process) { sc_ret(regs) = (uint32_t)-EINVAL; return; }
uint32_t pending = current_process->sig_pending_mask & current_process->sig_blocked_mask;
if (copy_to_user(user_set, &pending, sizeof(pending)) < 0) {
sc_ret(regs) = (uint32_t)-EFAULT;
} else {
sc_ret(regs) = 0;
}
return;
}
if (syscall_no == SYSCALL_FSYNC || syscall_no == SYSCALL_FDATASYNC) {
int fd = (int)sc_arg0(regs);
if (!current_process || fd < 0 || fd >= PROCESS_MAX_FILES || !current_process->files[fd]) {
sc_ret(regs) = (uint32_t)-EBADF;
} else {
sc_ret(regs) = 0;
}
return;
}
if (syscall_no == SYSCALL_PREAD || syscall_no == SYSCALL_PWRITE ||
syscall_no == SYSCALL_ACCESS || syscall_no == SYSCALL_TRUNCATE ||
syscall_no == SYSCALL_FTRUNCATE || syscall_no == SYSCALL_READV ||
syscall_no == SYSCALL_WRITEV) {
posix_ext_syscall_dispatch(regs, syscall_no);
return;
}
if (syscall_no == SYSCALL_UMASK) {
if (!current_process) { sc_ret(regs) = 0; return; }
uint32_t old = current_process->umask;
current_process->umask = sc_arg0(regs) & 0777;
sc_ret(regs) = old;
return;
}
if (syscall_no == SYSCALL_SETUID) {
if (!current_process) { sc_ret(regs) = (uint32_t)-EINVAL; return; }
uint32_t new_uid = sc_arg0(regs);
if (current_process->euid == 0) {
current_process->uid = new_uid;
current_process->euid = new_uid;
} else if (new_uid == current_process->uid) {
current_process->euid = new_uid;
} else {
sc_ret(regs) = (uint32_t)-EPERM;
return;
}
sc_ret(regs) = 0;
return;
}
if (syscall_no == SYSCALL_SETGID) {
if (!current_process) { sc_ret(regs) = (uint32_t)-EINVAL; return; }
uint32_t new_gid = sc_arg0(regs);
if (current_process->euid == 0) {
current_process->gid = new_gid;
current_process->egid = new_gid;
} else if (new_gid == current_process->gid) {
current_process->egid = new_gid;
} else {
sc_ret(regs) = (uint32_t)-EPERM;
return;
}
sc_ret(regs) = 0;
return;
}
if (syscall_no == SYSCALL_GETEUID) {
sc_ret(regs) = current_process ? current_process->euid : 0;
return;
}
if (syscall_no == SYSCALL_GETEGID) {
sc_ret(regs) = current_process ? current_process->egid : 0;
return;
}
if (syscall_no == SYSCALL_SETEUID) {
if (!current_process) { sc_ret(regs) = (uint32_t)-EINVAL; return; }
uint32_t new_euid = sc_arg0(regs);
if (current_process->euid == 0 || new_euid == current_process->uid) {
current_process->euid = new_euid;
sc_ret(regs) = 0;
} else {
sc_ret(regs) = (uint32_t)-EPERM;
}
return;
}
if (syscall_no == SYSCALL_SETEGID) {
if (!current_process) { sc_ret(regs) = (uint32_t)-EINVAL; return; }
uint32_t new_egid = sc_arg0(regs);
if (current_process->euid == 0 || new_egid == current_process->gid) {
current_process->egid = new_egid;
sc_ret(regs) = 0;
} else {
sc_ret(regs) = (uint32_t)-EPERM;
}
return;
}
if (syscall_no == SYSCALL_FLOCK) {
int fd = (int)sc_arg0(regs);
int operation = (int)sc_arg1(regs);
if (!current_process || fd < 0 || fd >= PROCESS_MAX_FILES || !current_process->files[fd]) {
sc_ret(regs) = (uint32_t)-EBADF;
} else {
uint32_t ino = current_process->files[fd]->node->inode;
sc_ret(regs) = (uint32_t)flock_do(ino, current_process->pid, operation);
}
return;
}
if (syscall_no == SYSCALL_SIGALTSTACK ||
syscall_no == SYSCALL_TIMES || syscall_no == SYSCALL_FUTEX) {
posix_ext_syscall_dispatch(regs, syscall_no);
return;
}
if (syscall_no == SYSCALL_ALARM) {
if (!current_process) { sc_ret(regs) = 0; return; }
uint32_t seconds = sc_arg0(regs);
uint32_t now = get_tick_count();
uint32_t new_tick = (seconds == 0) ? 0 : now + seconds * TICKS_PER_SEC;
current_process->alarm_interval = 0; /* alarm() is always one-shot */
uint32_t old_tick = process_alarm_set(current_process, new_tick);
uint32_t old_remaining = 0;
if (old_tick > now) {
old_remaining = (old_tick - now) / TICKS_PER_SEC + 1;
}
sc_ret(regs) = old_remaining;
return;
}
if (syscall_no == SYSCALL_SETITIMER) {
if (!current_process) { sc_ret(regs) = (uint32_t)-EINVAL; return; }
int which = (int)sc_arg0(regs);
const void* user_new = (const void*)sc_arg1(regs);
void* user_old = (void*)sc_arg2(regs);
struct k_itimerval knew;
memset(&knew, 0, sizeof(knew));
if (user_new) {
if (copy_from_user(&knew, user_new, sizeof(knew)) < 0) {
sc_ret(regs) = (uint32_t)-EFAULT; return;
}
}
uint32_t now = get_tick_count();
if (which == ITIMER_REAL) {
/* Return old value */
if (user_old) {
struct k_itimerval kold;
memset(&kold, 0, sizeof(kold));
ticks_to_timeval(current_process->alarm_interval, &kold.it_interval);
if (current_process->alarm_tick > now)
ticks_to_timeval(current_process->alarm_tick - now, &kold.it_value);
if (copy_to_user(user_old, &kold, sizeof(kold)) < 0) {
sc_ret(regs) = (uint32_t)-EFAULT; return;
}
}
/* Set new value */
uint32_t val_ticks = timeval_to_ticks(&knew.it_value);
uint32_t int_ticks = timeval_to_ticks(&knew.it_interval);
current_process->alarm_interval = int_ticks;
process_alarm_set(current_process, val_ticks ? now + val_ticks : 0);
} else if (which == ITIMER_VIRTUAL) {
if (user_old) {
struct k_itimerval kold;
memset(&kold, 0, sizeof(kold));
ticks_to_timeval(current_process->itimer_virt_interval, &kold.it_interval);
ticks_to_timeval(current_process->itimer_virt_value, &kold.it_value);
if (copy_to_user(user_old, &kold, sizeof(kold)) < 0) {
sc_ret(regs) = (uint32_t)-EFAULT; return;
}
}
current_process->itimer_virt_value = timeval_to_ticks(&knew.it_value);
current_process->itimer_virt_interval = timeval_to_ticks(&knew.it_interval);
} else if (which == ITIMER_PROF) {
if (user_old) {
struct k_itimerval kold;
memset(&kold, 0, sizeof(kold));
ticks_to_timeval(current_process->itimer_prof_interval, &kold.it_interval);
ticks_to_timeval(current_process->itimer_prof_value, &kold.it_value);
if (copy_to_user(user_old, &kold, sizeof(kold)) < 0) {
sc_ret(regs) = (uint32_t)-EFAULT; return;
}
}
current_process->itimer_prof_value = timeval_to_ticks(&knew.it_value);
current_process->itimer_prof_interval = timeval_to_ticks(&knew.it_interval);
} else {
sc_ret(regs) = (uint32_t)-EINVAL; return;
}
sc_ret(regs) = 0;
return;
}
if (syscall_no == SYSCALL_GETITIMER) {
if (!current_process) { sc_ret(regs) = (uint32_t)-EINVAL; return; }
int which = (int)sc_arg0(regs);
void* user_val = (void*)sc_arg1(regs);
if (!user_val) { sc_ret(regs) = (uint32_t)-EFAULT; return; }
struct k_itimerval kval;
memset(&kval, 0, sizeof(kval));
uint32_t now = get_tick_count();
if (which == ITIMER_REAL) {
ticks_to_timeval(current_process->alarm_interval, &kval.it_interval);
if (current_process->alarm_tick > now)
ticks_to_timeval(current_process->alarm_tick - now, &kval.it_value);
} else if (which == ITIMER_VIRTUAL) {
ticks_to_timeval(current_process->itimer_virt_interval, &kval.it_interval);
ticks_to_timeval(current_process->itimer_virt_value, &kval.it_value);
} else if (which == ITIMER_PROF) {
ticks_to_timeval(current_process->itimer_prof_interval, &kval.it_interval);
ticks_to_timeval(current_process->itimer_prof_value, &kval.it_value);
} else {
sc_ret(regs) = (uint32_t)-EINVAL; return;
}
if (copy_to_user(user_val, &kval, sizeof(kval)) < 0) {
sc_ret(regs) = (uint32_t)-EFAULT; return;
}
sc_ret(regs) = 0;
return;
}
if (syscall_no == SYSCALL_SIGSUSPEND) {
if (!current_process) { sc_ret(regs) = (uint32_t)-EINVAL; return; }
uint32_t new_mask = 0;
if (copy_from_user(&new_mask, (const void*)sc_arg0(regs), sizeof(new_mask)) < 0) {
sc_ret(regs) = (uint32_t)-EFAULT; return;
}
uint32_t old_mask = current_process->sig_blocked_mask;
current_process->sig_blocked_mask = new_mask;
extern void schedule(void);
while ((current_process->sig_pending_mask & ~current_process->sig_blocked_mask) == 0) {
schedule();
}
current_process->sig_blocked_mask = old_mask;
sc_ret(regs) = (uint32_t)-EINTR;
return;
}
if (syscall_no == SYSCALL_GETTIMEOFDAY) {
struct { uint32_t tv_sec; uint32_t tv_usec; } tv;
void* user_tv = (void*)sc_arg0(regs);
/* arg1 = timezone, ignored (obsolete per POSIX) */
if (!user_tv) { sc_ret(regs) = (uint32_t)-EFAULT; return; }
if (user_range_ok(user_tv, 8) == 0) { sc_ret(regs) = (uint32_t)-EFAULT; return; }
uint64_t ns = clock_gettime_ns();
uint32_t epoch_sec = rtc_unix_timestamp();
tv.tv_sec = epoch_sec;
tv.tv_usec = (uint32_t)((ns % 1000000000ULL) / 1000ULL);
if (copy_to_user(user_tv, &tv, 8) < 0) {
sc_ret(regs) = (uint32_t)-EFAULT; return;
}
sc_ret(regs) = 0;
return;
}
if (syscall_no == SYSCALL_GETRLIMIT) {
uint32_t resource = sc_arg0(regs);
void* user_rlim = (void*)sc_arg1(regs);
if (!current_process) { sc_ret(regs) = (uint32_t)-EINVAL; return; }
if (resource >= _RLIMIT_COUNT) { sc_ret(regs) = (uint32_t)-EINVAL; return; }
if (!user_rlim || user_range_ok(user_rlim, 8) == 0) {
sc_ret(regs) = (uint32_t)-EFAULT; return;
}
if (copy_to_user(user_rlim, ¤t_process->rlimits[resource], 8) < 0) {
sc_ret(regs) = (uint32_t)-EFAULT; return;
}
sc_ret(regs) = 0;
return;
}
if (syscall_no == SYSCALL_SETRLIMIT) {
uint32_t resource = sc_arg0(regs);
const void* user_rlim = (const void*)sc_arg1(regs);
if (!current_process) { sc_ret(regs) = (uint32_t)-EINVAL; return; }
if (resource >= _RLIMIT_COUNT) { sc_ret(regs) = (uint32_t)-EINVAL; return; }
if (!user_rlim || user_range_ok(user_rlim, 8) == 0) {
sc_ret(regs) = (uint32_t)-EFAULT; return;
}
struct { uint32_t cur; uint32_t max; } new_rl;
if (copy_from_user(&new_rl, user_rlim, 8) < 0) {
sc_ret(regs) = (uint32_t)-EFAULT; return;
}
/* Non-root cannot raise max above current max */
if (new_rl.max > current_process->rlimits[resource].rlim_max &&
current_process->euid != 0) {
sc_ret(regs) = (uint32_t)-EPERM; return;
}
if (new_rl.cur > new_rl.max) new_rl.cur = new_rl.max;
current_process->rlimits[resource].rlim_cur = new_rl.cur;
current_process->rlimits[resource].rlim_max = new_rl.max;
sc_ret(regs) = 0;
return;
}
if (syscall_no == SYSCALL_MPROTECT) {
uintptr_t addr = (uintptr_t)sc_arg0(regs);
uint32_t len = sc_arg1(regs);
uint32_t prot = sc_arg2(regs);
if (!current_process) { sc_ret(regs) = (uint32_t)-EINVAL; return; }
if (addr == 0 || (addr & 0xFFF)) { sc_ret(regs) = (uint32_t)-EINVAL; return; }
if (len == 0) { sc_ret(regs) = (uint32_t)-EINVAL; return; }
/* Verify the range belongs to this process (heap, mmap, or stack) */
uint32_t aligned_len = (len + 0xFFFU) & ~(uint32_t)0xFFFU;
int owned = 0;
/* Check heap region */
if (addr >= current_process->heap_start && addr + aligned_len <= current_process->heap_break)
owned = 1;
/* Check mmap regions */
if (!owned) {
for (int i = 0; i < PROCESS_MAX_MMAPS; i++) {
uintptr_t mbase = current_process->mmaps[i].base;
uint32_t mlen = current_process->mmaps[i].length;
if (mlen == 0) continue;
if (addr >= mbase && addr + aligned_len <= mbase + mlen) {
owned = 1;
break;
}
}
}
/* Check stack region (user stack is below 0xC0000000, typically around 0xBFxxxxxx) */
if (!owned) {
uintptr_t kern_base = hal_mm_kernel_virt_base();
if (kern_base && addr < kern_base && addr >= 0x08000000U)
owned = 1; /* permissive: allow for text/data/bss/stack regions */
}
if (!owned) { sc_ret(regs) = (uint32_t)-ENOMEM; return; }
/* Convert POSIX PROT_* to VMM flags */
uint32_t vmm_flags = VMM_FLAG_PRESENT | VMM_FLAG_USER;
if (prot & PROT_WRITE) vmm_flags |= VMM_FLAG_RW;
if (!(prot & PROT_EXEC)) vmm_flags |= VMM_FLAG_NX;
vmm_protect_range((uint64_t)addr, (uint64_t)aligned_len, vmm_flags);
sc_ret(regs) = 0;
return;
}
/* ---- Socket syscalls ---- */
socket_syscall_dispatch(regs, syscall_no);
/* If socket dispatch handled it, the return register is set and we return.
If not, it sets ENOSYS. Either way, return. */
return;
}
/*
 * posix_ext_syscall_dispatch - handlers for the POSIX extension syscalls.
 *
 * Kept as a separate noinline function so the pread/pwrite/access locals
 * (bounce buffers, path buffers) stay off syscall_handler's stack.
 *
 * Handles SYSCALL_PREAD, PWRITE, ACCESS, FTRUNCATE, TRUNCATE, READV, WRITEV,
 * SIGALTSTACK, FUTEX and TIMES. The result (a non-negative value or a
 * negated errno) is stored through sc_ret(regs). Any syscall number not
 * listed here yields -ENOSYS.
 *
 * regs:       saved register frame; arguments are read with sc_argN(regs).
 * syscall_no: syscall number selecting the handler.
 */
__attribute__((noinline))
static void posix_ext_syscall_dispatch(struct registers* regs, uint32_t syscall_no) {
    /* pread(fd, buf, count, offset): positional read through a 256-byte bounce buffer. */
    if (syscall_no == SYSCALL_PREAD) {
        int fd = (int)sc_arg0(regs);
        void* buf = (void*)sc_arg1(regs);
        uint32_t count = sc_arg2(regs);
        uint32_t offset = sc_arg3(regs);
        struct file* f = fd_get(fd);
        if (!f || !f->node) { sc_ret(regs) = (uint32_t)-EBADF; return; }
        if (!(f->node->f_ops && f->node->f_ops->read)) { sc_ret(regs) = (uint32_t)-ESPIPE; return; }
        /* Requests larger than 1 MiB are rejected outright. */
        if (count > 1024 * 1024) { sc_ret(regs) = (uint32_t)-EINVAL; return; }
        uint8_t kbuf[256];
        uint32_t total = 0;
        while (total < count) {
            uint32_t chunk = count - total;
            if (chunk > sizeof(kbuf)) chunk = (uint32_t)sizeof(kbuf);
            uint32_t rd = vfs_read(f->node, offset + total, chunk, kbuf);
            if (rd == 0) break;
            if (copy_to_user((uint8_t*)buf + total, kbuf, rd) < 0) {
                sc_ret(regs) = (uint32_t)-EFAULT; return;
            }
            total += rd;
            /* A short read ends the transfer. */
            if (rd < chunk) break;
        }
        sc_ret(regs) = total;
        return;
    }
    /* pwrite(fd, buf, count, offset): positional write through a 256-byte bounce buffer. */
    if (syscall_no == SYSCALL_PWRITE) {
        int fd = (int)sc_arg0(regs);
        const void* buf = (const void*)sc_arg1(regs);
        uint32_t count = sc_arg2(regs);
        uint32_t offset = sc_arg3(regs);
        struct file* f = fd_get(fd);
        if (!f || !f->node) { sc_ret(regs) = (uint32_t)-EBADF; return; }
        if (!(f->node->f_ops && f->node->f_ops->write)) { sc_ret(regs) = (uint32_t)-ESPIPE; return; }
        /* Requests larger than 1 MiB are rejected outright. */
        if (count > 1024 * 1024) { sc_ret(regs) = (uint32_t)-EINVAL; return; }
        uint8_t kbuf[256];
        uint32_t total = 0;
        while (total < count) {
            uint32_t chunk = count - total;
            if (chunk > sizeof(kbuf)) chunk = (uint32_t)sizeof(kbuf);
            if (copy_from_user(kbuf, (const uint8_t*)buf + total, chunk) < 0) {
                sc_ret(regs) = (uint32_t)-EFAULT; return;
            }
            uint32_t wr = vfs_write(f->node, offset + total, chunk, kbuf);
            if (wr == 0) break;
            total += wr;
            /* A short write ends the transfer. */
            if (wr < chunk) break;
        }
        sc_ret(regs) = total;
        return;
    }
    /* access(path, ...): only checks that the path resolves to a node;
     * no mode argument is read here. */
    if (syscall_no == SYSCALL_ACCESS) {
        const char* user_path = (const char*)sc_arg0(regs);
        if (!user_path) { sc_ret(regs) = (uint32_t)-EFAULT; return; }
        char path[128];
        int prc = path_resolve_user(user_path, path, sizeof(path));
        if (prc < 0) { sc_ret(regs) = (uint32_t)prc; return; }
        fs_node_t* node = vfs_lookup(path);
        if (!node) { sc_ret(regs) = (uint32_t)-ENOENT; return; }
        sc_ret(regs) = 0;
        return;
    }
    /* ftruncate(fd, length): sets the node's length field on a regular file. */
    if (syscall_no == SYSCALL_FTRUNCATE) {
        int fd = (int)sc_arg0(regs);
        uint32_t length = sc_arg1(regs);
        struct file* f = fd_get(fd);
        if (!f || !f->node) { sc_ret(regs) = (uint32_t)-EBADF; return; }
        if (!(f->node->flags & FS_FILE)) { sc_ret(regs) = (uint32_t)-EINVAL; return; }
        f->node->length = length;
        sc_ret(regs) = 0;
        return;
    }
    /* truncate(path, length): path-based variant of ftruncate. */
    if (syscall_no == SYSCALL_TRUNCATE) {
        const char* user_path = (const char*)sc_arg0(regs);
        uint32_t length = sc_arg1(regs);
        if (!user_path) { sc_ret(regs) = (uint32_t)-EFAULT; return; }
        char path[128];
        int prc = path_resolve_user(user_path, path, sizeof(path));
        if (prc < 0) { sc_ret(regs) = (uint32_t)prc; return; }
        fs_node_t* node = vfs_lookup(path);
        if (!node) { sc_ret(regs) = (uint32_t)-ENOENT; return; }
        if (!(node->flags & FS_FILE)) { sc_ret(regs) = (uint32_t)-EISDIR; return; }
        node->length = length;
        sc_ret(regs) = 0;
        return;
    }
    /* readv(fd, iov, iovcnt): at most 16 vectors, each read via syscall_read_impl. */
    if (syscall_no == SYSCALL_READV) {
        int fd = (int)sc_arg0(regs);
        struct { void* iov_base; uint32_t iov_len; } iov;
        const void* user_iov = (const void*)sc_arg1(regs);
        int iovcnt = (int)sc_arg2(regs);
        if (iovcnt <= 0 || iovcnt > 16) { sc_ret(regs) = (uint32_t)-EINVAL; return; }
        uint32_t total = 0;
        for (int i = 0; i < iovcnt; i++) {
            /* Each user iovec entry is read as 8 bytes (pointer + length). */
            if (copy_from_user(&iov, (const char*)user_iov + i * 8, 8) < 0) {
                sc_ret(regs) = (uint32_t)-EFAULT; return;
            }
            if (iov.iov_len == 0) continue;
            int r = syscall_read_impl(fd, iov.iov_base, iov.iov_len);
            /* An error is reported only if nothing was transferred yet. */
            if (r < 0) { if (total == 0) { sc_ret(regs) = (uint32_t)r; return; } break; }
            total += (uint32_t)r;
            if ((uint32_t)r < iov.iov_len) break;
        }
        sc_ret(regs) = total;
        return;
    }
    /* writev(fd, iov, iovcnt): at most 16 vectors, each written via syscall_write_impl. */
    if (syscall_no == SYSCALL_WRITEV) {
        int fd = (int)sc_arg0(regs);
        struct { const void* iov_base; uint32_t iov_len; } iov;
        const void* user_iov = (const void*)sc_arg1(regs);
        int iovcnt = (int)sc_arg2(regs);
        if (iovcnt <= 0 || iovcnt > 16) { sc_ret(regs) = (uint32_t)-EINVAL; return; }
        uint32_t total = 0;
        for (int i = 0; i < iovcnt; i++) {
            /* Each user iovec entry is read as 8 bytes (pointer + length). */
            if (copy_from_user(&iov, (const char*)user_iov + i * 8, 8) < 0) {
                sc_ret(regs) = (uint32_t)-EFAULT; return;
            }
            if (iov.iov_len == 0) continue;
            int r = syscall_write_impl(fd, iov.iov_base, iov.iov_len);
            /* An error is reported only if nothing was transferred yet. */
            if (r < 0) { if (total == 0) { sc_ret(regs) = (uint32_t)r; return; } break; }
            total += (uint32_t)r;
            if ((uint32_t)r < iov.iov_len) break;
        }
        sc_ret(regs) = total;
        return;
    }
    /* sigaltstack(new, old): the user structure is exchanged as three
     * 32-bit words in the order { sp, flags, size }. */
    if (syscall_no == SYSCALL_SIGALTSTACK) {
        if (!current_process) { sc_ret(regs) = (uint32_t)-EINVAL; return; }
#ifndef SS_DISABLE
#define SS_DISABLE 2
#endif
        uint32_t* user_old = (uint32_t*)sc_arg1(regs);
        uint32_t* user_new = (uint32_t*)sc_arg0(regs);
        /* The old settings are reported before the new ones are applied. */
        if (user_old) {
            uint32_t old_ss[3];
            old_ss[0] = (uint32_t)current_process->ss_sp;
            old_ss[1] = current_process->ss_flags;
            old_ss[2] = current_process->ss_size;
            if (copy_to_user(user_old, old_ss, 12) < 0) {
                sc_ret(regs) = (uint32_t)-EFAULT; return;
            }
        }
        if (user_new) {
            uint32_t new_ss[3];
            if (copy_from_user(new_ss, user_new, 12) < 0) {
                sc_ret(regs) = (uint32_t)-EFAULT; return;
            }
            if (new_ss[1] & SS_DISABLE) {
                current_process->ss_sp = 0;
                current_process->ss_size = 0;
                current_process->ss_flags = SS_DISABLE;
            } else {
                current_process->ss_sp = (uintptr_t)new_ss[0];
                current_process->ss_flags = new_ss[1];
                current_process->ss_size = new_ss[2];
            }
        }
        sc_ret(regs) = 0;
        return;
    }
    /* futex(uaddr, op, val): FUTEX_WAIT / FUTEX_WAKE backed by a fixed
     * table of waiters; any other op yields -ENOSYS. */
    if (syscall_no == SYSCALL_FUTEX) {
#define FUTEX_WAIT 0
#define FUTEX_WAKE 1
#define FUTEX_MAX_WAITERS 32
        static struct { uintptr_t addr; struct process* proc; } futex_waiters[FUTEX_MAX_WAITERS];
        uint32_t* uaddr = (uint32_t*)sc_arg0(regs);
        int op = (int)sc_arg1(regs);
        uint32_t val = sc_arg2(regs);
        if (!uaddr) { sc_ret(regs) = (uint32_t)-EFAULT; return; }
        if (op == FUTEX_WAIT) {
            /* The wait path dereferences current_process; guard it the same
             * way the SIGALTSTACK and TIMES handlers do. */
            if (!current_process) { sc_ret(regs) = (uint32_t)-EINVAL; return; }
            uint32_t cur = 0;
            if (copy_from_user(&cur, uaddr, sizeof(cur)) < 0) {
                sc_ret(regs) = (uint32_t)-EFAULT; return;
            }
            /* Only sleep while the futex word still holds the expected value. */
            if (cur != val) { sc_ret(regs) = (uint32_t)-EAGAIN; return; }
            /* Add to waiter list and sleep */
            int slot = -1;
            for (int i = 0; i < FUTEX_MAX_WAITERS; i++) {
                if (!futex_waiters[i].proc) { slot = i; break; }
            }
            if (slot < 0) { sc_ret(regs) = (uint32_t)-ENOMEM; return; }
            futex_waiters[slot].addr = (uintptr_t)uaddr;
            futex_waiters[slot].proc = current_process;
            extern void schedule(void);
            current_process->state = PROCESS_SLEEPING;
            /* Wake-up deadline 5000 ticks from now (100 s at 50 ticks/s —
             * TODO confirm against TIMER_HZ). */
            current_process->wake_at_tick = get_tick_count() + 5000;
            schedule();
            /* FUTEX_WAKE frees the slot itself, so by the time this process
             * runs again another waiter may already own it. Release the slot
             * only if it is still ours; otherwise we would erase the other
             * waiter's registration and it could never be woken. */
            if (futex_waiters[slot].proc == current_process) {
                futex_waiters[slot].proc = 0;
                futex_waiters[slot].addr = 0;
            }
            sc_ret(regs) = 0;
            return;
        }
        if (op == FUTEX_WAKE) {
            int woken = 0;
            int max_wake = (int)val;
            /* A non-positive count is treated as "wake one". */
            if (max_wake <= 0) max_wake = 1;
            for (int i = 0; i < FUTEX_MAX_WAITERS && woken < max_wake; i++) {
                if (futex_waiters[i].proc && futex_waiters[i].addr == (uintptr_t)uaddr) {
                    futex_waiters[i].proc->state = PROCESS_READY;
                    futex_waiters[i].proc = 0;
                    futex_waiters[i].addr = 0;
                    woken++;
                }
            }
            sc_ret(regs) = (uint32_t)woken;
            return;
        }
        sc_ret(regs) = (uint32_t)-ENOSYS;
        return;
    }
    /* times(buf): reports the caller's utime/stime (child times are always 0)
     * and returns the current tick count. */
    if (syscall_no == SYSCALL_TIMES) {
        if (!current_process) { sc_ret(regs) = (uint32_t)-EINVAL; return; }
        struct { uint32_t tms_utime; uint32_t tms_stime; uint32_t tms_cutime; uint32_t tms_cstime; } tms;
        tms.tms_utime = current_process->utime;
        tms.tms_stime = current_process->stime;
        tms.tms_cutime = 0;
        tms.tms_cstime = 0;
        void* user_buf = (void*)sc_arg0(regs);
        /* A NULL buffer is allowed; the tick count is still returned. */
        if (user_buf) {
            if (copy_to_user(user_buf, &tms, sizeof(tms)) < 0) {
                sc_ret(regs) = (uint32_t)-EFAULT; return;
            }
        }
        sc_ret(regs) = get_tick_count();
        return;
    }
    sc_ret(regs) = (uint32_t)-ENOSYS;
}
/* --- Socket VFS node --- */
/*
 * VFS read hook for socket nodes.
 * The node's inode field holds the kernel socket id; the file offset is
 * ignored. Returns the number of bytes ksocket_recv() reported, or 0 when
 * an argument is NULL or the receive returned zero or a negative value.
 */
static uint32_t sock_node_read(fs_node_t* node, uint32_t offset, uint32_t size, uint8_t* buffer) {
    (void)offset;
    if (node == NULL || buffer == NULL) {
        return 0;
    }
    int received = ksocket_recv((int)node->inode, buffer, size, 0);
    if (received <= 0) {
        return 0;
    }
    return (uint32_t)received;
}
/*
 * VFS write hook for socket nodes.
 * The node's inode field holds the kernel socket id; the file offset is
 * ignored. Returns the number of bytes ksocket_send() reported, or 0 when
 * an argument is NULL or the send returned zero or a negative value.
 */
static uint32_t sock_node_write(fs_node_t* node, uint32_t offset, uint32_t size, const uint8_t* buffer) {
    (void)offset;
    if (node == NULL || buffer == NULL) {
        return 0;
    }
    int sent = ksocket_send((int)node->inode, buffer, size, 0);
    if (sent <= 0) {
        return 0;
    }
    return (uint32_t)sent;
}
/*
 * VFS close hook for socket nodes: closes the kernel socket whose id is
 * stored in the node's inode field, then frees the node itself.
 * A NULL node is ignored.
 */
static void sock_node_close(fs_node_t* node) {
    if (node != NULL) {
        ksocket_close((int)node->inode);
        kfree(node);
    }
}
/*
 * VFS poll hook for socket nodes: forwards the requested event mask to
 * ksocket_poll() for the socket id stored in the node's inode field.
 * Returns VFS_POLL_ERR for a NULL node.
 */
static int sock_node_poll(fs_node_t* node, int events) {
    return node ? ksocket_poll((int)node->inode, events) : VFS_POLL_ERR;
}
/* File-operation table installed on every socket node by sock_node_create(). */
static const struct file_operations sock_fops = {
    .read  = sock_node_read,   /* receive from the socket */
    .write = sock_node_write,  /* send on the socket */
    .close = sock_node_close,  /* close the socket and free the node */
    .poll  = sock_node_poll,   /* readiness query */
};
/*
 * Allocate a VFS node that wraps kernel socket `sid`.
 * The node is zeroed, named "socket", flagged FS_SOCKET, carries the socket
 * id in its inode field and uses sock_fops. Returns NULL if kmalloc() fails.
 * The node is released by sock_node_close().
 */
static fs_node_t* sock_node_create(int sid) {
    fs_node_t* node = (fs_node_t*)kmalloc(sizeof(fs_node_t));
    if (node == NULL) {
        return NULL;
    }
    memset(node, 0, sizeof(*node));
    node->f_ops = &sock_fops;
    node->inode = (uint32_t)sid;
    node->flags = FS_SOCKET;
    strcpy(node->name, "socket");
    return node;
}
/*
 * Translate a file descriptor of the current process into a kernel socket id.
 *
 * Returns the socket id stored in the node's inode field, or -EBADF when
 * there is no current process, the descriptor is out of range or unused,
 * or the file is not a socket node (flags must equal FS_SOCKET exactly,
 * which is how sock_node_create() sets them).
 */
static inline int sock_fd_get_sid(int fd) {
    /* Guard against a missing process context, as the other handlers in
     * this file do before touching current_process. */
    if (!current_process) return -EBADF;
    if (fd < 0 || fd >= PROCESS_MAX_FILES) return -EBADF;
    struct file* f = current_process->files[fd];
    if (!f || !f->node || f->node->flags != FS_SOCKET) return -EBADF;
    return (int)f->node->inode;
}
/*
 * sendmsg(sockfd, msg, flags) implementation.
 *
 * Copies the user message header (name, namelen, iov, iovlen, control,
 * controllen, flags) into the kernel and sends each iovec in turn. When the
 * header carries a destination address of at least sizeof(struct sockaddr_in)
 * bytes, every segment goes through ksocket_sendto(); otherwise
 * ksocket_send() is used. At most 16 iovecs are processed; the control
 * fields are not used.
 *
 * Returns the total number of bytes sent, -EBADF for a non-socket
 * descriptor, -EFAULT for a bad user pointer, or the socket layer's error
 * if it fails before anything was sent.
 */
__attribute__((noinline))
static int syscall_sendmsg_impl(int sockfd, void* user_msg, int flags) {
    int sid = sock_fd_get_sid(sockfd);
    if (sid < 0) return -EBADF;

    struct { void* name; uint32_t namelen; void* iov; uint32_t iovlen;
             void* control; uint32_t controllen; int mflags; } kmsg;
    if (copy_from_user(&kmsg, user_msg, sizeof(kmsg)) < 0) return -EFAULT;

    /* Optional destination address. */
    struct sockaddr_in dest;
    int has_dest = 0;
    if (kmsg.name && kmsg.namelen >= sizeof(dest)) {
        if (copy_from_user(&dest, kmsg.name, sizeof(dest)) < 0) return -EFAULT;
        has_dest = 1;
    }

    int total = 0;
    for (uint32_t i = 0; i < kmsg.iovlen && i < 16; i++) {
        struct { void* base; uint32_t len; } kiov;
        uint8_t* iov_arr = (uint8_t*)kmsg.iov;
        if (copy_from_user(&kiov, &iov_arr[i * sizeof(kiov)], sizeof(kiov)) < 0)
            return -EFAULT;
        if (kiov.len == 0) continue;
        /* The segment is handed to the socket layer as a user pointer, so
         * validate the whole range first. */
        if (!user_range_ok(kiov.base, kiov.len)) return -EFAULT;

        int ret;
        if (has_dest)
            ret = ksocket_sendto(sid, kiov.base, kiov.len, flags, &dest);
        else
            ret = ksocket_send(sid, kiov.base, kiov.len, flags);
        /* Report the error only if nothing was sent yet. */
        if (ret < 0) return (total > 0) ? total : ret;
        total += ret;
        /* Stop after a short send: continuing with the next iovec would skip
         * the unsent tail of this one. Matches the WRITEV handler and
         * syscall_recvmsg_impl(). */
        if ((uint32_t)ret < kiov.len) break;
    }
    return total;
}
__attribute__((noinline))
static int syscall_recvmsg_impl(int sockfd, void* user_msg, int flags) {
int sid = sock_fd_get_sid(sockfd);
if (sid < 0) return -EBADF;
struct { void* name; uint32_t namelen; void* iov; uint32_t iovlen;
void* control; uint32_t controllen; int mflags; } kmsg;
if (copy_from_user(&kmsg, user_msg, sizeof(kmsg)) < 0) return -EFAULT;
int total = 0;
struct sockaddr_in src;
memset(&src, 0, sizeof(src));
for (uint32_t i = 0; i < kmsg.iovlen && i < 16; i++) {
struct { void* base; uint32_t len; } kiov;
uint8_t* iov_arr = (uint8_t*)kmsg.iov;
if (copy_from_user(&kiov, &iov_arr[i * sizeof(kiov)], sizeof(kiov)) < 0)
return -EFAULT;
if (kiov.len == 0) continue;
if (!user_range_ok(kiov.base, kiov.len)) return -EFAULT;
int ret = ksocket_recvfrom(sid, kiov.base, kiov.len, flags, &src);
if (ret < 0) return (total > 0) ? total : ret;
total += ret;
if ((uint32_t)ret < kiov.len) break;
}
if (kmsg.name && kmsg.namelen >= sizeof(src))
(void)copy_to_user(kmsg.name, &src, sizeof(src));
return total;
}
/* Separate function to keep socket locals off syscall_handler's stack frame */
__attribute__((noinline))
static void socket_syscall_dispatch(struct registers* regs, uint32_t syscall_no) {
if (syscall_no == SYSCALL_SOCKET) {
int domain = (int)sc_arg0(regs);
int type = (int)sc_arg1(regs);
int protocol = (int)sc_arg2(regs);
int sid = ksocket_create(domain, type, protocol);
if (sid < 0) { sc_ret(regs) = (uint32_t)sid; return; }
fs_node_t* sn = sock_node_create(sid);
if (!sn) { ksocket_close(sid); sc_ret(regs) = (uint32_t)-ENOMEM; return; }
struct file* f = (struct file*)kmalloc(sizeof(struct file));
if (!f) { sock_node_close(sn); sc_ret(regs) = (uint32_t)-ENOMEM; return; }
f->node = sn;
f->offset = 0;
f->flags = 0;
f->refcount = 1;
int fd = fd_alloc(f);
if (fd < 0) { sock_node_close(sn); kfree(f); sc_ret(regs) = (uint32_t)-EMFILE; return; }
sc_ret(regs) = (uint32_t)fd;
return;
}
if (syscall_no == SYSCALL_BIND) {
int sid = sock_fd_get_sid((int)sc_arg0(regs));
if (sid < 0) { sc_ret(regs) = (uint32_t)-EBADF; return; }
struct sockaddr_in sa;
if (copy_from_user(&sa, (const void*)sc_arg1(regs), sizeof(sa)) < 0) {
sc_ret(regs) = (uint32_t)-EFAULT; return;
}
sc_ret(regs) = (uint32_t)ksocket_bind(sid, &sa);
return;
}
if (syscall_no == SYSCALL_LISTEN) {
int sid = sock_fd_get_sid((int)sc_arg0(regs));
if (sid < 0) { sc_ret(regs) = (uint32_t)-EBADF; return; }
sc_ret(regs) = (uint32_t)ksocket_listen(sid, (int)sc_arg1(regs));
return;
}
if (syscall_no == SYSCALL_ACCEPT) {
int sid = sock_fd_get_sid((int)sc_arg0(regs));
if (sid < 0) { sc_ret(regs) = (uint32_t)-EBADF; return; }
struct sockaddr_in sa;
memset(&sa, 0, sizeof(sa));
int new_sid = ksocket_accept(sid, &sa);
if (new_sid < 0) { sc_ret(regs) = (uint32_t)new_sid; return; }
fs_node_t* sn = sock_node_create(new_sid);
if (!sn) { ksocket_close(new_sid); sc_ret(regs) = (uint32_t)-ENOMEM; return; }
struct file* f = (struct file*)kmalloc(sizeof(struct file));
if (!f) { sock_node_close(sn); sc_ret(regs) = (uint32_t)-ENOMEM; return; }
f->node = sn;
f->offset = 0;
f->flags = 0;
f->refcount = 1;
int new_fd = fd_alloc(f);
if (new_fd < 0) { sock_node_close(sn); kfree(f); sc_ret(regs) = (uint32_t)-EMFILE; return; }
if (sc_arg1(regs)) {
(void)copy_to_user((void*)sc_arg1(regs), &sa, sizeof(sa));
}
sc_ret(regs) = (uint32_t)new_fd;
return;
}
if (syscall_no == SYSCALL_CONNECT) {
int sid = sock_fd_get_sid((int)sc_arg0(regs));
if (sid < 0) { sc_ret(regs) = (uint32_t)-EBADF; return; }
struct sockaddr_in sa;
if (copy_from_user(&sa, (const void*)sc_arg1(regs), sizeof(sa)) < 0) {
sc_ret(regs) = (uint32_t)-EFAULT; return;
}
sc_ret(regs) = (uint32_t)ksocket_connect(sid, &sa);
return;
}
if (syscall_no == SYSCALL_SEND) {
int sid = sock_fd_get_sid((int)sc_arg0(regs));
if (sid < 0) { sc_ret(regs) = (uint32_t)-EBADF; return; }
size_t len = (size_t)sc_arg2(regs);
if (!user_range_ok((const void*)sc_arg1(regs), len)) {
sc_ret(regs) = (uint32_t)-EFAULT; return;
}
sc_ret(regs) = (uint32_t)ksocket_send(sid, (const void*)sc_arg1(regs), len, (int)sc_arg3(regs));
return;
}
if (syscall_no == SYSCALL_RECV) {
int sid = sock_fd_get_sid((int)sc_arg0(regs));
if (sid < 0) { sc_ret(regs) = (uint32_t)-EBADF; return; }
size_t len = (size_t)sc_arg2(regs);
if (!user_range_ok((void*)sc_arg1(regs), len)) {
sc_ret(regs) = (uint32_t)-EFAULT; return;
}
sc_ret(regs) = (uint32_t)ksocket_recv(sid, (void*)sc_arg1(regs), len, (int)sc_arg3(regs));
return;
}
if (syscall_no == SYSCALL_SENDTO) {
int sid = sock_fd_get_sid((int)sc_arg0(regs));
if (sid < 0) { sc_ret(regs) = (uint32_t)-EBADF; return; }
size_t len = (size_t)sc_arg2(regs);
if (!user_range_ok((const void*)sc_arg1(regs), len)) {
sc_ret(regs) = (uint32_t)-EFAULT; return;
}
struct sockaddr_in dest;
if (copy_from_user(&dest, (const void*)sc_arg4(regs), sizeof(dest)) < 0) {
sc_ret(regs) = (uint32_t)-EFAULT; return;
}
sc_ret(regs) = (uint32_t)ksocket_sendto(sid, (const void*)sc_arg1(regs), len,
(int)sc_arg3(regs), &dest);
return;
}
if (syscall_no == SYSCALL_RECVFROM) {
int sid = sock_fd_get_sid((int)sc_arg0(regs));
if (sid < 0) { sc_ret(regs) = (uint32_t)-EBADF; return; }
size_t len = (size_t)sc_arg2(regs);
if (!user_range_ok((void*)sc_arg1(regs), len)) {
sc_ret(regs) = (uint32_t)-EFAULT; return;
}
struct sockaddr_in src;
memset(&src, 0, sizeof(src));
int ret = ksocket_recvfrom(sid, (void*)sc_arg1(regs), len, (int)sc_arg3(regs), &src);
if (ret > 0 && sc_arg4(regs)) {
(void)copy_to_user((void*)sc_arg4(regs), &src, sizeof(src));
}
sc_ret(regs) = (uint32_t)ret;
return;
}
if (syscall_no == SYSCALL_SENDMSG) {
sc_ret(regs) = (uint32_t)syscall_sendmsg_impl(
(int)sc_arg0(regs), (void*)sc_arg1(regs), (int)sc_arg2(regs));
return;
}
if (syscall_no == SYSCALL_RECVMSG) {
sc_ret(regs) = (uint32_t)syscall_recvmsg_impl(
(int)sc_arg0(regs), (void*)sc_arg1(regs), (int)sc_arg2(regs));
return;
}
if (syscall_no == SYSCALL_SETITIMER) {
/* setitimer(which, user_new_value, user_old_value)
* struct itimerval { uint32_t it_interval; uint32_t it_value; } (ticks) */
uint32_t which = sc_arg0(regs);
void* user_new = (void*)sc_arg1(regs);
void* user_old = (void*)sc_arg2(regs);
if (!current_process) { sc_ret(regs) = (uint32_t)-EINVAL; return; }
uint32_t pair[2]; /* [0]=it_interval, [1]=it_value */
if (user_old) {
if (user_range_ok(user_old, 8) == 0) { sc_ret(regs) = (uint32_t)-EFAULT; return; }
uint32_t old[2] = {0, 0};
if (which == 0) { /* ITIMER_REAL */
old[0] = current_process->alarm_interval;
extern uint32_t get_tick_count(void);
uint32_t now = get_tick_count();
old[1] = (current_process->alarm_tick > now) ? current_process->alarm_tick - now : 0;
} else if (which == 1) { /* ITIMER_VIRTUAL */
old[0] = current_process->itimer_virt_interval;
old[1] = current_process->itimer_virt_value;
} else if (which == 2) { /* ITIMER_PROF */
old[0] = current_process->itimer_prof_interval;
old[1] = current_process->itimer_prof_value;
}
if (copy_to_user(user_old, old, 8) < 0) { sc_ret(regs) = (uint32_t)-EFAULT; return; }
}
if (user_new) {
if (user_range_ok(user_new, 8) == 0) { sc_ret(regs) = (uint32_t)-EFAULT; return; }
if (copy_from_user(pair, user_new, 8) < 0) { sc_ret(regs) = (uint32_t)-EFAULT; return; }
} else {
pair[0] = 0; pair[1] = 0;
}
if (which == 0) { /* ITIMER_REAL — uses alarm queue */
current_process->alarm_interval = pair[0];
if (pair[1] > 0) {
extern uint32_t get_tick_count(void);
process_alarm_set(current_process, get_tick_count() + pair[1]);
} else {
process_alarm_set(current_process, 0);
}
} else if (which == 1) { /* ITIMER_VIRTUAL */
current_process->itimer_virt_interval = pair[0];
current_process->itimer_virt_value = pair[1];
} else if (which == 2) { /* ITIMER_PROF */
current_process->itimer_prof_interval = pair[0];
current_process->itimer_prof_value = pair[1];
} else {
sc_ret(regs) = (uint32_t)-EINVAL; return;
}
sc_ret(regs) = 0;
return;
}
if (syscall_no == SYSCALL_GETITIMER) {
uint32_t which = sc_arg0(regs);
void* user_val = (void*)sc_arg1(regs);
if (!current_process) { sc_ret(regs) = (uint32_t)-EINVAL; return; }
if (!user_val || user_range_ok(user_val, 8) == 0) { sc_ret(regs) = (uint32_t)-EFAULT; return; }
uint32_t out[2] = {0, 0};
if (which == 0) {
out[0] = current_process->alarm_interval;
extern uint32_t get_tick_count(void);
uint32_t now = get_tick_count();
out[1] = (current_process->alarm_tick > now) ? current_process->alarm_tick - now : 0;
} else if (which == 1) {
out[0] = current_process->itimer_virt_interval;
out[1] = current_process->itimer_virt_value;
} else if (which == 2) {
out[0] = current_process->itimer_prof_interval;
out[1] = current_process->itimer_prof_value;
} else {
sc_ret(regs) = (uint32_t)-EINVAL; return;
}
if (copy_to_user(user_val, out, 8) < 0) { sc_ret(regs) = (uint32_t)-EFAULT; return; }
sc_ret(regs) = 0;
return;
}
if (syscall_no == SYSCALL_MQ_OPEN) {
const char* name = (const char*)sc_arg0(regs);
uint32_t oflag = sc_arg1(regs);
sc_ret(regs) = (uint32_t)syscall_mq_open_impl(name, oflag);
return;
}
if (syscall_no == SYSCALL_MQ_CLOSE) {
sc_ret(regs) = (uint32_t)syscall_mq_close_impl((int)sc_arg0(regs));
return;
}
if (syscall_no == SYSCALL_MQ_SEND) {
sc_ret(regs) = (uint32_t)syscall_mq_send_impl(
(int)sc_arg0(regs), (const void*)sc_arg1(regs),
sc_arg2(regs), sc_arg3(regs));
return;
}
if (syscall_no == SYSCALL_MQ_RECEIVE) {
sc_ret(regs) = (uint32_t)syscall_mq_receive_impl(
(int)sc_arg0(regs), (void*)sc_arg1(regs),
sc_arg2(regs), (uint32_t*)sc_arg3(regs));
return;
}
if (syscall_no == SYSCALL_MQ_UNLINK) {
sc_ret(regs) = (uint32_t)syscall_mq_unlink_impl((const char*)sc_arg0(regs));
return;
}
if (syscall_no == SYSCALL_SEM_OPEN) {
sc_ret(regs) = (uint32_t)syscall_sem_open_impl(
(const char*)sc_arg0(regs), sc_arg1(regs), sc_arg2(regs));
return;
}
if (syscall_no == SYSCALL_SEM_CLOSE) {
sc_ret(regs) = (uint32_t)syscall_sem_close_impl((int)sc_arg0(regs));
return;
}
if (syscall_no == SYSCALL_SEM_WAIT) {
sc_ret(regs) = (uint32_t)syscall_sem_wait_impl((int)sc_arg0(regs));
return;
}
if (syscall_no == SYSCALL_SEM_POST) {
sc_ret(regs) = (uint32_t)syscall_sem_post_impl((int)sc_arg0(regs));
return;
}
if (syscall_no == SYSCALL_SEM_UNLINK) {
sc_ret(regs) = (uint32_t)syscall_sem_unlink_impl((const char*)sc_arg0(regs));
return;
}
if (syscall_no == SYSCALL_SEM_GETVALUE) {
sc_ret(regs) = (uint32_t)syscall_sem_getvalue_impl(
(int)sc_arg0(regs), (int*)sc_arg1(regs));
return;
}
if (syscall_no == SYSCALL_GETADDRINFO) {
/* getaddrinfo(user_hostname, user_out_ip)
* Resolves hostname to IPv4 address (network byte order).
* Checks built-in hosts table first, then falls back to DNS. */
const char* user_host = (const char*)sc_arg0(regs);
uint32_t* user_out = (uint32_t*)sc_arg1(regs);
if (!user_host || !user_out) { sc_ret(regs) = (uint32_t)-EFAULT; return; }
if (user_range_ok(user_out, 4) == 0) { sc_ret(regs) = (uint32_t)-EFAULT; return; }
char host[128];
if (copy_from_user(host, user_host, 127) < 0) { sc_ret(regs) = (uint32_t)-EFAULT; return; }
host[127] = 0;
/* Built-in /etc/hosts equivalent */
uint32_t ip = 0;
if (strcmp(host, "localhost") == 0 || strcmp(host, "localhost.localdomain") == 0) {
ip = 0x0100007FU; /* 127.0.0.1 in network byte order (little-endian) */
}
if (ip == 0) {
/* Try kernel DNS resolver */
extern int dns_resolve(const char* hostname, uint32_t* out_ip);
int rc = dns_resolve(host, &ip);
if (rc < 0) { sc_ret(regs) = (uint32_t)-ENOENT; return; }
}
if (copy_to_user(user_out, &ip, 4) < 0) { sc_ret(regs) = (uint32_t)-EFAULT; return; }
sc_ret(regs) = 0;
return;
}
if (syscall_no == SYSCALL_DLOPEN) {
sc_ret(regs) = (uint32_t)syscall_dlopen_impl((const char*)sc_arg0(regs));
return;
}
if (syscall_no == SYSCALL_DLSYM) {
sc_ret(regs) = (uint32_t)syscall_dlsym_impl(
(int)sc_arg0(regs), (const char*)sc_arg1(regs), (uint32_t*)sc_arg2(regs));
return;
}
if (syscall_no == SYSCALL_DLCLOSE) {
sc_ret(regs) = (uint32_t)syscall_dlclose_impl((int)sc_arg0(regs));
return;
}
if (syscall_no == SYSCALL_EPOLL_CREATE) {
sc_ret(regs) = (uint32_t)syscall_epoll_create_impl();
return;
}
if (syscall_no == SYSCALL_EPOLL_CTL) {
sc_ret(regs) = (uint32_t)syscall_epoll_ctl_impl(
(int)sc_arg0(regs), (int)sc_arg1(regs),
(int)sc_arg2(regs), (struct epoll_event*)sc_arg3(regs));
return;
}
if (syscall_no == SYSCALL_EPOLL_WAIT) {
sc_ret(regs) = (uint32_t)syscall_epoll_wait_impl(
(int)sc_arg0(regs), (struct epoll_event*)sc_arg1(regs),
(int)sc_arg2(regs), (int)sc_arg3(regs));
return;
}
if (syscall_no == SYSCALL_INOTIFY_INIT) {
sc_ret(regs) = (uint32_t)syscall_inotify_init_impl();
return;
}
if (syscall_no == SYSCALL_INOTIFY_ADD_WATCH) {
sc_ret(regs) = (uint32_t)syscall_inotify_add_watch_impl(
(int)sc_arg0(regs), (const char*)sc_arg1(regs), (uint32_t)sc_arg2(regs));
return;
}
if (syscall_no == SYSCALL_INOTIFY_RM_WATCH) {
sc_ret(regs) = (uint32_t)syscall_inotify_rm_watch_impl(
(int)sc_arg0(regs), (int)sc_arg1(regs));
return;
}
if (syscall_no == SYSCALL_AIO_READ || syscall_no == SYSCALL_AIO_WRITE) {
sc_ret(regs) = (uint32_t)syscall_aio_rw_impl(
(void*)sc_arg0(regs), syscall_no == SYSCALL_AIO_WRITE);
return;
}
if (syscall_no == SYSCALL_AIO_ERROR) {
sc_ret(regs) = (uint32_t)syscall_aio_error_impl((void*)sc_arg0(regs));
return;
}
if (syscall_no == SYSCALL_AIO_RETURN) {
sc_ret(regs) = (uint32_t)syscall_aio_return_impl((void*)sc_arg0(regs));
return;
}
if (syscall_no == SYSCALL_AIO_SUSPEND) {
sc_ret(regs) = 0;
return;
}
if (syscall_no == SYSCALL_PIVOT_ROOT) {
if (!current_process || current_process->euid != 0) {
sc_ret(regs) = (uint32_t)-EPERM;
return;
}
const char* user_new = (const char*)sc_arg0(regs);
const char* user_put = (const char*)sc_arg1(regs);
char knew[128], kput[128];
if (path_resolve_user(user_new, knew, sizeof(knew)) < 0 ||
path_resolve_user(user_put, kput, sizeof(kput)) < 0) {
sc_ret(regs) = (uint32_t)-EFAULT;
return;
}
fs_node_t* new_root = vfs_lookup(knew);
if (!new_root || !(new_root->flags & FS_DIRECTORY)) {
sc_ret(regs) = (uint32_t)-EINVAL;
return;
}
fs_node_t* old_root = fs_root;
fs_root = new_root;
(void)vfs_mount("/", new_root);
if (old_root) {
(void)vfs_mount(kput, old_root);
}
sc_ret(regs) = 0;
return;
}
if (syscall_no == SYSCALL_MOUNT) {
if (!current_process || current_process->euid != 0) {
sc_ret(regs) = (uint32_t)-EPERM;
return;
}
const char* user_dev = (const char*)sc_arg0(regs);
const char* user_mp = (const char*)sc_arg1(regs);
const char* user_type = (const char*)sc_arg2(regs);
char kdev[64], kmp[128], ktype[32];
if (copy_from_user(kdev, user_dev, sizeof(kdev)) < 0 ||
path_resolve_user(user_mp, kmp, sizeof(kmp)) < 0 ||
copy_from_user(ktype, user_type, sizeof(ktype)) < 0) {
sc_ret(regs) = (uint32_t)-EFAULT;
return;
}
kdev[sizeof(kdev)-1] = '\0';
ktype[sizeof(ktype)-1] = '\0';
if (strcmp(ktype, "tmpfs") == 0) {
fs_node_t* tmp = tmpfs_create_root();
if (!tmp) { sc_ret(regs) = (uint32_t)-ENOMEM; return; }
sc_ret(regs) = (uint32_t)vfs_mount(kmp, tmp);
return;
}
/* Disk-based: parse /dev/hdX -> drive number */
const char* devname = kdev;
if (strncmp(devname, "/dev/", 5) == 0) devname += 5;
extern int ata_name_to_drive(const char* name);
int drive = ata_name_to_drive(devname);
if (drive < 0) { sc_ret(regs) = (uint32_t)-ENODEV; return; }
extern int init_mount_fs(const char* fstype, int drive, uint32_t lba, const char* mountpoint);
int rc = init_mount_fs(ktype, drive, 0, kmp);
sc_ret(regs) = (uint32_t)(rc < 0 ? -EIO : 0);
return;
}
if (syscall_no == SYSCALL_SETSOCKOPT) {
int sid = sock_fd_get_sid((int)sc_arg0(regs));
if (sid < 0) { sc_ret(regs) = (uint32_t)-EBADF; return; }
int level = (int)sc_arg1(regs);
int optname = (int)sc_arg2(regs);
uint32_t optlen = sc_arg4(regs);
int kval = 0;
if (optlen >= 4 && sc_arg3(regs)) {
if (copy_from_user(&kval, (const void*)sc_arg3(regs), 4) < 0) {
sc_ret(regs) = (uint32_t)-EFAULT; return;
}
}
sc_ret(regs) = (uint32_t)ksocket_setsockopt(sid, level, optname, &kval, optlen);
return;
}
if (syscall_no == SYSCALL_GETSOCKOPT) {
int sid = sock_fd_get_sid((int)sc_arg0(regs));
if (sid < 0) { sc_ret(regs) = (uint32_t)-EBADF; return; }
int level = (int)sc_arg1(regs);
int optname = (int)sc_arg2(regs);
int kval = 0;
uint32_t klen = 4;
int r = ksocket_getsockopt(sid, level, optname, &kval, &klen);
if (r == 0 && sc_arg3(regs) && sc_arg4(regs)) {
if (copy_to_user((void*)sc_arg3(regs), &kval, 4) < 0) {
sc_ret(regs) = (uint32_t)-EFAULT; return;
}
(void)copy_to_user((void*)sc_arg4(regs), &klen, 4);
}
sc_ret(regs) = (uint32_t)r;
return;
}
if (syscall_no == SYSCALL_SHUTDOWN) {
int sid = sock_fd_get_sid((int)sc_arg0(regs));
if (sid < 0) { sc_ret(regs) = (uint32_t)-EBADF; return; }
sc_ret(regs) = (uint32_t)ksocket_shutdown(sid, (int)sc_arg1(regs));
return;
}
if (syscall_no == SYSCALL_GETPEERNAME) {
int sid = sock_fd_get_sid((int)sc_arg0(regs));
if (sid < 0) { sc_ret(regs) = (uint32_t)-EBADF; return; }
struct sockaddr_in sa;
memset(&sa, 0, sizeof(sa));
int r = ksocket_getpeername(sid, &sa);
if (r == 0 && sc_arg1(regs)) {
(void)copy_to_user((void*)sc_arg1(regs), &sa, sizeof(sa));
}
sc_ret(regs) = (uint32_t)r;
return;
}
if (syscall_no == SYSCALL_GETSOCKNAME) {
int sid = sock_fd_get_sid((int)sc_arg0(regs));
if (sid < 0) { sc_ret(regs) = (uint32_t)-EBADF; return; }
struct sockaddr_in sa;
memset(&sa, 0, sizeof(sa));
int r = ksocket_getsockname(sid, &sa);
if (r == 0 && sc_arg1(regs)) {
(void)copy_to_user((void*)sc_arg1(regs), &sa, sizeof(sa));
}
sc_ret(regs) = (uint32_t)r;
return;
}
if (syscall_no == SYSCALL_GETRUSAGE) {
int who = (int)sc_arg0(regs);
void* user_buf = (void*)sc_arg1(regs);
if (!user_buf || user_range_ok(user_buf, 64) == 0) {
sc_ret(regs) = (uint32_t)-EFAULT; return;
}
/* struct rusage: we fill ru_utime and ru_stime (2 x struct timeval = 16 bytes)
* then zero the rest (total 64 bytes to be safe) */
struct {
uint32_t ru_utime_sec; uint32_t ru_utime_usec;
uint32_t ru_stime_sec; uint32_t ru_stime_usec;
uint32_t padding[12]; /* remaining fields zero */
} ru;
memset(&ru, 0, sizeof(ru));
(void)who; /* RUSAGE_SELF=0, RUSAGE_CHILDREN=-1 — we only report self */
uint32_t hz = 100; /* tick rate */
ru.ru_utime_sec = current_process->utime / hz;
ru.ru_utime_usec = (current_process->utime % hz) * (1000000 / hz);
ru.ru_stime_sec = current_process->stime / hz;
ru.ru_stime_usec = (current_process->stime % hz) * (1000000 / hz);
if (copy_to_user(user_buf, &ru, sizeof(ru)) < 0) {
sc_ret(regs) = (uint32_t)-EFAULT; return;
}
sc_ret(regs) = 0;
return;
}
if (syscall_no == SYSCALL_UNAME) {
/* struct utsname: 5 fields of 65 bytes each = 325 bytes */
void* user_buf = (void*)sc_arg0(regs);
if (!user_buf) { sc_ret(regs) = (uint32_t)-EFAULT; return; }
if (user_range_ok(user_buf, 325) == 0) { sc_ret(regs) = (uint32_t)-EFAULT; return; }
struct {
char sysname[65];
char nodename[65];
char release[65];
char version[65];
char machine[65];
} uts;
memset(&uts, 0, sizeof(uts));
strcpy(uts.sysname, "AdrOS");
strcpy(uts.nodename, "adros");
strcpy(uts.release, "1.0.0");
strcpy(uts.version, "AdrOS 1.0.0 POSIX");
strcpy(uts.machine, "i686");
if (copy_to_user(user_buf, &uts, sizeof(uts)) < 0) {
sc_ret(regs) = (uint32_t)-EFAULT; return;
}
sc_ret(regs) = 0;
return;
}
if (syscall_no == SYSCALL_UMOUNT2) {
if (!current_process || current_process->euid != 0) {
sc_ret(regs) = (uint32_t)-EPERM;
return;
}
const char* user_target = (const char*)sc_arg0(regs);
char ktarget[128];
if (path_resolve_user(user_target, ktarget, sizeof(ktarget)) < 0) {
sc_ret(regs) = (uint32_t)-EFAULT;
return;
}
sc_ret(regs) = (uint32_t)vfs_umount(ktarget);
return;
}
if (syscall_no == SYSCALL_WAIT4) {
int pid = (int)sc_arg0(regs);
int* ustatus = (int*)sc_arg1(regs);
int options = (int)sc_arg2(regs);
void* urusage = (void*)sc_arg3(regs);
if (ustatus && user_range_ok(ustatus, sizeof(int)) == 0) {
sc_ret(regs) = (uint32_t)-EFAULT; return;
}
int kstatus = 0;
int ret = process_waitpid(pid, &kstatus, options);
if (ret < 0) {
sc_ret(regs) = (uint32_t)ret;
return;
}
if (ret > 0 && ustatus) {
(void)copy_to_user(ustatus, &kstatus, sizeof(kstatus));
}
if (ret > 0 && urusage) {
/* Zero out rusage — detailed accounting deferred */
char zeros[64];
memset(zeros, 0, sizeof(zeros));
(void)copy_to_user(urusage, zeros, sizeof(zeros));
}
sc_ret(regs) = (uint32_t)ret;
return;
}
if (syscall_no == SYSCALL_MADVISE) {
/* madvise — advisory only, always succeed (no-op) */
sc_ret(regs) = 0;
return;
}
if (syscall_no == SYSCALL_EXECVEAT) {
int dirfd = (int)sc_arg0(regs);
const char* user_path = (const char*)sc_arg1(regs);
const char* const* user_argv = (const char* const*)sc_arg2(regs);
const char* const* user_envp = (const char* const*)sc_arg3(regs);
/* uint32_t flags = sc_arg4(regs); -- AT_EMPTY_PATH etc, ignored */
/* Only AT_FDCWD supported for now */
if (dirfd != -100 /* AT_FDCWD */) {
sc_ret(regs) = (uint32_t)-ENOSYS;
return;
}
int r = syscall_execve_impl(regs, user_path, user_argv, user_envp);
if (r < 0) sc_ret(regs) = (uint32_t)r;
return;
}
sc_ret(regs) = (uint32_t)-ENOSYS;
}
/*
 * syscall_init - bring up the system call layer.
 *
 * Takes no arguments and returns nothing. The whole job is handed off to
 * arch_syscall_init(); this function performs no other work itself.
 * NOTE(review): presumably arch_syscall_init() is provided by the
 * architecture layer (arch_syscall.h) -- confirm against that header.
 */
void syscall_init(void)
{
    arch_syscall_init();
}