From: Tulio A M Mendes Date: Mon, 25 May 2026 12:38:59 +0000 (-0300) Subject: security: high-priority kernel memory fixes (Round 2) X-Git-Url: https://projects.tadryanom.me/?a=commitdiff_plain;h=344cd4345c3f78d548ca9cd63bc609d9abd340b1;p=AdrOS.git security: high-priority kernel memory fixes (Round 2) A03: POSIX fd mode checks for read/write syscalls - read: reject O_WRONLY fds (except char devices) - write: reject O_RDONLY fds (except char devices) - Fix pipe fd flags: read=O_RDONLY, write=O_WRONLY - Fix socket fd flags: O_RDWR (bidirectional) A06: POSIX permission checks for kill signals - process_kill: sender must be root or same uid - process_kill_pgrp: skip processes not owned by sender SYSENTER: validate user ESP before dereferencing - If ECX points into kernel space, skip the dereference and zero arg2/arg3 (no explicit -EFAULT is returned; the syscall proceeds with zeroed args) - Prevents kernel data leak via malicious ESP AIO/Socket: kernel bounce buffers for SMAP safety - AIO read/write: copy_from_user/copy_to_user via kmalloc buffer - Socket send/recv/sendto/recvfrom: same bounce buffer pattern - Max bounce size 4096 bytes vDSO: fix tick_hz mismatch - Use TIMER_HZ (100) instead of hardcoded 50 - Add timer.h include fulltest: fix fd open modes for POSIX compliance - All O_CREAT|O_TRUNC opens now include O_WRONLY (O_RDWR for the pread/pwrite test) - O_APPEND open includes O_WRONLY - Overlay test opens with O_RDWR - dup2 redirect test opens with O_RDWR - tmpfs append test opens with O_RDWR Tests: 116/116 smoke, 142/142 battery, 111/111 host, cppcheck clean --- diff --git a/src/arch/x86/sysenter.S b/src/arch/x86/sysenter.S index 7fcf2d54..0f21a6e4 100644 --- a/src/arch/x86/sysenter.S +++ b/src/arch/x86/sysenter.S @@ -80,9 +80,19 @@ sysenter_entry: * We already have return_eip in EDX, so: * real arg3 = *(ECX + 4) * real arg2 = *(ECX + 8) + * + * SECURITY: validate ECX is in user space before dereferencing. + * If ECX >= 0xC0000000 (kernel base), it could leak kernel data. 
*/ + cmpl $0xC0000000, %ecx + jae 1f mov 4(%ecx), %edx /* EDX = arg3 (was saved by user) */ mov 8(%ecx), %ecx /* ECX = arg2 (was saved by user) */ + jmp 2f +1: /* Invalid user ESP — zero args to prevent data leak */ + xor %edx, %edx + xor %ecx, %ecx +2: /* pusha-equivalent: eax ecx edx ebx esp ebp esi edi */ push %eax diff --git a/src/kernel/scheduler.c b/src/kernel/scheduler.c index 3b3d3ad3..afbdb85f 100644 --- a/src/kernel/scheduler.c +++ b/src/kernel/scheduler.c @@ -388,6 +388,14 @@ int process_kill(uint32_t pid, int sig) { return 0; } + /* A06: permission check (POSIX) — sender must be root or same uid */ + if (current_process && current_process->euid != 0) { + if (current_process->euid != p->uid && current_process->uid != p->uid) { + spin_unlock_irqrestore(&sched_lock, flags); + return -EPERM; + } + } + if (sig == SIG_KILL) { /* Remove from runqueue/sleep queue BEFORE marking ZOMBIE */ if (p->state == PROCESS_READY) { @@ -435,6 +443,11 @@ int process_kill_pgrp(uint32_t pgrp, int sig) { const struct process* const start = it; do { if (it->pgrp_id == pgrp && it->pid != 0 && it->state != PROCESS_ZOMBIE) { + /* A06: permission check — skip processes not owned by sender */ + if (current_process && current_process->euid != 0) { + if (current_process->euid != it->uid && current_process->uid != it->uid) + continue; + } it->sig_pending_mask |= (1U << (uint32_t)sig); if (it->state == PROCESS_BLOCKED || it->state == PROCESS_SLEEPING) { sleep_queue_remove(it); diff --git a/src/kernel/syscall.c b/src/kernel/syscall.c index ee768875..cf9af052 100644 --- a/src/kernel/syscall.c +++ b/src/kernel/syscall.c @@ -1649,12 +1649,23 @@ static int syscall_aio_rw_impl(void* user_cb, int is_write) { } int32_t result; + /* Use kernel bounce buffer to avoid passing user pointer directly to VFS (SMAP) */ + uint32_t xfer = cb.aio_nbytes; + if (xfer > 4096) xfer = 4096; /* cap per-call transfer */ + uint8_t* kbuf = (uint8_t*)kmalloc(xfer); + if (!kbuf) { cb.aio_error = ENOMEM; cb.aio_return 
= -ENOMEM; (void)copy_to_user(user_cb, &cb, sizeof(cb)); return 0; } + if (is_write) { + if (copy_from_user(kbuf, cb.aio_buf, xfer) < 0) { + kfree(kbuf); + cb.aio_error = EFAULT; cb.aio_return = -EFAULT; + (void)copy_to_user(user_cb, &cb, sizeof(cb)); + return 0; + } uint32_t (*fn_write)(fs_node_t*, uint32_t, uint32_t, const uint8_t*) = NULL; if (f->node->f_ops && f->node->f_ops->write) fn_write = f->node->f_ops->write; if (fn_write) { - result = (int32_t)fn_write(f->node, cb.aio_offset, cb.aio_nbytes, - (const uint8_t*)cb.aio_buf); + result = (int32_t)fn_write(f->node, cb.aio_offset, xfer, kbuf); } else { result = -ENOSYS; } @@ -1662,13 +1673,22 @@ static int syscall_aio_rw_impl(void* user_cb, int is_write) { uint32_t (*fn_read)(fs_node_t*, uint32_t, uint32_t, uint8_t*) = NULL; if (f->node->f_ops && f->node->f_ops->read) fn_read = f->node->f_ops->read; if (fn_read) { - result = (int32_t)fn_read(f->node, cb.aio_offset, cb.aio_nbytes, - (uint8_t*)cb.aio_buf); + result = (int32_t)fn_read(f->node, cb.aio_offset, xfer, kbuf); + if (result > 0) { + if (copy_to_user(cb.aio_buf, kbuf, (uint32_t)result) < 0) { + kfree(kbuf); + cb.aio_error = EFAULT; cb.aio_return = -EFAULT; + (void)copy_to_user(user_cb, &cb, sizeof(cb)); + return 0; + } + } } else { result = -ENOSYS; } } + kfree(kbuf); + cb.aio_error = (result < 0) ? 
-result : 0; cb.aio_return = result; (void)copy_to_user(user_cb, &cb, sizeof(cb)); @@ -1878,8 +1898,10 @@ static int pipe_create_kfds(int kfds[2]) { memset(rf, 0, sizeof(*rf)); memset(wf, 0, sizeof(*wf)); rf->node = rnode; + rf->flags = 0; /* O_RDONLY — read end */ rf->refcount = 1; wf->node = wnode; + wf->flags = 1; /* O_WRONLY — write end */ wf->refcount = 1; int rfd = fd_alloc(rf); @@ -2750,6 +2772,9 @@ static int syscall_read_impl(int fd, void* user_buf, uint32_t len) { struct file* f = fd_get(fd); if (!f || !f->node) return -EBADF; + /* A03: reject read on O_WRONLY fd (except char devices) */ + if ((f->flags & 3U) == 1U && f->node->flags != FS_CHARDEVICE) return -EBADF; + int nonblock = (f->flags & O_NONBLOCK) ? 1 : 0; { int (*fn_poll)(fs_node_t*, int) = NULL; @@ -2820,6 +2845,9 @@ static int syscall_write_impl(int fd, const void* user_buf, uint32_t len) { struct file* f = fd_get(fd); if (!f || !f->node) return -EBADF; + /* A03: reject write on O_RDONLY fd (except char devices) */ + if ((f->flags & 3U) == 0U && f->node->flags != FS_CHARDEVICE) return -EBADF; + /* Enforce MS_RDONLY: reject writes to read-only mounts */ if (f->mount_root && (vfs_node_mount_flags(f->mount_root) & MS_RDONLY)) return -EROFS; @@ -4637,7 +4665,7 @@ static void socket_syscall_dispatch(struct registers* regs, uint32_t syscall_no) if (!f) { sock_node_close(sn); sc_ret(regs) = (uint32_t)-ENOMEM; return; } f->node = sn; f->offset = 0; - f->flags = 0; + f->flags = 2; /* O_RDWR — sockets are bidirectional */ f->refcount = 1; int fd = fd_alloc(f); if (fd < 0) { sock_node_close(sn); kfree(f); sc_ret(regs) = (uint32_t)-EMFILE; return; } @@ -4676,7 +4704,7 @@ static void socket_syscall_dispatch(struct registers* regs, uint32_t syscall_no) if (!f) { sock_node_close(sn); sc_ret(regs) = (uint32_t)-ENOMEM; return; } f->node = sn; f->offset = 0; - f->flags = 0; + f->flags = 2; /* O_RDWR — sockets are bidirectional */ f->refcount = 1; int new_fd = fd_alloc(f); if (new_fd < 0) { sock_node_close(sn); 
kfree(f); sc_ret(regs) = (uint32_t)-EMFILE; return; } @@ -4705,7 +4733,16 @@ static void socket_syscall_dispatch(struct registers* regs, uint32_t syscall_no) if (!user_range_ok((const void*)sc_arg1(regs), len)) { sc_ret(regs) = (uint32_t)-EFAULT; return; } - sc_ret(regs) = (uint32_t)ksocket_send(sid, (const void*)sc_arg1(regs), len, (int)sc_arg3(regs)); + /* Bounce buffer for SMAP */ + size_t xfer = (len > 4096) ? 4096 : len; + uint8_t* kbuf = (uint8_t*)kmalloc(xfer); + if (!kbuf) { sc_ret(regs) = (uint32_t)-ENOMEM; return; } + if (copy_from_user(kbuf, (const void*)sc_arg1(regs), xfer) < 0) { + kfree(kbuf); sc_ret(regs) = (uint32_t)-EFAULT; return; + } + int sr = ksocket_send(sid, kbuf, xfer, (int)sc_arg3(regs)); + kfree(kbuf); + sc_ret(regs) = (uint32_t)sr; return; } @@ -4721,7 +4758,18 @@ static void socket_syscall_dispatch(struct registers* regs, uint32_t syscall_no) int rflags = (int)sc_arg3(regs); struct file* rf = fd_get(fd); if (rf && (rf->flags & O_NONBLOCK)) rflags |= O_NONBLOCK; - sc_ret(regs) = (uint32_t)ksocket_recv(sid, (void*)sc_arg1(regs), len, rflags); + /* Bounce buffer for SMAP */ + size_t xfer = (len > 4096) ? 4096 : len; + uint8_t* kbuf = (uint8_t*)kmalloc(xfer); + if (!kbuf) { sc_ret(regs) = (uint32_t)-ENOMEM; return; } + int rr = ksocket_recv(sid, kbuf, xfer, rflags); + if (rr > 0) { + if (copy_to_user((void*)sc_arg1(regs), kbuf, (size_t)rr) < 0) { + kfree(kbuf); sc_ret(regs) = (uint32_t)-EFAULT; return; + } + } + kfree(kbuf); + sc_ret(regs) = (uint32_t)rr; return; } @@ -4736,8 +4784,16 @@ static void socket_syscall_dispatch(struct registers* regs, uint32_t syscall_no) if (copy_from_user(&dest, (const void*)sc_arg4(regs), sizeof(dest)) < 0) { sc_ret(regs) = (uint32_t)-EFAULT; return; } - sc_ret(regs) = (uint32_t)ksocket_sendto(sid, (const void*)sc_arg1(regs), len, - (int)sc_arg3(regs), &dest); + /* Bounce buffer for SMAP */ + size_t xfer_s = (len > 4096) ? 
4096 : len; + uint8_t* kbuf_s = (uint8_t*)kmalloc(xfer_s); + if (!kbuf_s) { sc_ret(regs) = (uint32_t)-ENOMEM; return; } + if (copy_from_user(kbuf_s, (const void*)sc_arg1(regs), xfer_s) < 0) { + kfree(kbuf_s); sc_ret(regs) = (uint32_t)-EFAULT; return; + } + int sr2 = ksocket_sendto(sid, kbuf_s, xfer_s, (int)sc_arg3(regs), &dest); + kfree(kbuf_s); + sc_ret(regs) = (uint32_t)sr2; return; } @@ -4755,10 +4811,20 @@ static void socket_syscall_dispatch(struct registers* regs, uint32_t syscall_no) if (rf && (rf->flags & O_NONBLOCK)) rflags |= O_NONBLOCK; struct sockaddr_in src; memset(&src, 0, sizeof(src)); - int ret = ksocket_recvfrom(sid, (void*)sc_arg1(regs), len, rflags, &src); - if (ret > 0 && sc_arg4(regs)) { - (void)copy_to_user((void*)sc_arg4(regs), &src, sizeof(src)); + /* Bounce buffer for SMAP */ + size_t xfer_r = (len > 4096) ? 4096 : len; + uint8_t* kbuf_r = (uint8_t*)kmalloc(xfer_r); + if (!kbuf_r) { sc_ret(regs) = (uint32_t)-ENOMEM; return; } + int ret = ksocket_recvfrom(sid, kbuf_r, xfer_r, rflags, &src); + if (ret > 0) { + if (copy_to_user((void*)sc_arg1(regs), kbuf_r, (size_t)ret) < 0) { + kfree(kbuf_r); sc_ret(regs) = (uint32_t)-EFAULT; return; + } + if (sc_arg4(regs)) { + (void)copy_to_user((void*)sc_arg4(regs), &src, sizeof(src)); + } } + kfree(kbuf_r); sc_ret(regs) = (uint32_t)ret; return; } diff --git a/src/kernel/vdso.c b/src/kernel/vdso.c index b4405ae9..1b315bc0 100644 --- a/src/kernel/vdso.c +++ b/src/kernel/vdso.c @@ -13,6 +13,7 @@ #include "vmm.h" #include "utils.h" #include "console.h" +#include "timer.h" static uintptr_t vdso_phys = 0; static volatile struct vdso_data* vdso_kptr = NULL; @@ -32,7 +33,7 @@ void vdso_init(void) { vdso_kptr = (volatile struct vdso_data*)kva; memset((void*)vdso_kptr, 0, PAGE_SIZE); - vdso_kptr->tick_hz = 50; + vdso_kptr->tick_hz = TIMER_HZ; kprintf("[VDSO] Initialized at phys=0x%x\n", (unsigned)vdso_phys); } diff --git a/user/cmds/fulltest/fulltest.c b/user/cmds/fulltest/fulltest.c index b8b87710..4e777ab0 100644 
--- a/user/cmds/fulltest/fulltest.c +++ b/user/cmds/fulltest/fulltest.c @@ -1781,7 +1781,7 @@ void _start(void) { sys_exit(1); } - fd = sys_open("/sbin/fulltest", 0); + fd = sys_open("/sbin/fulltest", 2); /* O_RDWR — need write for overlay cow */ if (fd < 0) { sys_write(1, "[test] overlay open failed\n", (uint32_t)(sizeof("[test] overlay open failed\n") - 1)); @@ -1808,7 +1808,7 @@ void _start(void) { sys_exit(1); } - fd = sys_open("/sbin/fulltest", 0); + fd = sys_open("/sbin/fulltest", 2); /* O_RDWR */ if (fd < 0) { sys_write(1, "[test] overlay open2 failed\n", (uint32_t)(sizeof("[test] overlay open2 failed\n") - 1)); @@ -1888,7 +1888,7 @@ void _start(void) { sys_write(1, "[test] lseek/stat/fstat OK\n", (uint32_t)(sizeof("[test] lseek/stat/fstat OK\n") - 1)); - fd = sys_open("/tmp/hello.txt", 0); + fd = sys_open("/tmp/hello.txt", 2); /* O_RDWR — need write for dup2 redirect */ if (fd < 0) { sys_write(1, "[test] tmpfs open failed\n", (uint32_t)(sizeof("[test] tmpfs open failed\n") - 1)); @@ -2534,7 +2534,7 @@ void _start(void) { sys_exit(1); } - fd = sys_open("/tmp/hello.txt", 0); + fd = sys_open("/tmp/hello.txt", 2); /* O_RDWR — need write for append test */ if (fd < 0) { sys_write(1, "[test] tmpfs open3 failed\n", (uint32_t)(sizeof("[test] tmpfs open3 failed\n") - 1)); @@ -2778,7 +2778,7 @@ void _start(void) { } // Create file using relative path. - int fd = sys_open("rel", O_CREAT | O_TRUNC); + int fd = sys_open("rel", O_CREAT | O_TRUNC | 1); if (fd < 0) { sys_write(1, "[test] open relative failed\n", (uint32_t)(sizeof("[test] open relative failed\n") - 1)); @@ -2834,7 +2834,7 @@ void _start(void) { // B9: rename + rmdir smoke (rename may be ENOSYS on tmpfs) { // Create a file, rename it, verify old gone and new exists. 
- int fd = sys_open("/tmp/rnold", O_CREAT | O_TRUNC); + int fd = sys_open("/tmp/rnold", O_CREAT | O_TRUNC | 1); if (fd < 0) { sys_write(1, "[test] rename: create failed\n", (uint32_t)(sizeof("[test] rename: create failed\n") - 1)); @@ -3044,7 +3044,7 @@ void _start(void) { // C7: pread/pwrite (positional I/O) { - int fd = sys_open("/tmp/preadtest", O_CREAT | O_TRUNC); + int fd = sys_open("/tmp/preadtest", O_CREAT | O_TRUNC | 2); /* O_RDWR */ if (fd < 0) { sys_write(1, "[test] pread test open failed\n", (uint32_t)(sizeof("[test] pread test open failed\n") - 1)); sys_exit(1); @@ -3076,7 +3076,7 @@ void _start(void) { // C8: ftruncate (may be ENOSYS on tmpfs) { - int fd = sys_open("/tmp/trunctest", O_CREAT | O_TRUNC); + int fd = sys_open("/tmp/trunctest", O_CREAT | O_TRUNC | 1); if (fd < 0) { sys_write(1, "[test] truncate open failed\n", (uint32_t)(sizeof("[test] truncate open failed\n") - 1)); sys_exit(1); @@ -3222,7 +3222,7 @@ void _start(void) { // C14: O_APPEND { - int fd = sys_open("/tmp/appendtest", O_CREAT | O_TRUNC); + int fd = sys_open("/tmp/appendtest", O_CREAT | O_TRUNC | 1); if (fd < 0) { sys_write(1, "[test] O_APPEND create failed\n", (uint32_t)(sizeof("[test] O_APPEND create failed\n") - 1)); sys_exit(1); @@ -3230,7 +3230,7 @@ void _start(void) { (void)sys_write(fd, "AAA", 3); (void)sys_close(fd); - fd = sys_open("/tmp/appendtest", O_APPEND); + fd = sys_open("/tmp/appendtest", O_APPEND | 1); /* O_WRONLY */ if (fd < 0) { sys_write(1, "[test] O_APPEND open failed\n", (uint32_t)(sizeof("[test] O_APPEND open failed\n") - 1)); sys_exit(1); @@ -3380,7 +3380,7 @@ void _start(void) { // C21: hard link (skip gracefully if FS doesn't support it) { - int fd = sys_open("/tmp/linkoriginal", O_CREAT | O_TRUNC); + int fd = sys_open("/tmp/linkoriginal", O_CREAT | O_TRUNC | 1); if (fd >= 0) { (void)sys_write(fd, "LNK", 3); (void)sys_close(fd); @@ -3537,7 +3537,7 @@ void _start(void) { // C24: aio_read/aio_write smoke { - int fd = sys_open("/tmp/aiotest", O_CREAT | 
O_TRUNC); + int fd = sys_open("/tmp/aiotest", O_CREAT | O_TRUNC | 1); if (fd < 0) { sys_write(1, "[test] aio open failed\n", (uint32_t)(sizeof("[test] aio open failed\n") - 1)); sys_exit(1); @@ -3719,7 +3719,7 @@ void _start(void) { // D7: fsync { - int fd = sys_open("/tmp/fsynctest", O_CREAT | O_TRUNC); + int fd = sys_open("/tmp/fsynctest", O_CREAT | O_TRUNC | 1); if (fd < 0) { sys_write(1, "[test] fsync open failed\n", (uint32_t)(sizeof("[test] fsync open failed\n") - 1)); sys_exit(1); @@ -3757,7 +3757,7 @@ void _start(void) { // D8: truncate (path-based, may be ENOSYS on tmpfs) { - int fd = sys_open("/tmp/truncpath", O_CREAT | O_TRUNC); + int fd = sys_open("/tmp/truncpath", O_CREAT | O_TRUNC | 1); if (fd < 0) { sys_write(1, "[test] truncate open failed\n", (uint32_t)(sizeof("[test] truncate open failed\n") - 1)); sys_exit(1); @@ -3788,7 +3788,7 @@ void _start(void) { // D10: chmod (may be ENOSYS on some FS) { - int fd = sys_open("/tmp/chmodtest", O_CREAT | O_TRUNC); + int fd = sys_open("/tmp/chmodtest", O_CREAT | O_TRUNC | 1); if (fd >= 0) { (void)sys_close(fd); int r = sys_chmod("/tmp/chmodtest", 0755); @@ -3803,7 +3803,7 @@ void _start(void) { // D11: flock (LOCK_EX=2, LOCK_UN=8) { - int fd = sys_open("/tmp/flocktest", O_CREAT | O_TRUNC); + int fd = sys_open("/tmp/flocktest", O_CREAT | O_TRUNC | 1); if (fd < 0) { sys_write(1, "[test] flock open failed\n", (uint32_t)(sizeof("[test] flock open failed\n") - 1)); sys_exit(1);