From: Tulio A M Mendes Date: Tue, 10 Feb 2026 11:29:36 +0000 (-0300) Subject: fix: 4 CRITICAL security/race bugs from audit X-Git-Url: https://projects.tadryanom.me/?a=commitdiff_plain;h=e05f42e55232964d1885e10b6cef138b39f892db;p=AdrOS.git fix: 4 CRITICAL security/race bugs from audit 3.1: user_range_ok weak default now rejects kernel addresses (>= 0xC0000000) Prevents privilege escalation via syscall arguments on non-x86 fallback. 3.2: sigreturn sanitizes eflags — clears IOPL bits, ensures IF set. Prevents userspace from gaining port I/O access via crafted sigframe. 2.1: PMM bitmap/refcount now protected by spinlock_t pmm_lock. Prevents SMP race where two CPUs could allocate the same physical frame. All public PMM functions (alloc, free, mark_region, incref, decref, get_refcount) now use spin_lock_irqsave/spin_unlock_irqrestore. 2.2: file->refcount now uses __sync_fetch_and_add / __sync_sub_and_fetch. Prevents use-after-free in fork/dup/dup2/dup3/close when timer IRQ fires and schedule() runs process_close_all_files_locked concurrently. --- diff --git a/src/kernel/scheduler.c b/src/kernel/scheduler.c index fbdd6a7..f3a02fc 100644 --- a/src/kernel/scheduler.c +++ b/src/kernel/scheduler.c @@ -144,10 +144,7 @@ static void process_close_all_files_locked(struct process* p) { if (!f) continue; p->files[fd] = NULL; - if (f->refcount > 0) { - f->refcount--; - } - if (f->refcount == 0) { + if (__sync_sub_and_fetch(&f->refcount, 1) == 0) { if (f->node) { vfs_close(f->node); } diff --git a/src/kernel/syscall.c b/src/kernel/syscall.c index 7f90fb4..7395e20 100644 --- a/src/kernel/syscall.c +++ b/src/kernel/syscall.c @@ -194,7 +194,7 @@ static int syscall_fork_impl(struct registers* regs) { for (int fd = 0; fd < PROCESS_MAX_FILES; fd++) { struct file* f = current_process->files[fd]; if (!f) continue; - f->refcount++; + __sync_fetch_and_add(&f->refcount, 1); child->files[fd] = f; child->fd_flags[fd] = current_process->fd_flags[fd]; } @@ -574,10 +574,7 @@ static int fd_close(int fd) { if (!f) return -EBADF; current_process->files[fd] = NULL; - if (f->refcount > 0) { - f->refcount--; - } - if (f->refcount == 0) { + if (__sync_sub_and_fetch(&f->refcount, 1) == 0) { if (f->node) { vfs_close(f->node); } @@ -589,10 +586,10 @@ static int fd_close(int fd) { static int syscall_dup_impl(int oldfd) { struct file* f = fd_get(oldfd); if (!f) return -EBADF; - f->refcount++; + __sync_fetch_and_add(&f->refcount, 1); int newfd = fd_alloc_from(0, f); if (newfd < 0) { - f->refcount--; + __sync_sub_and_fetch(&f->refcount, 1); return -EMFILE; } return newfd; @@ -759,7 +756,7 @@ static int syscall_dup2_impl(int oldfd, int newfd) { (void)fd_close(newfd); } - f->refcount++; + __sync_fetch_and_add(&f->refcount, 1); current_process->files[newfd] = f; return newfd; } @@ -776,7 +773,7 @@ static int syscall_dup3_impl(int oldfd, int newfd, uint32_t flags) { (void)fd_close(newfd); } - f->refcount++; + __sync_fetch_and_add(&f->refcount, 1); current_process->files[newfd] = f; return newfd; } @@ -1392,6 +1389,10 @@ static int syscall_sigreturn_impl(struct registers* regs, const struct sigframe* if ((f.saved.cs & 3U) != 3U) return -EPERM; if ((f.saved.ss & 3U) != 3U) return -EPERM; + // Sanitize eflags: clear IOPL (bits 12-13) to prevent privilege escalation, + // ensure IF (bit 9) is set so interrupts remain enabled in usermode. + f.saved.eflags = (f.saved.eflags & ~0x3000U) | 0x200U; + // Restore the full saved trapframe. The interrupt stub will pop these regs and iret. *regs = f.saved; return 0; diff --git a/src/kernel/uaccess.c b/src/kernel/uaccess.c index 28222bd..5fc7d99 100644 --- a/src/kernel/uaccess.c +++ b/src/kernel/uaccess.c @@ -13,13 +13,23 @@ int uaccess_try_recover(uintptr_t fault_addr, struct registers* regs) { return 0; } +/* Conservative kernel/user boundary for the weak default. + * Architecture-specific overrides (e.g. x86 uaccess.c) refine this + * with page-table walks. The weak default MUST reject kernel addresses + * to prevent privilege escalation via syscall arguments. */ +#ifndef USER_ADDR_LIMIT +#define USER_ADDR_LIMIT 0xC0000000U +#endif + __attribute__((weak)) int user_range_ok(const void* user_ptr, size_t len) { uintptr_t uaddr = (uintptr_t)user_ptr; if (len == 0) return 1; if (uaddr == 0) return 0; uintptr_t end = uaddr + len - 1; - if (end < uaddr) return 0; + if (end < uaddr) return 0; /* overflow */ + if (uaddr >= USER_ADDR_LIMIT) return 0; /* kernel address */ + if (end >= USER_ADDR_LIMIT) return 0; /* spans into kernel */ return 1; } diff --git a/src/mm/pmm.c b/src/mm/pmm.c index c6a34b1..9150354 100644 --- a/src/mm/pmm.c +++ b/src/mm/pmm.c @@ -3,6 +3,7 @@ #include "uart_console.h" #include "hal/cpu.h" #include "hal/mm.h" +#include "spinlock.h" #include #include @@ -23,6 +24,7 @@ static uint64_t total_memory = 0; static uint64_t used_memory = 0; static uint64_t max_frames = 0; static uint64_t last_alloc_frame = 1; +static spinlock_t pmm_lock = {0}; static uint64_t align_down(uint64_t value, uint64_t align) { return value & ~(align - 1); @@ -48,6 +50,7 @@ void pmm_mark_region(uint64_t base, uint64_t size, int used) { uint64_t start_frame = base / PAGE_SIZE; uint64_t frames_count = size / PAGE_SIZE; + uintptr_t flags = spin_lock_irqsave(&pmm_lock); for (uint64_t i = 0; i < frames_count; i++) { if (start_frame + i >= max_frames) break; @@ -66,6 +69,7 @@ void pmm_mark_region(uint64_t base, uint64_t size, int used) { } } } + spin_unlock_irqrestore(&pmm_lock, flags); } void pmm_set_limits(uint64_t total_mem, uint64_t max_fr) { @@ -126,6 +130,8 @@ void pmm_init(void* boot_info) { } void* pmm_alloc_page(void) { + uintptr_t flags = spin_lock_irqsave(&pmm_lock); + // Start from frame 1 so we never return physical address 0. if (last_alloc_frame < 1) last_alloc_frame = 1; if (last_alloc_frame >= max_frames) last_alloc_frame = 1; @@ -142,9 +148,12 @@ void* pmm_alloc_page(void) { used_memory += PAGE_SIZE; last_alloc_frame = i + 1; if (last_alloc_frame >= max_frames) last_alloc_frame = 1; + spin_unlock_irqrestore(&pmm_lock, flags); return (void*)(uintptr_t)(i * PAGE_SIZE); } } + + spin_unlock_irqrestore(&pmm_lock, flags); return NULL; // OOM } @@ -153,36 +162,48 @@ void pmm_free_page(void* ptr) { uint64_t frame = addr / PAGE_SIZE; if (frame == 0 || frame >= max_frames) return; + uintptr_t flags = spin_lock_irqsave(&pmm_lock); + uint16_t rc = frame_refcount[frame]; if (rc > 1) { - __sync_sub_and_fetch(&frame_refcount[frame], 1); + frame_refcount[frame]--; + spin_unlock_irqrestore(&pmm_lock, flags); return; } frame_refcount[frame] = 0; bitmap_unset(frame); used_memory -= PAGE_SIZE; + + spin_unlock_irqrestore(&pmm_lock, flags); } void pmm_incref(uintptr_t paddr) { uint64_t frame = paddr / PAGE_SIZE; if (frame == 0 || frame >= max_frames) return; - __sync_fetch_and_add(&frame_refcount[frame], 1); + uintptr_t flags = spin_lock_irqsave(&pmm_lock); + frame_refcount[frame]++; + spin_unlock_irqrestore(&pmm_lock, flags); } uint16_t pmm_decref(uintptr_t paddr) { uint64_t frame = paddr / PAGE_SIZE; if (frame == 0 || frame >= max_frames) return 0; - uint16_t new_val = __sync_sub_and_fetch(&frame_refcount[frame], 1); + uintptr_t flags = spin_lock_irqsave(&pmm_lock); + uint16_t new_val = --frame_refcount[frame]; if (new_val == 0) { bitmap_unset(frame); used_memory -= PAGE_SIZE; } + spin_unlock_irqrestore(&pmm_lock, flags); return new_val; } uint16_t pmm_get_refcount(uintptr_t paddr) { uint64_t frame = paddr / PAGE_SIZE; if (frame >= max_frames) return 0; - return frame_refcount[frame]; + uintptr_t flags = spin_lock_irqsave(&pmm_lock); + uint16_t rc = frame_refcount[frame]; + spin_unlock_irqrestore(&pmm_lock, flags); + return rc; }