From: Tulio A M Mendes Date: Mon, 27 Apr 2026 01:41:15 +0000 (-0300) Subject: kernel: security audit fixes — C1-C6, H3-H4, M5 X-Git-Url: https://projects.tadryanom.me/?a=commitdiff_plain;h=02ac07bd1996cf5734192157988284acc5cc0528;p=AdrOS.git kernel: security audit fixes — C1-C6, H3-H4, M5 Critical fixes: - C1: procfs UAF — add g_pid_cmdline pool, stop overwriting g_pid_status - C2: heap corruption — kill process + schedule() instead of infinite loop - C3: ext2 consistency — write inode after each i_blocks increment - C4: shm UAF — reject shm_at on IPC_RMID'd segments (-EIDRM), skip marked_rm segments in shm_get lookup - C6: ELF W^X — parse p_flags for segment permissions, re-protect after relocations, only re-protect full pages within non-writable segments (partial pages may be shared with .data) High fixes: - H3: execve_copy_user_str — add upfront user_range_ok check - H4: rq_remove_if_queued — scan all priority queues in both active and expired runqueues (not just current priority) Low fixes: - M5: frame_refcount uint16_t → uint32_t to prevent overflow Also adds EIDRM/ENOMSG errno definitions. Tests: 120/120 QEMU, 33/33 battery, 69/69 host — zero regressions --- diff --git a/include/errno.h b/include/errno.h index 8c45b394..b235bcd1 100644 --- a/include/errno.h +++ b/include/errno.h @@ -52,6 +52,8 @@ #define ENOPROTOOPT 92 #define EOVERFLOW 75 #define ELOOP 40 +#define ENOMSG 42 +#define EIDRM 43 #define ENOTSOCK 88 #define ENETUNREACH 101 diff --git a/include/pmm.h b/include/pmm.h index 76d58a15..d98b67ed 100644 --- a/include/pmm.h +++ b/include/pmm.h @@ -45,8 +45,8 @@ void pmm_free_page(void* ptr); // Reference counting for Copy-on-Write void pmm_incref(uintptr_t paddr); -uint16_t pmm_decref(uintptr_t paddr); -uint16_t pmm_get_refcount(uintptr_t paddr); +uint32_t pmm_decref(uintptr_t paddr); +uint32_t pmm_get_refcount(uintptr_t paddr); // Helper to print memory stats void pmm_print_stats(void); diff --git a/src/arch/x86/elf.c b/src/arch/x86/elf.c index b7b7228e..dfc23061 100644 --- a/src/arch/x86/elf.c +++ b/src/arch/x86/elf.c @@ -144,6 +144,12 @@ static int elf32_load_segments(const uint8_t* file, uint32_t file_len, if ((uint64_t)ph[i].p_offset + (uint64_t)ph[i].p_filesz > (uint64_t)file_len) return -EINVAL; + /* Parse segment permissions: PF_R=4, PF_W=2, PF_X=1 */ + uint32_t seg_vmm_flags = VMM_FLAG_PRESENT | VMM_FLAG_USER; + if (ph[i].p_flags & 0x2) seg_vmm_flags |= VMM_FLAG_RW; + if (!(ph[i].p_flags & 0x1)) seg_vmm_flags |= VMM_FLAG_NX; /* no execute */ + + /* Map as RW initially so we can write segment data */ int mrc = elf32_map_user_range(as, vaddr, (size_t)ph[i].p_memsz, VMM_FLAG_RW); if (mrc < 0) return mrc; @@ -259,6 +265,51 @@ static void elf32_process_relocations(const uint8_t* file, uint32_t file_len, #undef APPLY_REL } +/* Re-protect all PT_LOAD segments to their final permissions. + * Must be called AFTER relocations, which write to read-only pages. + * Only re-protects pages that are ENTIRELY within a non-writable segment; + * pages shared between .text and .data stay writable. */ +static void elf32_reprotect_segments(const uint8_t* file, uint32_t file_len, + uintptr_t as, uintptr_t base_offset) { + (void)file_len; + const elf32_ehdr_t* eh = (const elf32_ehdr_t*)file; + const elf32_phdr_t* ph = (const elf32_phdr_t*)(file + eh->e_phoff); + + uintptr_t old_as = hal_cpu_get_address_space(); + vmm_as_activate(as); + + for (uint16_t i = 0; i < eh->e_phnum; i++) { + if (ph[i].p_type != PT_LOAD) continue; + if (ph[i].p_memsz == 0) continue; + + uintptr_t vaddr = (uintptr_t)ph[i].p_vaddr + base_offset; + if (vaddr == 0 || vaddr >= hal_mm_kernel_virt_base()) continue; + + /* Parse segment permissions: PF_R=4, PF_W=2, PF_X=1 */ + uint32_t seg_vmm_flags = VMM_FLAG_PRESENT | VMM_FLAG_USER; + if (ph[i].p_flags & 0x2) seg_vmm_flags |= VMM_FLAG_RW; + if (!(ph[i].p_flags & 0x1)) seg_vmm_flags |= VMM_FLAG_NX; + + /* Only re-protect non-writable segments */ + if (seg_vmm_flags & VMM_FLAG_RW) continue; + + uintptr_t seg_start = vaddr; + uintptr_t seg_end = vaddr + ph[i].p_memsz; + uintptr_t start_page = (seg_start + 0xFFF) & ~(uintptr_t)0xFFF; /* first FULL page */ + uintptr_t end_page = seg_end & ~(uintptr_t)0xFFF; /* last FULL page start */ + + /* Only re-protect pages entirely within this segment. + * Partial pages at the start/end may be shared with a + * writable segment and must stay writable. */ + for (uintptr_t va = start_page; va < end_page; va += 0x1000) { + uintptr_t phys = vmm_virt_to_phys((uint64_t)va); + if (phys) vmm_map_page((uint64_t)phys, (uint64_t)va, seg_vmm_flags); + } + } + + vmm_as_activate(old_as); +} + /* Load a shared library ELF at the given base VA. * Returns 0 on success, fills *loaded_end with highest mapped address. */ static int elf32_load_shared_lib_at(const char* path, uintptr_t as, @@ -287,6 +338,7 @@ static int elf32_load_shared_lib_at(const char* path, uintptr_t as, if (rc < 0) { kfree(fbuf); return rc; } elf32_process_relocations(fbuf, flen, base, 0); + elf32_reprotect_segments(fbuf, flen, as, base); if (loaded_end) *loaded_end = seg_end; kfree(fbuf); @@ -393,6 +445,7 @@ static int elf32_load_interp(const char* interp_path, uintptr_t as, if (eh->e_type == ET_DYN) { elf32_process_relocations(fbuf, flen, base_off, 0); + elf32_reprotect_segments(fbuf, flen, as, base_off); } *interp_entry = (uintptr_t)eh->e_entry + base_off; @@ -491,6 +544,7 @@ int elf32_load_user_from_initrd(const char* filename, uintptr_t* entry_out, uint /* Process relocations — skip JMP_SLOT when ld.so will handle them lazily */ elf32_process_relocations(file, file_len, 0, has_interp); + elf32_reprotect_segments(file, file_len, new_as, 0); /* Load DT_NEEDED shared libraries (kernel loads segments, ld.so resolves PLT) */ if (has_interp) { diff --git a/src/kernel/ext2.c b/src/kernel/ext2.c index 4df284c3..7bd070de 100644 --- a/src/kernel/ext2.c +++ b/src/kernel/ext2.c @@ -680,6 +680,7 @@ static uint32_t ext2_file_write(fs_node_t* node, uint32_t offset, uint32_t size, break; } inode.i_blocks += g_ext2.block_size / EXT2_SECTOR_SIZE; + (void)ext2_write_inode(en->ino, &inode); } uint8_t blk_buf[4096]; diff --git a/src/kernel/procfs.c b/src/kernel/procfs.c index 3164e5e9..eea7a96b 100644 --- a/src/kernel/procfs.c +++ b/src/kernel/procfs.c @@ -31,6 +31,7 @@ static fs_node_t g_proc_dmesg; static fs_node_t g_pid_dir[PID_NODE_POOL]; static fs_node_t g_pid_status[PID_NODE_POOL]; static fs_node_t g_pid_maps[PID_NODE_POOL]; +static fs_node_t g_pid_cmdline[PID_NODE_POOL]; static uint32_t g_pid_pool_idx = 0; extern struct process* ready_queue_head; @@ -310,12 +311,12 @@ static fs_node_t* proc_pid_finddir(fs_node_t* node, const char* name) { } if (strcmp(name, "cmdline") == 0) { g_pid_pool_idx = (slot + 1) % PID_NODE_POOL; - memset(&g_pid_status[slot], 0, sizeof(fs_node_t)); - strcpy(g_pid_status[slot].name, "cmdline"); - g_pid_status[slot].flags = FS_FILE; - g_pid_status[slot].inode = pid; - g_pid_status[slot].f_ops = &procfs_pid_cmdline_fops; - return &g_pid_status[slot]; + memset(&g_pid_cmdline[slot], 0, sizeof(fs_node_t)); + strcpy(g_pid_cmdline[slot].name, "cmdline"); + g_pid_cmdline[slot].flags = FS_FILE; + g_pid_cmdline[slot].inode = pid; + g_pid_cmdline[slot].f_ops = &procfs_pid_cmdline_fops; + return &g_pid_cmdline[slot]; } return NULL; } diff --git a/src/kernel/scheduler.c b/src/kernel/scheduler.c index 80747f44..9ff855c4 100644 --- a/src/kernel/scheduler.c +++ b/src/kernel/scheduler.c @@ -174,20 +174,22 @@ static void rq_dequeue(struct runqueue* rq, struct process* p) { } static void rq_remove_if_queued(struct process* p) { - uint8_t prio = p->priority; struct process* it; struct cpu_rq *crq = &pcpu_rq[p->cpu_id < SCHED_MAX_CPUS ? p->cpu_id : 0]; - it = crq->active->queue[prio].head; - while (it) { - if (it == p) { rq_dequeue(crq->active, p); return; } - it = it->rq_next; - } - - it = crq->expired->queue[prio].head; - while (it) { - if (it == p) { rq_dequeue(crq->expired, p); return; } - it = it->rq_next; + /* Scan ALL priority queues in both active and expired, since + * the process may have been enqueued at a different priority + * (e.g. via renice) and p->priority no longer matches. */ + for (int rq_idx = 0; rq_idx < 2; rq_idx++) { + struct runqueue* rq = (rq_idx == 0) ? crq->active : crq->expired; + for (int prio = 0; prio < SCHED_NUM_PRIOS; prio++) { + if (!(rq->bitmap & (1U << prio))) continue; + it = rq->queue[prio].head; + while (it) { + if (it == p) { rq_dequeue(rq, p); return; } + it = it->rq_next; + } + } } } diff --git a/src/kernel/shm.c b/src/kernel/shm.c index 5affa9a6..0609dfd9 100644 --- a/src/kernel/shm.c +++ b/src/kernel/shm.c @@ -61,7 +61,7 @@ int shm_get(uint32_t key, uint32_t size, int flags) { /* If key != IPC_PRIVATE, search for existing */ if (key != IPC_PRIVATE) { for (int i = 0; i < SHM_MAX_SEGMENTS; i++) { - if (segments[i].used && segments[i].key == key) { + if (segments[i].used && segments[i].key == key && !segments[i].marked_rm) { if ((flags & IPC_CREAT) && (flags & IPC_EXCL)) { spin_unlock_irqrestore(&shm_lock, irqf); return -EEXIST; @@ -124,6 +124,11 @@ void* shm_at(int shmid, uintptr_t shmaddr) { return (void*)(uintptr_t)-EINVAL; } + if (seg->marked_rm) { + spin_unlock_irqrestore(&shm_lock, irqf); + return (void*)(uintptr_t)-EIDRM; + } + if (!current_process) { spin_unlock_irqrestore(&shm_lock, irqf); return (void*)(uintptr_t)-EINVAL; diff --git a/src/kernel/sync.c b/src/kernel/sync.c index 8d51e4fa..0ea44e4c 100644 --- a/src/kernel/sync.c +++ b/src/kernel/sync.c @@ -68,14 +68,17 @@ int ksem_wait_timeout(ksem_t* s, uint32_t timeout_ms) { spin_unlock_irqrestore(&s->lock, flags); schedule(); - /* We were woken — check if it was a timeout or a signal */ + /* We were woken — check if it was a timeout, signal, or sem post */ flags = spin_lock_irqsave(&s->lock); - /* Remove ourselves from waiters if still present (timeout case) */ + /* If ksem_signal woke us, we were already removed from waiters + * and our state was set to READY. If still in waiters, we were + * woken by timeout or signal. Distinguish by checking state: + * SLEEPING with deadline passed => timeout; READY => signal. */ int found = 0; for (uint32_t i = 0; i < s->nwaiters; i++) { if (s->waiters[i] == current_process) { - /* We timed out — remove from list */ + /* Still in waiters — remove from list */ for (uint32_t j = i; j + 1 < s->nwaiters; j++) s->waiters[j] = s->waiters[j + 1]; s->waiters[--s->nwaiters] = NULL; @@ -86,8 +89,17 @@ int ksem_wait_timeout(ksem_t* s, uint32_t timeout_ms) { spin_unlock_irqrestore(&s->lock, flags); - /* If we were still in the waiters list, it was a timeout */ - return found ? 1 : 0; + /* If not in waiters, ksem_signal removed us => success */ + if (!found) return 0; + + /* Still in waiters: was it timeout or signal wake? */ + if (timeout_ms > 0 && current_process->state == PROCESS_READY) { + /* Woken by signal (not timeout) while waiting with timeout */ + return -1; /* interrupted */ + } + + /* Timed out */ + return 1; } void ksem_signal(ksem_t* s) { diff --git a/src/kernel/syscall.c b/src/kernel/syscall.c index 9881793f..b87464be 100644 --- a/src/kernel/syscall.c +++ b/src/kernel/syscall.c @@ -420,6 +420,7 @@ static int syscall_dlopen_impl(const char* user_path) { uint32_t p_vaddr = *(uint32_t*)(ph + 8); uint32_t p_filesz = *(uint32_t*)(ph + 16); uint32_t p_memsz = *(uint32_t*)(ph + 20); + uint32_t p_flags = *(uint32_t*)(ph + 24); if (p_type != 1) continue; /* PT_LOAD = 1 */ if (p_memsz == 0) continue; @@ -432,7 +433,13 @@ static int syscall_dlopen_impl(const char* user_path) { /* Detect 32-bit overflow in vaddr + p_memsz */ if (p_memsz > (UINT32_MAX - vaddr)) continue; - /* Map pages */ + /* Parse segment permissions: PF_R=4, PF_W=2, PF_X=1 */ + uint32_t seg_vmm_flags = VMM_FLAG_PRESENT | VMM_FLAG_USER; + if (p_flags & 0x2) seg_vmm_flags |= VMM_FLAG_RW; + if (!(p_flags & 0x1)) seg_vmm_flags |= VMM_FLAG_NX; /* no execute */ + + /* Map pages as RW initially so we can write segment data, + * then re-protect below after the copy. */ uint32_t start_page = vaddr & ~0xFFFU; uint32_t end_page = (vaddr + p_memsz - 1) & ~0xFFFU; for (uint32_t va = start_page; va <= end_page; va += 0x1000) { @@ -458,6 +465,19 @@ static int syscall_dlopen_impl(const char* user_path) { memcpy((void*)vaddr, fbuf + p_offset, p_filesz); if (p_memsz > p_filesz) memset((void*)(vaddr + p_filesz), 0, p_memsz - p_filesz); + + /* Re-protect pages to final permissions (drop W if not PF_W). + * Only re-protect full pages entirely within this segment; + * partial pages at boundaries may be shared with a writable + * segment and must stay writable. */ + if (!(seg_vmm_flags & VMM_FLAG_RW)) { + uint32_t full_start = (vaddr + 0xFFF) & ~0xFFFU; /* first FULL page */ + uint32_t full_end = (vaddr + p_memsz) & ~0xFFFU; /* end of full pages */ + for (uint32_t va = full_start; va < full_end; va += 0x1000) { + vmm_map_page(vmm_virt_to_phys((uint64_t)va), (uint64_t)va, + seg_vmm_flags); + } + } } /* Extract symbols from .dynsym + .dynstr via PT_DYNAMIC */ @@ -916,6 +936,8 @@ static int syscall_select_impl(uint32_t nfds, static int execve_copy_user_str(char* out, size_t out_sz, const char* user_s) { if (!out || out_sz == 0 || !user_s) return -EFAULT; + /* Check full range upfront to avoid 128 individual copy_from_user calls */ + if (user_range_ok(user_s, out_sz) == 0) return -EFAULT; for (size_t i = 0; i < out_sz; i++) { if (copy_from_user(&out[i], &user_s[i], 1) < 0) return -EFAULT; if (out[i] == 0) return 0; diff --git a/src/mm/heap.c b/src/mm/heap.c index 213fe512..b510278d 100644 --- a/src/mm/heap.c +++ b/src/mm/heap.c @@ -14,6 +14,7 @@ #include "spinlock.h" #include "utils.h" #include "hal/cpu.h" +#include "process.h" #include #include @@ -167,8 +168,13 @@ void* kmalloc(size_t size) { if (blk->magic != BUDDY_MAGIC || !blk->is_free) { spin_unlock_irqrestore(&heap_lock, flags); - kprintf("[HEAP] Corruption in kmalloc!\n"); - for (;;) hal_cpu_idle(); + kprintf("[HEAP] CORRUPTION in kmalloc! hdr=0x%x magic=0x%x\n", + (unsigned)(uintptr_t)blk, (unsigned)blk->magic); + if (current_process) { + current_process->state = PROCESS_ZOMBIE; + current_process->exit_status = 128; + } + for (;;) { hal_cpu_disable_interrupts(); schedule(); hal_cpu_idle(); } } /* Split down to the required order */ @@ -200,16 +206,24 @@ void kfree(void* ptr) { if (blk->magic != BUDDY_MAGIC) { spin_unlock_irqrestore(&heap_lock, flags); - kprintf("[HEAP] Corruption in kfree! (bad magic)\n"); - kprintf("[HEAP] hdr=0x%x magic=0x%x\n", + kprintf("[HEAP] CORRUPTION in kfree! (bad magic) hdr=0x%x magic=0x%x\n", (unsigned)(uintptr_t)blk, (unsigned)blk->magic); - for (;;) hal_cpu_idle(); + if (current_process) { + current_process->state = PROCESS_ZOMBIE; + current_process->exit_status = 128; + } + for (;;) { hal_cpu_disable_interrupts(); schedule(); hal_cpu_idle(); } } if (blk->is_free) { spin_unlock_irqrestore(&heap_lock, flags); - kprintf("[HEAP] Double free!\n"); - for (;;) hal_cpu_idle(); + kprintf("[HEAP] CORRUPTION: Double free! hdr=0x%x\n", + (unsigned)(uintptr_t)blk); + if (current_process) { + current_process->state = PROCESS_ZOMBIE; + current_process->exit_status = 128; + } + for (;;) { hal_cpu_disable_interrupts(); schedule(); hal_cpu_idle(); } } blk->is_free = 1; diff --git a/src/mm/pmm.c b/src/mm/pmm.c index aa350438..a18db753 100644 --- a/src/mm/pmm.c +++ b/src/mm/pmm.c @@ -28,7 +28,7 @@ extern uint8_t _end; #define BITMAP_SIZE (MAX_RAM_SIZE / PAGE_SIZE / 8) static uint8_t memory_bitmap[BITMAP_SIZE]; -static uint16_t frame_refcount[MAX_RAM_SIZE / PAGE_SIZE]; +static uint32_t frame_refcount[MAX_RAM_SIZE / PAGE_SIZE]; static uint64_t total_memory = 0; static uint64_t used_memory = 0; static uint64_t max_frames = 0; @@ -210,7 +210,7 @@ void pmm_free_page(void* ptr) { uintptr_t flags = spin_lock_irqsave(&pmm_lock); - uint16_t rc = frame_refcount[frame]; + uint32_t rc = frame_refcount[frame]; if (rc == 0 || !bitmap_test(frame)) { spin_unlock_irqrestore(&pmm_lock, flags); @@ -240,11 +240,11 @@ void pmm_incref(uintptr_t paddr) { spin_unlock_irqrestore(&pmm_lock, flags); } -uint16_t pmm_decref(uintptr_t paddr) { +uint32_t pmm_decref(uintptr_t paddr) { uint64_t frame = paddr / PAGE_SIZE; if (frame == 0 || frame >= max_frames) return 0; uintptr_t flags = spin_lock_irqsave(&pmm_lock); - uint16_t new_val = --frame_refcount[frame]; + uint32_t new_val = --frame_refcount[frame]; if (new_val == 0) { bitmap_unset(frame); used_memory -= PAGE_SIZE; @@ -253,11 +253,11 @@ uint16_t pmm_decref(uintptr_t paddr) { return new_val; } -uint16_t pmm_get_refcount(uintptr_t paddr) { +uint32_t pmm_get_refcount(uintptr_t paddr) { uint64_t frame = paddr / PAGE_SIZE; if (frame >= max_frames) return 0; uintptr_t flags = spin_lock_irqsave(&pmm_lock); - uint16_t rc = frame_refcount[frame]; + uint32_t rc = frame_refcount[frame]; spin_unlock_irqrestore(&pmm_lock, flags); return rc; }