From: Tulio A M Mendes Date: Sun, 5 Apr 2026 15:19:46 +0000 (-0300) Subject: fix(security): Red Team bug fixes + deep analysis hardening X-Git-Url: https://projects.tadryanom.me/?a=commitdiff_plain;h=6d68750e842b7849ee10a4d1acaea44b61c65403;p=AdrOS.git fix(security): Red Team bug fixes + deep analysis hardening Red Team report fixes (8 bugs): - pmm_free_page: add rc==0 double-free guard with BUG log - uaccess: move global state to per-CPU via GS segment (SMP safety) - elf.c: add overflow checks for p_vaddr+base_offset and vaddr+p_memsz - ksem_wait: retry loop on waiter-array-full instead of silent discard - kstack: add free-slot recycling stack (256 entries) to prevent leak - schedule(): fix fallback dequeue from active (not expired) after swap - buddy_of(): add bounds check, kfree handles NULL return - e1000: add compiler memory barriers to MMIO read/write helpers Deep analysis fixes (4 additional bugs): - readdir: fix strcpy buffer overflow d_name[24] from name[128] in tmpfs, devfs, initrd (use strncpy + null termination) - mq_send_impl: fix TOCTOU race — copy user data to kbuf before lock - dlopen: add 32-bit overflow checks for p_vaddr+base and vaddr+p_memsz - tmpfs/overlayfs: replace unbounded strcpy into vfs.name[128] and symlink_target[128] with strncpy Regression fix: - percpu: add self-pointer as first field of percpu_data so that percpu_get() returns a valid pointer (was returning cpu_index=0 as NULL pointer on BSP, causing triple fault) Verified: make ARCH=x86 builds clean, cppcheck+sparse pass, 102/103 smoke tests PASS (1 pre-existing timeout). --- diff --git a/include/arch/x86/percpu.h b/include/arch/x86/percpu.h index 304c5219..795b6096 100644 --- a/include/arch/x86/percpu.h +++ b/include/arch/x86/percpu.h @@ -19,13 +19,16 @@ struct runqueue; /* Per-CPU data block — one per CPU, accessed via GS segment. * The GS base for each CPU points to its own percpu_data instance. */ struct percpu_data { - uint32_t cpu_index; /* 0 = BSP */ - uint32_t lapic_id; - struct process* current_process; /* Currently running process on this CPU */ - uintptr_t kernel_stack; /* Top of this CPU's kernel stack */ - uint32_t nested_irq; /* IRQ nesting depth */ - uint32_t rq_load; /* Number of READY processes on this CPU */ - uint32_t reserved[2]; /* Padding to 32 bytes */ + struct percpu_data* self; /* offset 0 — self-pointer for percpu_get() */ + uint32_t cpu_index; /* offset 4, 0 = BSP */ + uint32_t lapic_id; /* offset 8 */ + struct process* current_process; /* offset 12 */ + uintptr_t kernel_stack; /* offset 16 */ + uint32_t nested_irq; /* offset 20 */ + uint32_t rq_load; /* offset 24 */ + int uaccess_active; /* offset 28 */ + int uaccess_faulted; /* offset 32 */ + uintptr_t uaccess_recover; /* offset 36 */ }; /* Initialize per-CPU data for all CPUs. Called once from BSP after SMP init. */ @@ -44,20 +47,20 @@ static inline struct percpu_data* percpu_get(void) { /* Get current CPU index (fast path via GS). */ static inline uint32_t percpu_cpu_index(void) { uint32_t idx; - __asm__ volatile("mov %%gs:0, %0" : "=r"(idx)); + __asm__ volatile("mov %%gs:4, %0" : "=r"(idx)); return idx; } /* Get current process on this CPU (fast path via GS). */ static inline struct process* percpu_current(void) { struct process* p; - __asm__ volatile("mov %%gs:8, %0" : "=r"(p)); + __asm__ volatile("mov %%gs:12, %0" : "=r"(p)); return p; } /* Set current process on this CPU. */ static inline void percpu_set_current(struct process* proc) { - __asm__ volatile("mov %0, %%gs:8" : : "r"(proc) : "memory"); + __asm__ volatile("mov %0, %%gs:12" : : "r"(proc) : "memory"); } #endif diff --git a/src/arch/x86/elf.c b/src/arch/x86/elf.c index 93a4da35..484560cf 100644 --- a/src/arch/x86/elf.c +++ b/src/arch/x86/elf.c @@ -127,9 +127,17 @@ static int elf32_load_segments(const uint8_t* file, uint32_t file_len, if (ph[i].p_type != PT_LOAD) continue; if (ph[i].p_memsz == 0) continue; + /* Detect 32-bit overflow in p_vaddr + base_offset */ + if (base_offset != 0 && (uintptr_t)ph[i].p_vaddr > (UINT32_MAX - base_offset)) + return -EINVAL; + uintptr_t vaddr = (uintptr_t)ph[i].p_vaddr + base_offset; if (vaddr == 0 || vaddr >= hal_mm_kernel_virt_base()) return -EINVAL; + /* Detect 32-bit overflow in vaddr + p_memsz */ + if (ph[i].p_memsz > (UINT32_MAX - (uint32_t)vaddr)) + return -EINVAL; + uint32_t seg_end = (uint32_t)vaddr + ph[i].p_memsz; if (seg_end < vaddr || seg_end >= hal_mm_kernel_virt_base()) return -EINVAL; diff --git a/src/arch/x86/percpu.c b/src/arch/x86/percpu.c index 68b0ac90..ec4a7999 100644 --- a/src/arch/x86/percpu.c +++ b/src/arch/x86/percpu.c @@ -49,6 +49,7 @@ void percpu_init(void) { for (uint32_t i = 0; i < ncpus; i++) { const struct cpu_info* ci = smp_get_cpu(i); + g_percpu[i].self = &g_percpu[i]; g_percpu[i].cpu_index = i; g_percpu[i].lapic_id = ci ? ci->lapic_id : 0; g_percpu[i].current_process = NULL; diff --git a/src/arch/x86/uaccess.c b/src/arch/x86/uaccess.c index 14f875f8..6553bdb1 100644 --- a/src/arch/x86/uaccess.c +++ b/src/arch/x86/uaccess.c @@ -12,6 +12,7 @@ #include "errno.h" #include "interrupts.h" #include "hal/mm.h" +#include "arch/x86/percpu.h" #include @@ -40,20 +41,18 @@ static int x86_user_range_basic_ok(uintptr_t uaddr, size_t len) { return 1; } -static volatile int g_uaccess_active = 0; -static volatile int g_uaccess_faulted = 0; -static volatile uintptr_t g_uaccess_recover_eip = 0; - int uaccess_try_recover(uintptr_t fault_addr, struct registers* regs) { if (!regs) return 0; - if (g_uaccess_active == 0) return 0; - if (g_uaccess_recover_eip == 0) return 0; + + struct percpu_data* pc = percpu_get(); + if (!pc->uaccess_active) return 0; + if (!pc->uaccess_recover) return 0; // Only recover faults on user addresses; kernel faults should still panic. if (fault_addr >= hal_mm_kernel_virt_base()) return 0; - g_uaccess_faulted = 1; - regs->eip = (uint32_t)g_uaccess_recover_eip; + pc->uaccess_faulted = 1; + regs->eip = (uint32_t)pc->uaccess_recover; return 1; } @@ -128,9 +127,10 @@ int copy_from_user(void* dst, const void* src_user, size_t len) { if (len == 0) return 0; if (!user_range_ok(src_user, len)) return -EFAULT; - g_uaccess_faulted = 0; - g_uaccess_recover_eip = (uintptr_t)&&uaccess_fault; - g_uaccess_active = 1; + struct percpu_data* pc = percpu_get(); + pc->uaccess_faulted = 0; + pc->uaccess_recover = (uintptr_t)&&uaccess_fault; + pc->uaccess_active = 1; stac(); uintptr_t up = (uintptr_t)src_user; @@ -139,16 +139,16 @@ int copy_from_user(void* dst, const void* src_user, size_t len) { } clac(); - g_uaccess_active = 0; - g_uaccess_recover_eip = 0; - if (g_uaccess_faulted) return -EFAULT; + pc->uaccess_active = 0; + pc->uaccess_recover = 0; + if (pc->uaccess_faulted) return -EFAULT; return 0; uaccess_fault: clac(); - g_uaccess_active = 0; - g_uaccess_faulted = 0; - g_uaccess_recover_eip = 0; + pc->uaccess_active = 0; + pc->uaccess_faulted = 0; + pc->uaccess_recover = 0; return -EFAULT; } @@ -157,9 +157,10 @@ int copy_to_user(void* dst_user, const void* src, size_t len) { if (!x86_user_range_writable_user((uintptr_t)dst_user, len)) return -EFAULT; - g_uaccess_faulted = 0; - g_uaccess_recover_eip = (uintptr_t)&&uaccess_fault2; - g_uaccess_active = 1; + struct percpu_data* pc = percpu_get(); + pc->uaccess_faulted = 0; + pc->uaccess_recover = (uintptr_t)&&uaccess_fault2; + pc->uaccess_active = 1; stac(); uintptr_t up = (uintptr_t)dst_user; @@ -168,15 +169,15 @@ int copy_to_user(void* dst_user, const void* src, size_t len) { } clac(); - g_uaccess_active = 0; - g_uaccess_recover_eip = 0; - if (g_uaccess_faulted) return -EFAULT; + pc->uaccess_active = 0; + pc->uaccess_recover = 0; + if (pc->uaccess_faulted) return -EFAULT; return 0; uaccess_fault2: clac(); - g_uaccess_active = 0; - g_uaccess_faulted = 0; - g_uaccess_recover_eip = 0; + pc->uaccess_active = 0; + pc->uaccess_faulted = 0; + pc->uaccess_recover = 0; return -EFAULT; } diff --git a/src/drivers/e1000.c b/src/drivers/e1000.c index 70be7d2f..5d912def 100644 --- a/src/drivers/e1000.c +++ b/src/drivers/e1000.c @@ -53,12 +53,20 @@ static uint8_t e1000_bus, e1000_slot, e1000_func; /* MMIO helpers */ /* ------------------------------------------------------------------ */ +/* Compiler barrier — prevents reordering across MMIO accesses. + * On x86 with UC-mapped MMIO, CPU ordering is already strict; + * the barrier ensures the compiler respects that ordering too. */ +#define mmio_barrier() __asm__ volatile("" ::: "memory") + static inline uint32_t e1000_read(uint32_t reg) { - return e1000_mmio[reg / 4]; + uint32_t val = e1000_mmio[reg / 4]; + mmio_barrier(); + return val; } static inline void e1000_write(uint32_t reg, uint32_t val) { e1000_mmio[reg / 4] = val; + mmio_barrier(); } /* ------------------------------------------------------------------ */ diff --git a/src/drivers/initrd.c b/src/drivers/initrd.c index 8a83107f..8948676f 100644 --- a/src/drivers/initrd.c +++ b/src/drivers/initrd.c @@ -246,7 +246,8 @@ static int initrd_readdir(struct fs_node* node, uint32_t* inout_index, void* buf if (c == -1) break; e.d_ino = (uint32_t)c; e.d_type = (uint8_t)entries[c].flags; - strcpy(e.d_name, entries[c].name); + strncpy(e.d_name, entries[c].name, sizeof(e.d_name) - 1); + e.d_name[sizeof(e.d_name) - 1] = '\0'; } e.d_reclen = (uint16_t)sizeof(e); diff --git a/src/kernel/devfs.c b/src/kernel/devfs.c index 3266dd32..8761841e 100644 --- a/src/kernel/devfs.c +++ b/src/kernel/devfs.c @@ -206,12 +206,14 @@ static int devfs_readdir_impl(struct fs_node* node, uint32_t* inout_index, void* if (di < NBUILTINS) { e.d_ino = builtins[di].ino; e.d_type = builtins[di].type; - strcpy(e.d_name, builtins[di].name); + strncpy(e.d_name, builtins[di].name, sizeof(e.d_name) - 1); + e.d_name[sizeof(e.d_name) - 1] = '\0'; } else { fs_node_t* rn = g_registered[di - NBUILTINS]; e.d_ino = rn->inode; e.d_type = (uint8_t)rn->flags; - strcpy(e.d_name, rn->name); + strncpy(e.d_name, rn->name, sizeof(e.d_name) - 1); + e.d_name[sizeof(e.d_name) - 1] = '\0'; } } diff --git a/src/kernel/overlayfs.c b/src/kernel/overlayfs.c index 2f87d265..f809832e 100644 --- a/src/kernel/overlayfs.c +++ b/src/kernel/overlayfs.c @@ -127,7 +127,8 @@ static fs_node_t* overlay_wrap_child(struct overlay_node* parent, const char* na if (!c) return NULL; memset(c, 0, sizeof(*c)); - strcpy(c->vfs.name, name); + strncpy(c->vfs.name, name, sizeof(c->vfs.name) - 1); + c->vfs.name[sizeof(c->vfs.name) - 1] = '\0'; c->ofs = parent->ofs; c->lower = lower_child; c->upper = upper_child; diff --git a/src/kernel/scheduler.c b/src/kernel/scheduler.c index bb10c5e9..b3d8db35 100644 --- a/src/kernel/scheduler.c +++ b/src/kernel/scheduler.c @@ -48,13 +48,26 @@ static uintptr_t kernel_as = 0; static uint32_t kstack_next_slot = 0; static spinlock_t kstack_lock = {0}; +/* Free-slot recycling stack: freed slot indices are pushed here and + * reused before bumping kstack_next_slot. */ +#define KSTACK_FREE_MAX 256 +static uint32_t kstack_free_stack[KSTACK_FREE_MAX]; +static uint32_t kstack_free_top = 0; + static void* kstack_alloc(void) { uintptr_t flags = spin_lock_irqsave(&kstack_lock); - if (kstack_next_slot >= KSTACK_MAX) { + + uint32_t slot; + if (kstack_free_top > 0) { + slot = kstack_free_stack[--kstack_free_top]; + } else if (kstack_next_slot < KSTACK_MAX) { + slot = kstack_next_slot++; + } else { spin_unlock_irqrestore(&kstack_lock, flags); + kprintf("[SCHED] BUG: kernel stack slots exhausted!\n"); return NULL; } - uint32_t slot = kstack_next_slot++; + spin_unlock_irqrestore(&kstack_lock, flags); uintptr_t base = KSTACK_REGION + slot * KSTACK_SLOT; @@ -78,7 +91,14 @@ static void kstack_free(void* stack) { return; for (uint32_t i = 0; i < KSTACK_PAGES; i++) vmm_unmap_page((uint64_t)(addr + i * 0x1000U)); - /* Note: slot is not recycled — acceptable for now */ + + /* Recycle the slot index */ + uint32_t slot = (uint32_t)((addr - 0x1000U - KSTACK_REGION) / KSTACK_SLOT); + uintptr_t flags = spin_lock_irqsave(&kstack_lock); + if (kstack_free_top < KSTACK_FREE_MAX) { + kstack_free_stack[kstack_free_top++] = slot; + } + spin_unlock_irqrestore(&kstack_lock, flags); } /* ---------- O(1) runqueue ---------- */ @@ -1106,8 +1126,9 @@ void schedule(void) { } else { // Nothing in runqueues. if (prev->state == PROCESS_READY) { - // prev was just enqueued to expired — pull it back. - rq_dequeue(crq->expired, prev); + // prev was just enqueued before rq_pick_next swapped active/expired. + // After the swap, prev is in crq->active (the old expired). + rq_dequeue(crq->active, prev); next = prev; } else { // Fall back to this CPU's idle process. diff --git a/src/kernel/sync.c b/src/kernel/sync.c index 5a9dc4cb..d7f42aa3 100644 --- a/src/kernel/sync.c +++ b/src/kernel/sync.c @@ -31,7 +31,10 @@ void ksem_init(ksem_t* s, int32_t initial_count) { } void ksem_wait(ksem_t* s) { - (void)ksem_wait_timeout(s, 0); + while (ksem_wait_timeout(s, 0) != 0) { + /* Waiters array full — yield and retry */ + schedule(); + } } int ksem_wait_timeout(ksem_t* s, uint32_t timeout_ms) { diff --git a/src/kernel/syscall.c b/src/kernel/syscall.c index 50a98402..e6e4ecc3 100644 --- a/src/kernel/syscall.c +++ b/src/kernel/syscall.c @@ -145,19 +145,20 @@ static int syscall_mq_send_impl(int mqd, const void* user_buf, uint32_t len, uin if (mqd < 0 || mqd >= MQ_MAX_QUEUES) return -EBADF; if (len > MQ_MSG_SIZE) return -EMSGSIZE; + /* Copy user data into a kernel-side buffer before taking the lock + * to avoid TOCTOU: another thread could race on the same slot. */ + uint8_t kbuf[MQ_MSG_SIZE]; + if (copy_from_user(kbuf, user_buf, len) < 0) return -EFAULT; + uintptr_t fl = spin_lock_irqsave(&mq_lock); struct mq_queue* q = &mq_table[mqd]; if (!q->active) { spin_unlock_irqrestore(&mq_lock, fl); return -EBADF; } if (q->count >= q->maxmsg) { spin_unlock_irqrestore(&mq_lock, fl); return -EAGAIN; } struct mq_msg* m = &q->msgs[q->tail]; - spin_unlock_irqrestore(&mq_lock, fl); - - if (copy_from_user(m->data, user_buf, len) < 0) return -EFAULT; + memcpy(m->data, kbuf, len); m->len = len; m->prio = prio; - - fl = spin_lock_irqsave(&mq_lock); q->tail = (q->tail + 1) % q->maxmsg; q->count++; spin_unlock_irqrestore(&mq_lock, fl); @@ -411,9 +412,14 @@ static int syscall_dlopen_impl(const char* user_path) { if (p_type != 1) continue; /* PT_LOAD = 1 */ if (p_memsz == 0) continue; + /* Detect 32-bit overflow in p_vaddr + base */ + if (p_vaddr > (UINT32_MAX - base)) continue; uint32_t vaddr = p_vaddr + base; if (vaddr >= 0xC0000000U) continue; + /* Detect 32-bit overflow in vaddr + p_memsz */ + if (p_memsz > (UINT32_MAX - vaddr)) continue; + /* Map pages */ uint32_t start_page = vaddr & ~0xFFFU; uint32_t end_page = (vaddr + p_memsz - 1) & ~0xFFFU; diff --git a/src/kernel/tmpfs.c b/src/kernel/tmpfs.c index 85e1200c..8077a9ec 100644 --- a/src/kernel/tmpfs.c +++ b/src/kernel/tmpfs.c @@ -56,7 +56,8 @@ static struct tmpfs_node* tmpfs_node_alloc(const char* name, uint32_t flags) { memset(n, 0, sizeof(*n)); if (name) { - strcpy(n->vfs.name, name); + strncpy(n->vfs.name, name, sizeof(n->vfs.name) - 1); + n->vfs.name[sizeof(n->vfs.name) - 1] = '\0'; } else { n->vfs.name[0] = 0; } @@ -206,7 +207,8 @@ static int tmpfs_readdir_impl(struct fs_node* node, uint32_t* inout_index, void* if (!c) break; e.d_ino = c->vfs.inode; e.d_type = (uint8_t)c->vfs.flags; - strcpy(e.d_name, c->vfs.name); + strncpy(e.d_name, c->vfs.name, sizeof(e.d_name) - 1); + e.d_name[sizeof(e.d_name) - 1] = '\0'; } e.d_reclen = (uint16_t)sizeof(e); @@ -421,7 +423,8 @@ int tmpfs_create_symlink(fs_node_t* root_dir, const char* link_path, const char* struct tmpfs_node* ln = tmpfs_node_alloc(leaf, FS_SYMLINK); if (!ln) return -ENOMEM; - strcpy(ln->vfs.symlink_target, target); + strncpy(ln->vfs.symlink_target, target, sizeof(ln->vfs.symlink_target) - 1); + ln->vfs.symlink_target[sizeof(ln->vfs.symlink_target) - 1] = '\0'; ln->vfs.length = (uint32_t)strlen(target); /* symlinks have no f_ops */ diff --git a/src/mm/heap.c b/src/mm/heap.c index 8d7efaf1..213fe512 100644 --- a/src/mm/heap.c +++ b/src/mm/heap.c @@ -83,10 +83,15 @@ static inline free_node_t* fl_pop(free_node_t* s) { return n; } -/* Buddy address via XOR on the offset from heap start */ +/* Buddy address via XOR on the offset from heap start. + * Returns NULL if the result falls outside the heap — defence-in-depth + * against corrupted order fields. */ static inline block_hdr_t* buddy_of(block_hdr_t* b, int order) { uintptr_t off = (uintptr_t)b - KHEAP_START; - return (block_hdr_t*)(KHEAP_START + (off ^ (1U << order))); + uintptr_t buddy_off = off ^ (1U << order); + if (buddy_off >= BUDDY_HEAP_SIZE) + return NULL; + return (block_hdr_t*)(KHEAP_START + buddy_off); } /* Minimum order that can hold `size` user bytes (+ header) */ @@ -213,6 +218,7 @@ void kfree(void* ptr) { /* Coalesce with buddy while possible */ while (order < BUDDY_MAX_ORDER) { block_hdr_t* buddy = buddy_of(blk, order); + if (!buddy) break; /* Buddy must be valid, free, and at the same order */ if (buddy->magic != BUDDY_MAGIC || !buddy->is_free || diff --git a/src/mm/pmm.c b/src/mm/pmm.c index 49496f2c..aa350438 100644 --- a/src/mm/pmm.c +++ b/src/mm/pmm.c @@ -211,6 +211,14 @@ void pmm_free_page(void* ptr) { uintptr_t flags = spin_lock_irqsave(&pmm_lock); uint16_t rc = frame_refcount[frame]; + + if (rc == 0 || !bitmap_test(frame)) { + spin_unlock_irqrestore(&pmm_lock, flags); + kprintf("[PMM] BUG: double free of frame %u (rc=%u)\n", + (unsigned)frame, (unsigned)rc); + return; + } + if (rc > 1) { frame_refcount[frame]--; spin_unlock_irqrestore(&pmm_lock, flags);