From: Tulio A M Mendes <tadryanom@hotmail.com>
Date: Sun, 5 Apr 2026 15:19:46 +0000 (-0300)
Subject: fix(security): Red Team bug fixes + deep analysis hardening
X-Git-Url: https://projects.tadryanom.me/?a=commitdiff_plain;h=6d68750e842b7849ee10a4d1acaea44b61c65403;p=AdrOS.git

fix(security): Red Team bug fixes + deep analysis hardening

Red Team report fixes (8 bugs):
- pmm_free_page: add rc==0 double-free guard with BUG log
- uaccess: move global state to per-CPU via GS segment (SMP safety)
- elf.c: add overflow checks for p_vaddr+base_offset and vaddr+p_memsz
- ksem_wait: retry loop on waiter-array-full instead of silent discard
- kstack: add free-slot recycling stack (256 entries) to prevent leak
- schedule(): fix fallback dequeue from active (not expired) after swap
- buddy_of(): add bounds check, kfree handles NULL return
- e1000: add compiler memory barriers to MMIO read/write helpers

Deep analysis fixes (4 additional bugs):
- readdir: fix strcpy buffer overflow d_name[24] from name[128] in
  tmpfs, devfs, initrd (use strncpy + null termination)
- mq_send_impl: fix TOCTOU race — copy user data to kbuf before lock
- dlopen: add 32-bit overflow checks for p_vaddr+base and vaddr+p_memsz
- tmpfs/overlayfs: replace unbounded strcpy into vfs.name[128] and
  symlink_target[128] with strncpy

Regression fix:
- percpu: add self-pointer as first field of percpu_data so that
  percpu_get() returns a valid pointer (was returning cpu_index=0 as
  NULL pointer on BSP, causing triple fault)

Verified: make ARCH=x86 builds clean, cppcheck+sparse pass,
102/103 smoke tests PASS (1 pre-existing timeout).
---

diff --git a/include/arch/x86/percpu.h b/include/arch/x86/percpu.h
index 304c5219..795b6096 100644
--- a/include/arch/x86/percpu.h
+++ b/include/arch/x86/percpu.h
@@ -19,13 +19,16 @@ struct runqueue;
 /* Per-CPU data block â one per CPU, accessed via GS segment.
  * The GS base for each CPU points to its own percpu_data instance. */
 struct percpu_data {
-    uint32_t         cpu_index;       /* 0 = BSP */
-    uint32_t         lapic_id;
-    struct process*  current_process; /* Currently running process on this CPU */
-    uintptr_t        kernel_stack;    /* Top of this CPU's kernel stack */
-    uint32_t         nested_irq;      /* IRQ nesting depth */
-    uint32_t         rq_load;         /* Number of READY processes on this CPU */
-    uint32_t         reserved[2];     /* Padding to 32 bytes */
+    struct percpu_data* self;         /* offset 0 â self-pointer for percpu_get() */
+    uint32_t         cpu_index;       /* offset 4, 0 = BSP */
+    uint32_t         lapic_id;        /* offset 8 */
+    struct process*  current_process; /* offset 12 */
+    uintptr_t        kernel_stack;    /* offset 16 */
+    uint32_t         nested_irq;      /* offset 20 */
+    uint32_t         rq_load;         /* offset 24 */
+    int              uaccess_active;  /* offset 28 */
+    int              uaccess_faulted; /* offset 32 */
+    uintptr_t        uaccess_recover; /* offset 36 */
 };
 
 /* Initialize per-CPU data for all CPUs. Called once from BSP after SMP init. */
@@ -44,20 +47,20 @@ static inline struct percpu_data* percpu_get(void) {
 /* Get current CPU index (fast path via GS). */
 static inline uint32_t percpu_cpu_index(void) {
     uint32_t idx;
-    __asm__ volatile("mov %%gs:0, %0" : "=r"(idx));
+    __asm__ volatile("mov %%gs:4, %0" : "=r"(idx));
     return idx;
 }
 
 /* Get current process on this CPU (fast path via GS). */
 static inline struct process* percpu_current(void) {
     struct process* p;
-    __asm__ volatile("mov %%gs:8, %0" : "=r"(p));
+    __asm__ volatile("mov %%gs:12, %0" : "=r"(p));
     return p;
 }
 
 /* Set current process on this CPU. */
 static inline void percpu_set_current(struct process* proc) {
-    __asm__ volatile("mov %0, %%gs:8" : : "r"(proc) : "memory");
+    __asm__ volatile("mov %0, %%gs:12" : : "r"(proc) : "memory");
 }
 
 #endif
diff --git a/src/arch/x86/elf.c b/src/arch/x86/elf.c
index 93a4da35..484560cf 100644
--- a/src/arch/x86/elf.c
+++ b/src/arch/x86/elf.c
@@ -127,9 +127,17 @@ static int elf32_load_segments(const uint8_t* file, uint32_t file_len,
         if (ph[i].p_type != PT_LOAD) continue;
         if (ph[i].p_memsz == 0) continue;
 
+        /* Detect 32-bit overflow in p_vaddr + base_offset */
+        if (base_offset != 0 && (uintptr_t)ph[i].p_vaddr > (UINT32_MAX - base_offset))
+            return -EINVAL;
+
         uintptr_t vaddr = (uintptr_t)ph[i].p_vaddr + base_offset;
         if (vaddr == 0 || vaddr >= hal_mm_kernel_virt_base()) return -EINVAL;
 
+        /* Detect 32-bit overflow in vaddr + p_memsz */
+        if (ph[i].p_memsz > (UINT32_MAX - (uint32_t)vaddr))
+            return -EINVAL;
+
         uint32_t seg_end = (uint32_t)vaddr + ph[i].p_memsz;
         if (seg_end < vaddr || seg_end >= hal_mm_kernel_virt_base()) return -EINVAL;
 
diff --git a/src/arch/x86/percpu.c b/src/arch/x86/percpu.c
index 68b0ac90..ec4a7999 100644
--- a/src/arch/x86/percpu.c
+++ b/src/arch/x86/percpu.c
@@ -49,6 +49,7 @@ void percpu_init(void) {
 
     for (uint32_t i = 0; i < ncpus; i++) {
         const struct cpu_info* ci = smp_get_cpu(i);
+        g_percpu[i].self = &g_percpu[i];
         g_percpu[i].cpu_index = i;
         g_percpu[i].lapic_id = ci ? ci->lapic_id : 0;
         g_percpu[i].current_process = NULL;
diff --git a/src/arch/x86/uaccess.c b/src/arch/x86/uaccess.c
index 14f875f8..6553bdb1 100644
--- a/src/arch/x86/uaccess.c
+++ b/src/arch/x86/uaccess.c
@@ -12,6 +12,7 @@
 #include "errno.h"
 #include "interrupts.h"
 #include "hal/mm.h"
+#include "arch/x86/percpu.h"
 
 #include <stdint.h>
 
@@ -40,20 +41,18 @@ static int x86_user_range_basic_ok(uintptr_t uaddr, size_t len) {
     return 1;
 }
 
-static volatile int g_uaccess_active = 0;
-static volatile int g_uaccess_faulted = 0;
-static volatile uintptr_t g_uaccess_recover_eip = 0;
-
 int uaccess_try_recover(uintptr_t fault_addr, struct registers* regs) {
     if (!regs) return 0;
-    if (g_uaccess_active == 0) return 0;
-    if (g_uaccess_recover_eip == 0) return 0;
+
+    struct percpu_data* pc = percpu_get();
+    if (!pc->uaccess_active) return 0;
+    if (!pc->uaccess_recover) return 0;
 
     // Only recover faults on user addresses; kernel faults should still panic.
     if (fault_addr >= hal_mm_kernel_virt_base()) return 0;
 
-    g_uaccess_faulted = 1;
-    regs->eip = (uint32_t)g_uaccess_recover_eip;
+    pc->uaccess_faulted = 1;
+    regs->eip = (uint32_t)pc->uaccess_recover;
     return 1;
 }
 
@@ -128,9 +127,10 @@ int copy_from_user(void* dst, const void* src_user, size_t len) {
     if (len == 0) return 0;
     if (!user_range_ok(src_user, len)) return -EFAULT;
 
-    g_uaccess_faulted = 0;
-    g_uaccess_recover_eip = (uintptr_t)&&uaccess_fault;
-    g_uaccess_active = 1;
+    struct percpu_data* pc = percpu_get();
+    pc->uaccess_faulted = 0;
+    pc->uaccess_recover = (uintptr_t)&&uaccess_fault;
+    pc->uaccess_active = 1;
 
     stac();
     uintptr_t up = (uintptr_t)src_user;
@@ -139,16 +139,16 @@ int copy_from_user(void* dst, const void* src_user, size_t len) {
     }
     clac();
 
-    g_uaccess_active = 0;
-    g_uaccess_recover_eip = 0;
-    if (g_uaccess_faulted) return -EFAULT;
+    pc->uaccess_active = 0;
+    pc->uaccess_recover = 0;
+    if (pc->uaccess_faulted) return -EFAULT;
     return 0;
 
 uaccess_fault:
     clac();
-    g_uaccess_active = 0;
-    g_uaccess_faulted = 0;
-    g_uaccess_recover_eip = 0;
+    pc->uaccess_active = 0;
+    pc->uaccess_faulted = 0;
+    pc->uaccess_recover = 0;
     return -EFAULT;
 }
 
@@ -157,9 +157,10 @@ int copy_to_user(void* dst_user, const void* src, size_t len) {
 
     if (!x86_user_range_writable_user((uintptr_t)dst_user, len)) return -EFAULT;
 
-    g_uaccess_faulted = 0;
-    g_uaccess_recover_eip = (uintptr_t)&&uaccess_fault2;
-    g_uaccess_active = 1;
+    struct percpu_data* pc = percpu_get();
+    pc->uaccess_faulted = 0;
+    pc->uaccess_recover = (uintptr_t)&&uaccess_fault2;
+    pc->uaccess_active = 1;
 
     stac();
     uintptr_t up = (uintptr_t)dst_user;
@@ -168,15 +169,15 @@ int copy_to_user(void* dst_user, const void* src, size_t len) {
     }
     clac();
 
-    g_uaccess_active = 0;
-    g_uaccess_recover_eip = 0;
-    if (g_uaccess_faulted) return -EFAULT;
+    pc->uaccess_active = 0;
+    pc->uaccess_recover = 0;
+    if (pc->uaccess_faulted) return -EFAULT;
     return 0;
 
 uaccess_fault2:
     clac();
-    g_uaccess_active = 0;
-    g_uaccess_faulted = 0;
-    g_uaccess_recover_eip = 0;
+    pc->uaccess_active = 0;
+    pc->uaccess_faulted = 0;
+    pc->uaccess_recover = 0;
     return -EFAULT;
 }
diff --git a/src/drivers/e1000.c b/src/drivers/e1000.c
index 70be7d2f..5d912def 100644
--- a/src/drivers/e1000.c
+++ b/src/drivers/e1000.c
@@ -53,12 +53,20 @@ static uint8_t e1000_bus, e1000_slot, e1000_func;
 /* MMIO helpers                                                       */
 /* ------------------------------------------------------------------ */
 
+/* Compiler barrier â prevents reordering across MMIO accesses.
+ * On x86 with UC-mapped MMIO, CPU ordering is already strict;
+ * the barrier ensures the compiler respects that ordering too. */
+#define mmio_barrier() __asm__ volatile("" ::: "memory")
+
 static inline uint32_t e1000_read(uint32_t reg) {
-    return e1000_mmio[reg / 4];
+    uint32_t val = e1000_mmio[reg / 4];
+    mmio_barrier();
+    return val;
 }
 
 static inline void e1000_write(uint32_t reg, uint32_t val) {
     e1000_mmio[reg / 4] = val;
+    mmio_barrier();
 }
 
 /* ------------------------------------------------------------------ */
diff --git a/src/drivers/initrd.c b/src/drivers/initrd.c
index 8a83107f..8948676f 100644
--- a/src/drivers/initrd.c
+++ b/src/drivers/initrd.c
@@ -246,7 +246,8 @@ static int initrd_readdir(struct fs_node* node, uint32_t* inout_index, void* buf
             if (c == -1) break;
             e.d_ino = (uint32_t)c;
             e.d_type = (uint8_t)entries[c].flags;
-            strcpy(e.d_name, entries[c].name);
+            strncpy(e.d_name, entries[c].name, sizeof(e.d_name) - 1);
+            e.d_name[sizeof(e.d_name) - 1] = '\0';
         }
 
         e.d_reclen = (uint16_t)sizeof(e);
diff --git a/src/kernel/devfs.c b/src/kernel/devfs.c
index 3266dd32..8761841e 100644
--- a/src/kernel/devfs.c
+++ b/src/kernel/devfs.c
@@ -206,12 +206,14 @@ static int devfs_readdir_impl(struct fs_node* node, uint32_t* inout_index, void*
             if (di < NBUILTINS) {
                 e.d_ino = builtins[di].ino;
                 e.d_type = builtins[di].type;
-                strcpy(e.d_name, builtins[di].name);
+                strncpy(e.d_name, builtins[di].name, sizeof(e.d_name) - 1);
+                e.d_name[sizeof(e.d_name) - 1] = '\0';
             } else {
                 fs_node_t* rn = g_registered[di - NBUILTINS];
                 e.d_ino = rn->inode;
                 e.d_type = (uint8_t)rn->flags;
-                strcpy(e.d_name, rn->name);
+                strncpy(e.d_name, rn->name, sizeof(e.d_name) - 1);
+                e.d_name[sizeof(e.d_name) - 1] = '\0';
             }
         }
 
diff --git a/src/kernel/overlayfs.c b/src/kernel/overlayfs.c
index 2f87d265..f809832e 100644
--- a/src/kernel/overlayfs.c
+++ b/src/kernel/overlayfs.c
@@ -127,7 +127,8 @@ static fs_node_t* overlay_wrap_child(struct overlay_node* parent, const char* na
     if (!c) return NULL;
     memset(c, 0, sizeof(*c));
 
-    strcpy(c->vfs.name, name);
+    strncpy(c->vfs.name, name, sizeof(c->vfs.name) - 1);
+    c->vfs.name[sizeof(c->vfs.name) - 1] = '\0';
     c->ofs = parent->ofs;
     c->lower = lower_child;
     c->upper = upper_child;
diff --git a/src/kernel/scheduler.c b/src/kernel/scheduler.c
index bb10c5e9..b3d8db35 100644
--- a/src/kernel/scheduler.c
+++ b/src/kernel/scheduler.c
@@ -48,13 +48,26 @@ static uintptr_t kernel_as = 0;
 static uint32_t kstack_next_slot = 0;
 static spinlock_t kstack_lock = {0};
 
+/* Free-slot recycling stack: freed slot indices are pushed here and
+ * reused before bumping kstack_next_slot. */
+#define KSTACK_FREE_MAX 256
+static uint32_t kstack_free_stack[KSTACK_FREE_MAX];
+static uint32_t kstack_free_top = 0;
+
 static void* kstack_alloc(void) {
     uintptr_t flags = spin_lock_irqsave(&kstack_lock);
-    if (kstack_next_slot >= KSTACK_MAX) {
+
+    uint32_t slot;
+    if (kstack_free_top > 0) {
+        slot = kstack_free_stack[--kstack_free_top];
+    } else if (kstack_next_slot < KSTACK_MAX) {
+        slot = kstack_next_slot++;
+    } else {
         spin_unlock_irqrestore(&kstack_lock, flags);
+        kprintf("[SCHED] BUG: kernel stack slots exhausted!\n");
         return NULL;
     }
-    uint32_t slot = kstack_next_slot++;
+
     spin_unlock_irqrestore(&kstack_lock, flags);
 
     uintptr_t base = KSTACK_REGION + slot * KSTACK_SLOT;
@@ -78,7 +91,14 @@ static void kstack_free(void* stack) {
         return;
     for (uint32_t i = 0; i < KSTACK_PAGES; i++)
         vmm_unmap_page((uint64_t)(addr + i * 0x1000U));
-    /* Note: slot is not recycled â acceptable for now */
+
+    /* Recycle the slot index */
+    uint32_t slot = (uint32_t)((addr - 0x1000U - KSTACK_REGION) / KSTACK_SLOT);
+    uintptr_t flags = spin_lock_irqsave(&kstack_lock);
+    if (kstack_free_top < KSTACK_FREE_MAX) {
+        kstack_free_stack[kstack_free_top++] = slot;
+    }
+    spin_unlock_irqrestore(&kstack_lock, flags);
 }
 
 /* ---------- O(1) runqueue ---------- */
@@ -1106,8 +1126,9 @@ void schedule(void) {
     } else {
         // Nothing in runqueues.
         if (prev->state == PROCESS_READY) {
-            // prev was just enqueued to expired â pull it back.
-            rq_dequeue(crq->expired, prev);
+            // prev was just enqueued before rq_pick_next swapped active/expired.
+            // After the swap, prev is in crq->active (the old expired).
+            rq_dequeue(crq->active, prev);
             next = prev;
         } else {
             // Fall back to this CPU's idle process.
diff --git a/src/kernel/sync.c b/src/kernel/sync.c
index 5a9dc4cb..d7f42aa3 100644
--- a/src/kernel/sync.c
+++ b/src/kernel/sync.c
@@ -31,7 +31,10 @@ void ksem_init(ksem_t* s, int32_t initial_count) {
 }
 
 void ksem_wait(ksem_t* s) {
-    (void)ksem_wait_timeout(s, 0);
+    while (ksem_wait_timeout(s, 0) != 0) {
+        /* Waiters array full â yield and retry */
+        schedule();
+    }
 }
 
 int ksem_wait_timeout(ksem_t* s, uint32_t timeout_ms) {
diff --git a/src/kernel/syscall.c b/src/kernel/syscall.c
index 50a98402..e6e4ecc3 100644
--- a/src/kernel/syscall.c
+++ b/src/kernel/syscall.c
@@ -145,19 +145,20 @@ static int syscall_mq_send_impl(int mqd, const void* user_buf, uint32_t len, uin
     if (mqd < 0 || mqd >= MQ_MAX_QUEUES) return -EBADF;
     if (len > MQ_MSG_SIZE) return -EMSGSIZE;
 
+    /* Copy user data into a kernel-side buffer before taking the lock
+     * to avoid TOCTOU: another thread could race on the same slot. */
+    uint8_t kbuf[MQ_MSG_SIZE];
+    if (copy_from_user(kbuf, user_buf, len) < 0) return -EFAULT;
+
     uintptr_t fl = spin_lock_irqsave(&mq_lock);
     struct mq_queue* q = &mq_table[mqd];
     if (!q->active) { spin_unlock_irqrestore(&mq_lock, fl); return -EBADF; }
     if (q->count >= q->maxmsg) { spin_unlock_irqrestore(&mq_lock, fl); return -EAGAIN; }
 
     struct mq_msg* m = &q->msgs[q->tail];
-    spin_unlock_irqrestore(&mq_lock, fl);
-
-    if (copy_from_user(m->data, user_buf, len) < 0) return -EFAULT;
+    memcpy(m->data, kbuf, len);
     m->len = len;
     m->prio = prio;
-
-    fl = spin_lock_irqsave(&mq_lock);
     q->tail = (q->tail + 1) % q->maxmsg;
     q->count++;
     spin_unlock_irqrestore(&mq_lock, fl);
@@ -411,9 +412,14 @@ static int syscall_dlopen_impl(const char* user_path) {
         if (p_type != 1) continue; /* PT_LOAD = 1 */
         if (p_memsz == 0) continue;
 
+        /* Detect 32-bit overflow in p_vaddr + base */
+        if (p_vaddr > (UINT32_MAX - base)) continue;
         uint32_t vaddr = p_vaddr + base;
         if (vaddr >= 0xC0000000U) continue;
 
+        /* Detect 32-bit overflow in vaddr + p_memsz */
+        if (p_memsz > (UINT32_MAX - vaddr)) continue;
+
         /* Map pages */
         uint32_t start_page = vaddr & ~0xFFFU;
         uint32_t end_page = (vaddr + p_memsz - 1) & ~0xFFFU;
diff --git a/src/kernel/tmpfs.c b/src/kernel/tmpfs.c
index 85e1200c..8077a9ec 100644
--- a/src/kernel/tmpfs.c
+++ b/src/kernel/tmpfs.c
@@ -56,7 +56,8 @@ static struct tmpfs_node* tmpfs_node_alloc(const char* name, uint32_t flags) {
     memset(n, 0, sizeof(*n));
 
     if (name) {
-        strcpy(n->vfs.name, name);
+        strncpy(n->vfs.name, name, sizeof(n->vfs.name) - 1);
+        n->vfs.name[sizeof(n->vfs.name) - 1] = '\0';
     } else {
         n->vfs.name[0] = 0;
     }
@@ -206,7 +207,8 @@ static int tmpfs_readdir_impl(struct fs_node* node, uint32_t* inout_index, void*
             if (!c) break;
             e.d_ino = c->vfs.inode;
             e.d_type = (uint8_t)c->vfs.flags;
-            strcpy(e.d_name, c->vfs.name);
+            strncpy(e.d_name, c->vfs.name, sizeof(e.d_name) - 1);
+            e.d_name[sizeof(e.d_name) - 1] = '\0';
         }
 
         e.d_reclen = (uint16_t)sizeof(e);
@@ -421,7 +423,8 @@ int tmpfs_create_symlink(fs_node_t* root_dir, const char* link_path, const char*
     struct tmpfs_node* ln = tmpfs_node_alloc(leaf, FS_SYMLINK);
     if (!ln) return -ENOMEM;
 
-    strcpy(ln->vfs.symlink_target, target);
+    strncpy(ln->vfs.symlink_target, target, sizeof(ln->vfs.symlink_target) - 1);
+    ln->vfs.symlink_target[sizeof(ln->vfs.symlink_target) - 1] = '\0';
     ln->vfs.length = (uint32_t)strlen(target);
     /* symlinks have no f_ops */
 
diff --git a/src/mm/heap.c b/src/mm/heap.c
index 8d7efaf1..213fe512 100644
--- a/src/mm/heap.c
+++ b/src/mm/heap.c
@@ -83,10 +83,15 @@ static inline free_node_t* fl_pop(free_node_t* s) {
     return n;
 }
 
-/* Buddy address via XOR on the offset from heap start */
+/* Buddy address via XOR on the offset from heap start.
+ * Returns NULL if the result falls outside the heap â defence-in-depth
+ * against corrupted order fields. */
 static inline block_hdr_t* buddy_of(block_hdr_t* b, int order) {
     uintptr_t off = (uintptr_t)b - KHEAP_START;
-    return (block_hdr_t*)(KHEAP_START + (off ^ (1U << order)));
+    uintptr_t buddy_off = off ^ (1U << order);
+    if (buddy_off >= BUDDY_HEAP_SIZE)
+        return NULL;
+    return (block_hdr_t*)(KHEAP_START + buddy_off);
 }
 
 /* Minimum order that can hold `size` user bytes (+ header) */
@@ -213,6 +218,7 @@ void kfree(void* ptr) {
     /* Coalesce with buddy while possible */
     while (order < BUDDY_MAX_ORDER) {
         block_hdr_t* buddy = buddy_of(blk, order);
+        if (!buddy) break;
 
         /* Buddy must be valid, free, and at the same order */
         if (buddy->magic != BUDDY_MAGIC || !buddy->is_free ||
diff --git a/src/mm/pmm.c b/src/mm/pmm.c
index 49496f2c..aa350438 100644
--- a/src/mm/pmm.c
+++ b/src/mm/pmm.c
@@ -211,6 +211,14 @@ void pmm_free_page(void* ptr) {
     uintptr_t flags = spin_lock_irqsave(&pmm_lock);
 
     uint16_t rc = frame_refcount[frame];
+
+    if (rc == 0 || !bitmap_test(frame)) {
+        spin_unlock_irqrestore(&pmm_lock, flags);
+        kprintf("[PMM] BUG: double free of frame %u (rc=%u)\n",
+                (unsigned)frame, (unsigned)rc);
+        return;
+    }
+
     if (rc > 1) {
         frame_refcount[frame]--;
         spin_unlock_irqrestore(&pmm_lock, flags);