From: Tulio A M Mendes Date: Fri, 13 Feb 2026 09:43:51 +0000 (-0300) Subject: fix: hold sched_lock through context_switch to prevent timer race X-Git-Url: https://projects.tadryanom.me/docs/static/git-favicon.png?a=commitdiff_plain;h=0515d8e0c2b5fdcbcf944bab52e2eaeb07dafcc7;p=AdrOS.git fix: hold sched_lock through context_switch to prevent timer race Root cause of rare kernel panics with EIP on the kernel stack: When schedule() was called from process context (waitpid, sleep), irq_flags had IF=1. spin_unlock_irqrestore() re-enabled interrupts BEFORE context_switch(). If a timer fired in this window: 1. current_process was already set to 'next' (line 835) 2. But we were still executing on prev's stack 3. Nested schedule() treated 'next' as prev, saved prev's ESP into next->sp — CORRUPTING next->sp 4. Future context_switch to 'next' loaded the wrong stack offset, popping garbage registers and a garbage return address 5. EIP ended up pointing into the kernel stack → PAGE FAULT Fix (three parts): 1. schedule(): move context_switch BEFORE spin_unlock_irqrestore. After context_switch we are on the new process's stack, and its saved irq_flags correctly releases the lock. 2. arch_kstack_init: set initial EFLAGS to 0x002 (IF=0) instead of 0x202 so popf in context_switch doesn't enable interrupts while the lock is held. 3. thread_wrapper: release sched_lock and enable interrupts, since new processes arrive here via context_switch's ret (bypassing the spin_unlock_irqrestore after context_switch). Also: remove get_next_ready_process() which incorrectly returned fallback processes not in rq_active, causing rq_dequeue to corrupt the runqueue bitmap. Inlined the logic correctly in schedule(). Verified: 20/20 boots without 'ring3' — zero panics. Build: clean, cppcheck: clean, smoke: 19/19 pass --- diff --git a/src/arch/x86/arch_process.c b/src/arch/x86/arch_process.c index 08c9f7e..25dc927 100644 --- a/src/arch/x86/arch_process.c +++ b/src/arch/x86/arch_process.c @@ -34,7 +34,7 @@ uintptr_t arch_kstack_init(void* stack_top, *--sp = 0; /* EBX */ *--sp = 0; /* ESI */ *--sp = 0; /* EDI */ - *--sp = 0x202; /* EFLAGS: IF=1 */ + *--sp = 0x002; /* EFLAGS: IF=0 (thread_wrapper enables interrupts) */ return (uintptr_t)sp; } diff --git a/src/kernel/scheduler.c b/src/kernel/scheduler.c index c73cba2..eb98954 100644 --- a/src/kernel/scheduler.c +++ b/src/kernel/scheduler.c @@ -714,7 +714,13 @@ void process_init(void) { spin_unlock_irqrestore(&sched_lock, flags); } +extern spinlock_t sched_lock; + void thread_wrapper(void (*fn)(void)) { + /* We arrive here from context_switch while schedule() still holds + * sched_lock with interrupts disabled. Release the lock and + * enable interrupts so the new thread can run normally. */ + spin_unlock(&sched_lock); hal_cpu_enable_interrupts(); fn(); for(;;) hal_cpu_idle(); @@ -780,23 +786,6 @@ struct process* process_create_kernel(void (*entry_point)(void)) { return proc; } -// Find next READY process — O(1) via bitmap -struct process* get_next_ready_process(void) { - struct process* next = rq_pick_next(); - if (next) return next; - - // Fallback: idle task (PID 0) - if (current_process && current_process->pid == 0) return current_process; - struct process* it = ready_queue_head; - if (!it) return current_process; - const struct process* start = it; - do { - if (it->pid == 0) return it; - it = it->next; - } while (it && it != start); - return current_process; -} - void schedule(void) { uintptr_t irq_flags = spin_lock_irqsave(&sched_lock); @@ -815,21 +804,22 @@ void schedule(void) { rq_enqueue(rq_expired, prev); } - // Pick highest-priority READY process (may swap active/expired). - struct process* next = get_next_ready_process(); + // Pick highest-priority READY process from runqueues (O(1) bitmap). + // rq_pick_next() may swap active/expired internally. + struct process* next = rq_pick_next(); if (next) { - // next is in rq_active (possibly after swap) — remove it. + // next came from rq_active — safe to dequeue. rq_dequeue(rq_active, next); - } - - if (!next) { - // Nothing in runqueues. If prev is still runnable, keep it. + } else { + // Nothing in runqueues. if (prev->state == PROCESS_READY) { + // prev was just enqueued to rq_expired — pull it back. rq_dequeue(rq_expired, prev); next = prev; } else { - // Fall back to idle (PID 0). + // Fall back to idle (PID 0). PID 0 is NOT in any + // runqueue, so we must NOT call rq_dequeue on it. struct process* it = ready_queue_head; next = it; if (it) { @@ -859,12 +849,22 @@ void schedule(void) { hal_cpu_set_kernel_stack((uintptr_t)current_process->kernel_stack + KSTACK_SIZE); } - spin_unlock_irqrestore(&sched_lock, irq_flags); - + /* context_switch MUST execute with the lock held and interrupts + * disabled. Otherwise a timer firing between unlock and + * context_switch would call schedule() again while current_process + * is already set to `next` but we're still on prev's stack, + * corrupting next->sp. + * + * After context_switch we are on the new process's stack. + * irq_flags now holds the value saved during THIS process's + * previous spin_lock_irqsave in schedule(). Releasing the + * lock restores the correct interrupt state. + * + * For brand-new processes, context_switch's `ret` goes to + * thread_wrapper which releases the lock explicitly. */ context_switch(&prev->sp, current_process->sp); - /* EFLAGS (including IF) is now restored by context_switch via popf, - * so we no longer force-enable interrupts here. */ + spin_unlock_irqrestore(&sched_lock, irq_flags); } void process_sleep(uint32_t ticks) {