From: Tulio A M Mendes Date: Tue, 10 Feb 2026 09:26:46 +0000 (-0300) Subject: feat: Fase 8a — Per-CPU data infrastructure with GS-segment access X-Git-Url: https://projects.tadryanom.me/docs/static/gitweb.js?a=commitdiff_plain;h=629bb6d6b3717f0fa0a4abbc044f8fd330551754;p=AdrOS.git feat: Fase 8a — Per-CPU data infrastructure with GS-segment access New files: - include/arch/x86/percpu.h — Per-CPU data structure and GS-based accessors (percpu_get, percpu_cpu_index, percpu_current, etc.) - src/arch/x86/percpu.c — Per-CPU init: creates GDT entries for each CPU's GS segment pointing to its percpu_data instance Changes: - include/arch/x86/smp.h: Split smp_init into smp_enumerate() and smp_start_aps() to allow percpu_init between enumeration and SIPI - src/arch/x86/smp.c: Implement two-phase SMP init - include/arch/x86/gdt.h: Export gdt_ptr struct, gp variable, and gdt_set_gate_ext() for per-CPU GDT entry creation - src/arch/x86/gdt.c: Expand GDT from 6 to 24 entries (6 base + up to 16 per-CPU GS segments). Add gdt_set_gate_ext(). Make gp non-static. - src/arch/x86/arch_platform.c: Call smp_enumerate() -> percpu_init() -> percpu_setup_gs(0) -> smp_start_aps() in correct order Boot sequence for per-CPU setup: 1. smp_enumerate() — populate cpu_info from ACPI MADT 2. percpu_init() — create GDT entries for each CPU's GS segment 3. percpu_setup_gs(0) — BSP loads its own GS selector 4. 
smp_start_aps() — send INIT-SIPI-SIPI; each AP calls percpu_setup_gs(i) during its init

Passes: make, cppcheck, QEMU smoke test (-smp 1 and -smp 4)
---

diff --git a/include/arch/x86/gdt.h b/include/arch/x86/gdt.h
index 4547b64..c87c22f 100644
--- a/include/arch/x86/gdt.h
+++ b/include/arch/x86/gdt.h
@@ -13,5 +13,6 @@ extern struct gdt_ptr gp;
 
 void gdt_init(void);
 void tss_set_kernel_stack(uintptr_t esp0);
+void gdt_set_gate_ext(int num, uint32_t base, uint32_t limit, uint8_t access, uint8_t gran);
 
 #endif
diff --git a/include/arch/x86/percpu.h b/include/arch/x86/percpu.h
new file mode 100644
index 0000000..2a1a591
--- /dev/null
+++ b/include/arch/x86/percpu.h
@@ -0,0 +1,59 @@
+#ifndef ARCH_X86_PERCPU_H
+#define ARCH_X86_PERCPU_H
+
+#include <stdint.h>
+
+/* Forward declarations */
+struct process;
+struct runqueue;
+
+/* Per-CPU data block — one per CPU, accessed via GS segment.
+ * The GS base for each CPU points to its own percpu_data instance.
+ * The inline accessors below hard-code GS offsets 0 (cpu_index) and
+ * 8 (current_process), so the leading fields must keep this order. */
+struct percpu_data {
+    uint32_t cpu_index;              /* 0 = BSP */
+    uint32_t lapic_id;
+    struct process* current_process; /* Currently running process on this CPU */
+    uintptr_t kernel_stack;          /* Top of this CPU's kernel stack */
+    uint32_t nested_irq;             /* IRQ nesting depth */
+    uint32_t reserved[3];            /* Padding to 32 bytes */
+};
+
+/* Initialize per-CPU data for all CPUs. Called once from BSP after SMP init. */
+void percpu_init(void);
+
+/* Set up GS segment for the current CPU (called by each CPU during init). */
+void percpu_setup_gs(uint32_t cpu_index);
+
+/* Look up the percpu_data slot for cpu_index (defined in percpu.c).
+ * Returns NULL when cpu_index >= SMP_MAX_CPUS. */
+struct percpu_data* percpu_get_ptr(uint32_t cpu_index);
+
+/* Get current CPU index (fast path via GS). */
+static inline uint32_t percpu_cpu_index(void) {
+    uint32_t idx;
+    __asm__ volatile("mov %%gs:0, %0" : "=r"(idx));
+    return idx;
+}
+
+/* Get pointer to current CPU's percpu_data.
+ * %gs:0 holds cpu_index, not a self-pointer, so the address is obtained
+ * by indexing the per-CPU array with the index read through GS. */
+static inline struct percpu_data* percpu_get(void) {
+    return percpu_get_ptr(percpu_cpu_index());
+}
+
+/* Get current process on this CPU (fast path via GS). */
+static inline struct process* percpu_current(void) {
+    struct process* p;
+    __asm__ volatile("mov %%gs:8, %0" : "=r"(p));
+    return p;
+}
+
+/* Set current process on this CPU. */
+static inline void percpu_set_current(struct process* proc) {
+    __asm__ volatile("mov %0, %%gs:8" : : "r"(proc) : "memory");
+}
+
+#endif
diff --git a/include/arch/x86/smp.h b/include/arch/x86/smp.h
index 9fb946b..1ceafbe 100644
--- a/include/arch/x86/smp.h
+++ b/include/arch/x86/smp.h
@@ -15,9 +15,14 @@ struct cpu_info {
     uint32_t kernel_stack; /* Top of this CPU's kernel stack */
 };
 
-/* Initialize SMP: discover APs via ACPI, send INIT-SIPI-SIPI.
- * Returns the number of CPUs that started (including BSP). */
-int smp_init(void);
+/* Phase 1: Discover CPUs from ACPI MADT and populate cpu_info.
+ * Does NOT send SIPI. Returns number of CPUs found. */
+int smp_enumerate(void);
+
+/* Phase 2: Send INIT-SIPI-SIPI to wake APs.
+ * Must be called after percpu_init() so GDT entries exist.
+ * Returns number of CPUs that started (including BSP). */
+int smp_start_aps(void);
 
 /* Get the number of active CPUs.
*/ uint32_t smp_get_cpu_count(void); diff --git a/src/arch/x86/arch_platform.c b/src/arch/x86/arch_platform.c index 1abfe66..27f0a3f 100644 --- a/src/arch/x86/arch_platform.c +++ b/src/arch/x86/arch_platform.c @@ -21,6 +21,7 @@ #include "arch/x86/lapic.h" #include "arch/x86/ioapic.h" #include "arch/x86/smp.h" +#include "arch/x86/percpu.h" #endif #if defined(__i386__) @@ -110,8 +111,15 @@ int arch_platform_setup(const struct boot_info* bi) { pic_disable(); } - /* Bootstrap Application Processors (APs) via INIT-SIPI-SIPI */ - smp_init(); + /* Phase 1: Enumerate CPUs from ACPI MADT */ + smp_enumerate(); + + /* Initialize per-CPU data and GDT entries (must be before APs start) */ + percpu_init(); + percpu_setup_gs(0); + + /* Phase 2: Send INIT-SIPI-SIPI to wake APs */ + smp_start_aps(); } keyboard_init(); diff --git a/src/arch/x86/gdt.c b/src/arch/x86/gdt.c index b2cf132..6fdb813 100644 --- a/src/arch/x86/gdt.c +++ b/src/arch/x86/gdt.c @@ -45,7 +45,9 @@ struct tss_entry { extern void gdt_flush(uint32_t gdt_ptr_addr); extern void tss_flush(uint16_t tss_selector); -static struct gdt_entry gdt[6]; +/* 6 base entries + up to SMP_MAX_CPUS per-CPU GS segments */ +#define GDT_MAX_ENTRIES 24 +static struct gdt_entry gdt[GDT_MAX_ENTRIES]; struct gdt_ptr gp; static struct tss_entry tss; @@ -61,6 +63,14 @@ static void gdt_set_gate(int num, uint32_t base, uint32_t limit, uint8_t access, gdt[num].access = access; } +void gdt_set_gate_ext(int num, uint32_t base, uint32_t limit, uint8_t access, uint8_t gran) { + if (num < 0 || num >= GDT_MAX_ENTRIES) return; + gdt_set_gate(num, base, limit, access, gran); + /* Reload GDT limit to include new entries */ + gp.limit = (uint16_t)(sizeof(struct gdt_entry) * GDT_MAX_ENTRIES - 1); + __asm__ volatile("lgdt %0" : : "m"(gp)); +} + static void tss_write(uint32_t idx, uint16_t kernel_ss, uint32_t kernel_esp) { uintptr_t base = (uintptr_t)&tss; uint32_t limit = (uint32_t)(sizeof(tss) - 1); @@ -86,7 +96,7 @@ void tss_set_kernel_stack(uintptr_t 
esp0) {
 void gdt_init(void) {
     uart_print("[GDT] Initializing GDT/TSS...\n");
 
-    gp.limit = (uint16_t)(sizeof(struct gdt_entry) * 6 - 1);
+    gp.limit = (uint16_t)(sizeof(struct gdt_entry) * GDT_MAX_ENTRIES - 1);
     gp.base = (uint32_t)(uintptr_t)&gdt;
 
     gdt_set_gate(0, 0, 0, 0, 0);
diff --git a/src/arch/x86/percpu.c b/src/arch/x86/percpu.c
new file mode 100644
index 0000000..7b395e3
--- /dev/null
+++ b/src/arch/x86/percpu.c
@@ -0,0 +1,64 @@
+#include "arch/x86/percpu.h"
+#include "arch/x86/smp.h"
+#include "arch/x86/gdt.h"
+#include "uart_console.h"
+#include "utils.h"
+
+#include <stdint.h>
+#include <stddef.h>
+
+/* One percpu_data slot per possible CPU, indexed by cpu_index. */
+static struct percpu_data g_percpu[SMP_MAX_CPUS];
+
+/* We use GDT entries 6..6+N-1 for per-CPU GS segments (N = CPU count).
+ * GDT layout: 0=null, 1=kcode, 2=kdata, 3=ucode, 4=udata, 5=TSS, 6+=percpu */
+#define PERCPU_GDT_BASE 6
+
+/* Set a GDT entry for a per-CPU GS segment.
+ * The segment base points to the percpu_data struct for that CPU.
+ * Limit = sizeof(percpu_data) - 1, byte granularity, ring 0 data.
+ * gdt_set_gate_ext() comes from arch/x86/gdt.h, included above. */
+static void set_percpu_gdt_entry(uint32_t gdt_index, uint32_t base) {
+    /* Access byte: Present(1) | DPL(00) | S(1) | Type(0010 = data r/w) = 0x92 */
+    /* Granularity: G(0)=byte | D(1)=32bit | L(0) | AVL(0) = 0x40 */
+    gdt_set_gate_ext((int)gdt_index, base, sizeof(struct percpu_data) - 1, 0x92, 0x40);
+}
+
+/* Fill in g_percpu[] for every enumerated CPU (clamped to SMP_MAX_CPUS)
+ * and create the matching GDT entry for each one. */
+void percpu_init(void) {
+    uint32_t ncpus = smp_get_cpu_count();
+    if (ncpus > SMP_MAX_CPUS) ncpus = SMP_MAX_CPUS;
+
+    for (uint32_t i = 0; i < ncpus; i++) {
+        const struct cpu_info* ci = smp_get_cpu(i);
+        g_percpu[i].cpu_index = i;
+        g_percpu[i].lapic_id = ci ? ci->lapic_id : 0;
+        g_percpu[i].current_process = NULL;
+        g_percpu[i].kernel_stack = ci ? ci->kernel_stack : 0;
+        g_percpu[i].nested_irq = 0;
+
+        /* Create a GDT entry for this CPU's GS segment */
+        set_percpu_gdt_entry(PERCPU_GDT_BASE + i, (uint32_t)(uintptr_t)&g_percpu[i]);
+    }
+
+    char tmp[12];
+    uart_print("[PERCPU] Initialized for ");
+    itoa(ncpus, tmp, 10);
+    uart_print(tmp);
+    uart_print(" CPU(s).\n");
+}
+
+/* Load GS on the calling CPU with the selector of cpu_index's segment.
+ * percpu_init() must already have created that GDT entry. */
+void percpu_setup_gs(uint32_t cpu_index) {
+    /* GS selector = (PERCPU_GDT_BASE + cpu_index) * 8, RPL=0 */
+    uint16_t sel = (uint16_t)((PERCPU_GDT_BASE + cpu_index) * 8);
+    __asm__ volatile("mov %0, %%gs" : : "r"(sel));
+}
+
+/* Return the percpu_data slot for cpu_index, or NULL if out of range. */
+struct percpu_data* percpu_get_ptr(uint32_t cpu_index) {
+    if (cpu_index >= SMP_MAX_CPUS) return NULL;
+    return &g_percpu[cpu_index];
+}
diff --git a/src/arch/x86/smp.c b/src/arch/x86/smp.c
index 9ade901..e09637e 100644
--- a/src/arch/x86/smp.c
+++ b/src/arch/x86/smp.c
@@ -1,6 +1,7 @@
 #include "arch/x86/smp.h"
 #include "arch/x86/acpi.h"
 #include "arch/x86/lapic.h"
+#include "arch/x86/percpu.h"
 #include "arch/x86/idt.h"
 #include "arch/x86/gdt.h"
 #include "uart_console.h"
@@ -65,9 +66,10 @@ void ap_entry(void) {
     /* Get our LAPIC ID */
     uint32_t my_id = lapic_get_id();
 
-    /* Find our cpu_info slot and mark started */
+    /* Find our cpu_info slot, set up per-CPU GS, and mark started */
     for (uint32_t i = 0; i < g_cpu_count; i++) {
         if (g_cpus[i].lapic_id == (uint8_t)my_id) {
+            percpu_setup_gs(i);
             __atomic_store_n(&g_cpus[i].started, 1, __ATOMIC_SEQ_CST);
             break;
         }
@@ -80,18 +82,18 @@ void ap_entry(void) {
     }
 }
 
-int smp_init(void) {
+int smp_enumerate(void) {
     const struct acpi_info* acpi = acpi_get_info();
     if (!acpi || acpi->num_cpus <= 1) {
         g_cpu_count = 1;
         g_cpus[0].lapic_id = (uint8_t)lapic_get_id();
         g_cpus[0].cpu_index = 0;
         g_cpus[0].started = 1;
-        uart_print("[SMP] Single CPU, no APs to start.\n");
+        g_cpus[0].kernel_stack = 0;
+        uart_print("[SMP] Single CPU enumerated.\n");
         return 1;
     }
 
-    /* Populate cpu_info from ACPI */
     g_cpu_count = acpi->num_cpus;
     uint8_t bsp_id = (uint8_t)lapic_get_id();
 
@@ -102,6 +104,25 @@ int
smp_init(void) { g_cpus[i].kernel_stack = (uint32_t)(uintptr_t)&ap_stacks[i][AP_STACK_SIZE]; } + char tmp[12]; + uart_print("[SMP] Enumerated "); + itoa(g_cpu_count, tmp, 10); + uart_print(tmp); + uart_print(" CPU(s).\n"); + + return (int)g_cpu_count; +} + +int smp_start_aps(void) { + if (g_cpu_count <= 1) { + return 1; + } + + const struct acpi_info* acpi = acpi_get_info(); + if (!acpi) return 1; + + uint8_t bsp_id = (uint8_t)lapic_get_id(); + /* Copy trampoline code to 0x8000 (identity-mapped by boot.S) */ uint32_t tramp_size = (uint32_t)((uintptr_t)ap_trampoline_end - (uintptr_t)ap_trampoline_start); volatile uint8_t* dest = (volatile uint8_t*)(AP_TRAMPOLINE_PHYS + KERNEL_VIRT_BASE);