void gdt_init(void);
void tss_set_kernel_stack(uintptr_t esp0);
+void gdt_set_gate_ext(int num, uint32_t base, uint32_t limit, uint8_t access, uint8_t gran);
#endif
--- /dev/null
+#ifndef ARCH_X86_PERCPU_H
+#define ARCH_X86_PERCPU_H
+
+#include <stdint.h>
+
+/* Forward declarations */
+struct process;
+struct runqueue;
+
+/* Per-CPU data block — one per CPU, accessed via GS segment.
+ * The GS base for each CPU points to its own percpu_data instance.
+ *
+ * Layout is load-bearing: the inline accessors in this header use
+ * hard-coded GS-relative offsets (0 for cpu_index, 8 for current_process).
+ * Do not reorder or insert fields ahead of current_process without
+ * updating those offsets. */
+struct percpu_data {
+ uint32_t cpu_index; /* Offset 0. Index of this CPU; 0 = BSP */
+ uint32_t lapic_id; /* Offset 4. Copied from cpu_info by percpu_init() */
+ struct process* current_process; /* Offset 8. Currently running process on this CPU */
+ uintptr_t kernel_stack; /* Top of this CPU's kernel stack (copied from cpu_info) */
+ uint32_t nested_irq; /* IRQ nesting depth; percpu_init() sets it to 0 */
+ uint32_t reserved[3]; /* Padding to 32 bytes (assuming 4-byte pointers, i.e. i386) */
+};
+
+/* Initialize per-CPU data for all CPUs. Called once from BSP after SMP init. */
+void percpu_init(void);
+
+/* Set up GS segment for the current CPU (called by each CPU during init). */
+void percpu_setup_gs(uint32_t cpu_index);
+
+/* Look up the percpu_data slot for a CPU index (defined in percpu.c).
+ * Returns NULL when cpu_index is out of range. */
+struct percpu_data* percpu_get_ptr(uint32_t cpu_index);
+
+/* Get pointer to current CPU's percpu_data.
+ *
+ * The word at %gs:0 is the cpu_index field, not a self-pointer, so it
+ * cannot be returned directly as an address. Read the index through GS
+ * and resolve it to the matching slot with percpu_get_ptr().
+ *
+ * Returns NULL if the index read from GS is out of range. GS must have
+ * been loaded with percpu_setup_gs() on this CPU before calling. */
+static inline struct percpu_data* percpu_get(void) {
+    uint32_t idx;
+    __asm__ volatile("mov %%gs:0, %0" : "=r"(idx));
+    return percpu_get_ptr(idx);
+}
+
+/* Return the index of the executing CPU.
+ * Loads the 32-bit word at GS offset 0, which is where
+ * struct percpu_data stores cpu_index. */
+static inline uint32_t percpu_cpu_index(void) {
+    uint32_t cpu;
+    __asm__ volatile("mov %%gs:0, %[out]" : [out] "=r"(cpu));
+    return cpu;
+}
+
+/* Return the process pointer recorded for the executing CPU.
+ * Loads the word at GS offset 8, the current_process field of
+ * struct percpu_data. */
+static inline struct process* percpu_current(void) {
+    struct process* running;
+    __asm__ volatile("mov %%gs:8, %[out]" : [out] "=r"(running));
+    return running;
+}
+
+/* Record proc as the running process of the executing CPU.
+ * Stores to GS offset 8 (current_process); the "memory" clobber keeps the
+ * compiler from moving other memory accesses across the store. */
+static inline void percpu_set_current(struct process* proc) {
+    __asm__ volatile("mov %[in], %%gs:8"
+                     :
+                     : [in] "r"(proc)
+                     : "memory");
+}
+
+#endif
uint32_t kernel_stack; /* Top of this CPU's kernel stack */
};
-/* Initialize SMP: discover APs via ACPI, send INIT-SIPI-SIPI.
- * Returns the number of CPUs that started (including BSP). */
-int smp_init(void);
+/* Phase 1: Discover CPUs from ACPI MADT and populate cpu_info.
+ * Does NOT send SIPI. Returns number of CPUs found. */
+int smp_enumerate(void);
+
+/* Phase 2: Send INIT-SIPI-SIPI to wake APs.
+ * Must be called after percpu_init() so GDT entries exist.
+ * Returns number of CPUs that started (including BSP). */
+int smp_start_aps(void);
/* Get the number of active CPUs. */
uint32_t smp_get_cpu_count(void);
#include "arch/x86/lapic.h"
#include "arch/x86/ioapic.h"
#include "arch/x86/smp.h"
+#include "arch/x86/percpu.h"
#endif
#if defined(__i386__)
pic_disable();
}
- /* Bootstrap Application Processors (APs) via INIT-SIPI-SIPI */
- smp_init();
+ /* Phase 1: Enumerate CPUs from ACPI MADT */
+ smp_enumerate();
+
+ /* Initialize per-CPU data and GDT entries (must be before APs start) */
+ percpu_init();
+ percpu_setup_gs(0);
+
+ /* Phase 2: Send INIT-SIPI-SIPI to wake APs */
+ smp_start_aps();
}
keyboard_init();
extern void gdt_flush(uint32_t gdt_ptr_addr);
extern void tss_flush(uint16_t tss_selector);
-static struct gdt_entry gdt[6];
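+/* 6 base entries (null, kcode, kdata, ucode, udata, TSS) + 18 slots for
+ * per-CPU GS segments. SMP_MAX_CPUS must not exceed GDT_MAX_ENTRIES - 6:
+ * gdt_set_gate_ext() silently ignores entries beyond this table. */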
+#define GDT_MAX_ENTRIES 24
+static struct gdt_entry gdt[GDT_MAX_ENTRIES];
struct gdt_ptr gp;
static struct tss_entry tss;
gdt[num].access = access;
}
+/* Write one GDT descriptor on behalf of code outside this file, then
+ * reload GDTR on the calling CPU.
+ *
+ * num:    descriptor index; values outside [0, GDT_MAX_ENTRIES) are ignored.
+ * base, limit, access, gran: forwarded unchanged to gdt_set_gate().
+ */
+void gdt_set_gate_ext(int num, uint32_t base, uint32_t limit, uint8_t access, uint8_t gran) {
+    const int in_table = (num >= 0) && (num < GDT_MAX_ENTRIES);
+    if (!in_table) {
+        return;
+    }
+
+    gdt_set_gate(num, base, limit, access, gran);
+
+    /* Advertise the whole table so the freshly written entry is covered. */
+    gp.limit = (uint16_t)(GDT_MAX_ENTRIES * sizeof(struct gdt_entry) - 1);
+    __asm__ volatile("lgdt %[desc]" : : [desc] "m"(gp));
+}
+
static void tss_write(uint32_t idx, uint16_t kernel_ss, uint32_t kernel_esp) {
uintptr_t base = (uintptr_t)&tss;
uint32_t limit = (uint32_t)(sizeof(tss) - 1);
void gdt_init(void) {
uart_print("[GDT] Initializing GDT/TSS...\n");
- gp.limit = (uint16_t)(sizeof(struct gdt_entry) * 6 - 1);
+ gp.limit = (uint16_t)(sizeof(struct gdt_entry) * GDT_MAX_ENTRIES - 1);
gp.base = (uint32_t)(uintptr_t)&gdt;
gdt_set_gate(0, 0, 0, 0, 0);
--- /dev/null
+#include "arch/x86/percpu.h"
+#include "arch/x86/smp.h"
+#include "arch/x86/gdt.h"
+#include "uart_console.h"
+#include "utils.h"
+
+#include <stdint.h>
+#include <stddef.h>
+
+static struct percpu_data g_percpu[SMP_MAX_CPUS];
+
+/* We use GDT entries 6..6+N for per-CPU GS segments.
+ * GDT layout: 0=null, 1=kcode, 2=kdata, 3=ucode, 4=udata, 5=TSS, 6+=percpu */
+#define PERCPU_GDT_BASE 6
+
+/* Install the GDT descriptor that backs one CPU's GS segment.
+ *
+ * gdt_index: GDT slot to write.
+ * base:      address of that CPU's percpu_data instance.
+ *
+ * The descriptor is a ring-0 read/write data segment with byte
+ * granularity whose limit covers exactly one struct percpu_data. */
+static void set_percpu_gdt_entry(uint32_t gdt_index, uint32_t base) {
+    extern void gdt_set_gate_ext(int num, uint32_t base, uint32_t limit,
+                                 uint8_t access, uint8_t gran);
+
+    /* Present(1) | DPL(00) | S(1) | Type(0010 = data r/w) */
+    const uint8_t access_byte = 0x92;
+    /* G(0)=byte | D(1)=32bit | L(0) | AVL(0) */
+    const uint8_t gran_byte = 0x40;
+    const uint32_t seg_limit = sizeof(struct percpu_data) - 1;
+
+    gdt_set_gate_ext((int)gdt_index, base, seg_limit, access_byte, gran_byte);
+}
+
+/* Fill in the percpu_data slot and the GS descriptor for every CPU
+ * reported by smp_get_cpu_count() (capped at SMP_MAX_CPUS), then log the
+ * number of CPUs handled. Slot i is described by GDT entry
+ * PERCPU_GDT_BASE + i. */
+void percpu_init(void) {
+    uint32_t count = smp_get_cpu_count();
+    if (count > SMP_MAX_CPUS) {
+        count = SMP_MAX_CPUS;
+    }
+
+    for (uint32_t cpu = 0; cpu < count; cpu++) {
+        const struct cpu_info* info = smp_get_cpu(cpu);
+        struct percpu_data* slot = &g_percpu[cpu];
+
+        slot->cpu_index = cpu;
+        if (info) {
+            slot->lapic_id = info->lapic_id;
+            slot->kernel_stack = info->kernel_stack;
+        } else {
+            /* No cpu_info for this index: fall back to zeroes. */
+            slot->lapic_id = 0;
+            slot->kernel_stack = 0;
+        }
+        slot->current_process = NULL;
+        slot->nested_irq = 0;
+
+        /* Descriptor base is the address of this CPU's slot. */
+        set_percpu_gdt_entry(PERCPU_GDT_BASE + cpu, (uint32_t)(uintptr_t)slot);
+    }
+
+    char digits[12];
+    itoa(count, digits, 10);
+    uart_print("[PERCPU] Initialized for ");
+    uart_print(digits);
+    uart_print(" CPU(s).\n");
+}
+
+/* Load GS on the calling CPU with the selector of the per-CPU segment
+ * belonging to cpu_index. The selector is the GDT index
+ * (PERCPU_GDT_BASE + cpu_index) shifted into bits 3..15, with TI=0 and
+ * RPL=0. */
+void percpu_setup_gs(uint32_t cpu_index) {
+    const uint32_t gdt_slot = PERCPU_GDT_BASE + cpu_index;
+    const uint16_t selector = (uint16_t)(gdt_slot << 3);
+    __asm__ volatile("mov %[sel], %%gs" : : [sel] "r"(selector));
+}
+
+/* Return the percpu_data slot for cpu_index, or NULL when cpu_index is
+ * not below SMP_MAX_CPUS. */
+struct percpu_data* percpu_get_ptr(uint32_t cpu_index) {
+    return (cpu_index < SMP_MAX_CPUS) ? &g_percpu[cpu_index] : NULL;
+}
#include "arch/x86/smp.h"
#include "arch/x86/acpi.h"
#include "arch/x86/lapic.h"
+#include "arch/x86/percpu.h"
#include "arch/x86/idt.h"
#include "arch/x86/gdt.h"
#include "uart_console.h"
/* Get our LAPIC ID */
uint32_t my_id = lapic_get_id();
- /* Find our cpu_info slot and mark started */
+ /* Find our cpu_info slot, set up per-CPU GS, and mark started */
for (uint32_t i = 0; i < g_cpu_count; i++) {
if (g_cpus[i].lapic_id == (uint8_t)my_id) {
+ percpu_setup_gs(i);
__atomic_store_n(&g_cpus[i].started, 1, __ATOMIC_SEQ_CST);
break;
}
}
}
-int smp_init(void) {
+int smp_enumerate(void) {
const struct acpi_info* acpi = acpi_get_info();
if (!acpi || acpi->num_cpus <= 1) {
g_cpu_count = 1;
g_cpus[0].lapic_id = (uint8_t)lapic_get_id();
g_cpus[0].cpu_index = 0;
g_cpus[0].started = 1;
- uart_print("[SMP] Single CPU, no APs to start.\n");
+ g_cpus[0].kernel_stack = 0;
+ uart_print("[SMP] Single CPU enumerated.\n");
return 1;
}
- /* Populate cpu_info from ACPI */
g_cpu_count = acpi->num_cpus;
uint8_t bsp_id = (uint8_t)lapic_get_id();
g_cpus[i].kernel_stack = (uint32_t)(uintptr_t)&ap_stacks[i][AP_STACK_SIZE];
}
+ char tmp[12];
+ uart_print("[SMP] Enumerated ");
+ itoa(g_cpu_count, tmp, 10);
+ uart_print(tmp);
+ uart_print(" CPU(s).\n");
+
+ return (int)g_cpu_count;
+}
+
+int smp_start_aps(void) {
+ if (g_cpu_count <= 1) {
+ return 1;
+ }
+
+ const struct acpi_info* acpi = acpi_get_info();
+ if (!acpi) return 1;
+
+ uint8_t bsp_id = (uint8_t)lapic_get_id();
+
/* Copy trampoline code to 0x8000 (identity-mapped by boot.S) */
uint32_t tramp_size = (uint32_t)((uintptr_t)ap_trampoline_end - (uintptr_t)ap_trampoline_start);
volatile uint8_t* dest = (volatile uint8_t*)(AP_TRAMPOLINE_PHYS + KERNEL_VIRT_BASE);