- arch_fpu_init(): initialize x87 FPU (CR0.NE, clear EM/TS), enable OSFXSR if FXSR supported
- arch_fpu_save/restore: FXSAVE/FXRSTOR (or FSAVE/FRSTOR fallback) per process
- FPU state (512B) added to struct process, initialized for new processes
- fork/clone inherit parent FPU state; kernel threads get clean state
- schedule() saves prev FPU state before context_switch, restores next after
- Heap header padded 8->16 bytes for 16-byte aligned kmalloc (FXSAVE requirement)
- Added -mno-sse -mno-mmx to kernel ARCH_CFLAGS (prevent SSE in kernel code)
- Weak stubs in src/kernel/fpu.c for non-x86 architectures
C_SOURCES += $(NET_SOURCES)
# Mandatory Architecture Flags
- ARCH_CFLAGS := -m32 -ffreestanding -fno-builtin -U_FORTIFY_SOURCE -Iinclude -Iinclude/net -Ithird_party/lwip/src/include
+ ARCH_CFLAGS := -m32 -ffreestanding -fno-builtin -U_FORTIFY_SOURCE -mno-sse -mno-mmx -Iinclude -Iinclude/net -Ithird_party/lwip/src/include
ARCH_LDFLAGS := -m elf_i386 -T $(SRC_DIR)/arch/x86/linker.ld
ARCH_ASFLAGS := --32
--- /dev/null
+#ifndef ARCH_FPU_H
+#define ARCH_FPU_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+/*
+ * FPU/SSE context save area size.
+ * FXSAVE requires 512 bytes, 16-byte aligned.
+ * FSAVE requires 108 bytes (no alignment requirement).
+ * We always allocate the larger size for simplicity.
+ */
+#define FPU_STATE_SIZE 512
+#define FPU_STATE_ALIGN 16
+
+/* Initialize FPU hardware during boot (CR0/CR4 bits, FNINIT). */
+/* Must run after CPU feature detection and before any task touches x87/SSE. */
+void arch_fpu_init(void);
+
+/* Save current FPU/SSE state into buffer (must be 16-byte aligned). */
+/* NOTE: on the legacy FSAVE fallback path this also resets and
+ * reinitializes the FPU (FNSAVE+FNINIT), so the live x87 register
+ * contents are clean — not preserved — after this call returns. */
+void arch_fpu_save(uint8_t* state);
+
+/* Restore FPU/SSE state from buffer (must be 16-byte aligned). */
+void arch_fpu_restore(const uint8_t* state);
+
+/* Copy the clean (post-FNINIT) FPU state into buffer for new processes. */
+/* Template is captured once during arch_fpu_init(). */
+void arch_fpu_init_state(uint8_t* state);
+
+#endif
#include <stdint.h>
#include "arch_types.h"
+#include "arch_fpu.h"
#include "fs.h"
#include "signal.h"
uint32_t flags; /* PROCESS_FLAG_* */
uintptr_t tls_base; /* User-space TLS base (set via SET_THREAD_AREA) */
uint32_t* clear_child_tid; /* User address to clear + futex-wake on exit */
+
+ uint8_t fpu_state[FPU_STATE_SIZE] __attribute__((aligned(FPU_STATE_ALIGN)));
};
// Global pointer to the currently running process
--- /dev/null
+#include "arch_fpu.h"
+#include "console.h"
+#include "hal/cpu_features.h"
+
+#include <stdint.h>
+#include <string.h>
+
+/* CR0 bits */
+#define CR0_EM (1U << 2) /* Emulate coprocessor (must be CLEAR for real FPU) */
+#define CR0_TS (1U << 3) /* Task Switched (lazy FPU — we clear it) */
+#define CR0_NE (1U << 5) /* Numeric Error (use native FPU exceptions) */
+#define CR0_MP (1U << 1) /* Monitor coprocessor */
+
+/* CR4 bits */
+#define CR4_OSFXSR (1U << 9) /* OS supports FXSAVE/FXRSTOR */
+#define CR4_OSXMMEXCPT (1U << 10) /* OS supports SSE exceptions */
+
+/* Set once in arch_fpu_init() when CPUID reports FXSR support;
+ * selects the FXSAVE/FXRSTOR vs FSAVE/FRSTOR code paths below. */
+static int g_fpu_has_fxsr = 0;
+
+/* Clean FPU state captured right after FNINIT — used as template for new processes */
+static uint8_t g_fpu_clean_state[FPU_STATE_SIZE] __attribute__((aligned(FPU_STATE_ALIGN)));
+
+/* Raw CR0/CR4 accessors. The "memory" clobber on the writers keeps the
+ * compiler from reordering surrounding memory operations across a control
+ * register change (e.g. enabling OSFXSR before the first FXSAVE). */
+static inline uint32_t read_cr0(void) {
+    uint32_t val;
+    __asm__ volatile("mov %%cr0, %0" : "=r"(val));
+    return val;
+}
+
+static inline void write_cr0(uint32_t val) {
+    __asm__ volatile("mov %0, %%cr0" :: "r"(val) : "memory");
+}
+
+static inline uint32_t read_cr4(void) {
+    uint32_t val;
+    __asm__ volatile("mov %%cr4, %0" : "=r"(val));
+    return val;
+}
+
+static inline void write_cr4(uint32_t val) {
+    __asm__ volatile("mov %0, %%cr4" :: "r"(val) : "memory");
+}
+
+/*
+ * One-time FPU bring-up:
+ *   1. Program CR0 (native FPU, no emulation, no lazy-switch trapping).
+ *   2. FNINIT to put the x87 unit in a known default state.
+ *   3. If CPUID reports FXSR, enable FXSAVE/FXRSTOR + SSE exceptions in CR4.
+ *   4. Snapshot the post-FNINIT state as the template for new processes.
+ *
+ * NOTE(review): assumes hal_cpu_detect_features() has already populated the
+ * feature struct returned by hal_cpu_get_features() — confirm boot ordering.
+ */
+void arch_fpu_init(void) {
+    const struct cpu_features* f = hal_cpu_get_features();
+
+    /* Set CR0: clear EM (no emulation), set MP+NE, clear TS */
+    /* TS stays clear: we do eager save/restore in schedule(), not lazy #NM. */
+    uint32_t cr0 = read_cr0();
+    cr0 &= ~(CR0_EM | CR0_TS);
+    cr0 |= CR0_MP | CR0_NE;
+    write_cr0(cr0);
+
+    /* Initialize x87 FPU */
+    __asm__ volatile("fninit");
+
+    /* Enable FXSAVE/FXRSTOR if supported */
+    if (f->has_fxsr) {
+        uint32_t cr4 = read_cr4();
+        cr4 |= CR4_OSFXSR | CR4_OSXMMEXCPT;
+        write_cr4(cr4);
+        g_fpu_has_fxsr = 1;
+        kprintf("[FPU] FXSAVE/FXRSTOR enabled (SSE context support).\n");
+    } else {
+        kprintf("[FPU] Using legacy FSAVE/FRSTOR.\n");
+    }
+
+    /* Capture clean FPU state as template for new processes */
+    /* Safe to run kprintf between FNINIT and this snapshot: the kernel is
+     * built with -mno-sse -mno-mmx, so no kernel code dirties FPU state. */
+    memset(g_fpu_clean_state, 0, FPU_STATE_SIZE);
+    arch_fpu_save(g_fpu_clean_state);
+
+    kprintf("[FPU] FPU/SSE context switching initialized.\n");
+}
+
+/*
+ * Save the current FPU/SSE context into `state`.
+ * Caller must pass a FPU_STATE_SIZE buffer, 16-byte aligned on the FXSAVE
+ * path (FXSAVE faults #GP on a misaligned operand).
+ */
+void arch_fpu_save(uint8_t* state) {
+    if (g_fpu_has_fxsr) {
+        /* FXSAVE leaves the live FPU/SSE registers intact. */
+        __asm__ volatile("fxsave (%0)" :: "r"(state) : "memory");
+    } else {
+        __asm__ volatile("fnsave (%0)" :: "r"(state) : "memory");
+        /* fnsave resets the FPU — reinitialize so current process can keep using it */
+        __asm__ volatile("fninit");
+    }
+}
+
+/*
+ * Load a previously saved FPU/SSE context from `state`.
+ * Buffer layout must match what arch_fpu_save() wrote on the same machine
+ * (FXSAVE image vs legacy FSAVE image — selected by g_fpu_has_fxsr).
+ */
+void arch_fpu_restore(const uint8_t* state) {
+    if (g_fpu_has_fxsr) {
+        __asm__ volatile("fxrstor (%0)" :: "r"(state) : "memory");
+    } else {
+        __asm__ volatile("frstor (%0)" :: "r"(state) : "memory");
+    }
+}
+
+/* Fill `state` with the clean post-FNINIT template captured during boot,
+ * giving a new process a default FPU context without touching hardware. */
+void arch_fpu_init_state(uint8_t* state) {
+    memcpy(state, g_fpu_clean_state, FPU_STATE_SIZE);
+}
--- /dev/null
+/*
+ * Weak fallback stubs for architectures without FPU context support.
+ * An arch port (e.g. x86) provides strong definitions that override these;
+ * on other targets the save/restore hooks become harmless no-ops.
+ */
+#include "arch_fpu.h"
+#include "console.h"
+#include <string.h>
+
+__attribute__((weak))
+void arch_fpu_init(void) {
+    kprintf("[FPU] No arch-specific FPU support.\n");
+}
+
+__attribute__((weak))
+void arch_fpu_save(uint8_t* state) {
+    (void)state;
+}
+
+__attribute__((weak))
+void arch_fpu_restore(const uint8_t* state) {
+    (void)state;
+}
+
+__attribute__((weak))
+void arch_fpu_init_state(uint8_t* state) {
+    /* Zeroed buffer is the "clean" state when there is no FPU to snapshot. */
+    memset(state, 0, FPU_STATE_SIZE);
+}
#include "hal/cpu.h"
#include "hal/cpu_features.h"
+#include "arch_fpu.h"
#include "shm.h"
#include "net.h"
hal_cpu_detect_features();
hal_cpu_print_features();
+ arch_fpu_init();
kprintf("[AdrOS] Initializing PMM...\n");
#include "hal/cpu.h"
#include "hal/usermode.h"
#include "arch_process.h"
+#include "arch_fpu.h"
#include "sched_pcpu.h"
#include <stddef.h>
proc->tls_base = 0;
proc->clear_child_tid = NULL;
+ if (current_process) {
+ memcpy(proc->fpu_state, current_process->fpu_state, FPU_STATE_SIZE);
+ } else {
+ arch_fpu_init_state(proc->fpu_state);
+ }
+
for (int i = 0; i < PROCESS_MAX_FILES; i++) {
proc->files[i] = NULL;
}
proc->heap_start = current_process->heap_start;
proc->heap_break = current_process->heap_break;
+ memcpy(proc->fpu_state, current_process->fpu_state, FPU_STATE_SIZE);
+
for (int i = 0; i < PROCESS_MAX_MMAPS; i++) {
proc->mmaps[i] = current_process->mmaps[i];
}
kernel_proc->tls_base = 0;
kernel_proc->clear_child_tid = NULL;
+ arch_fpu_init_state(kernel_proc->fpu_state);
+
/* Allocate a dedicated kernel stack for PID 0 with guard page. */
void* kstack0 = kstack_alloc();
if (!kstack0) {
proc->tls_base = 0;
proc->clear_child_tid = NULL;
+ arch_fpu_init_state(proc->fpu_state);
+
for (int i = 0; i < PROCESS_MAX_FILES; i++) {
proc->files[i] = NULL;
}
*
* For brand-new processes, context_switch's `ret` goes to
* thread_wrapper which releases the lock explicitly. */
+ arch_fpu_save(prev->fpu_state);
context_switch(&prev->sp, current_process->sp);
+ arch_fpu_restore(current_process->fpu_state);
spin_unlock_irqrestore(&sched_lock, irq_flags);
}
uint8_t order; /* 5..23 */
uint8_t is_free; /* 1 = free, 0 = allocated */
uint16_t pad;
-} block_hdr_t; /* 8 bytes → keeps 8-byte alignment */
+ uint32_t pad2[2]; /* Pad to 16 bytes for 16-byte aligned returns */
+} block_hdr_t; /* 16 bytes → FXSAVE-safe alignment */
/* Free-list node, embedded in the data area of a free block */
typedef struct free_node {