#define PF_W 0x2
#define PF_R 0x4
-int elf32_load_user_from_initrd(const char* filename, uintptr_t* entry_out, uintptr_t* user_stack_top_out);
+int elf32_load_user_from_initrd(const char* filename, uintptr_t* entry_out, uintptr_t* user_stack_top_out, uintptr_t* addr_space_out);
#endif
uintptr_t hal_cpu_get_stack_pointer(void);
uintptr_t hal_cpu_get_address_space(void);
+void hal_cpu_set_address_space(uintptr_t as);
void hal_cpu_set_kernel_stack(uintptr_t sp_top);
*/
void vmm_map_page(uint64_t phys, uint64_t virt, uint32_t flags);
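+/*
+ * Per-process address spaces. The opaque handle is the page directory's
+ * physical address. Typical lifecycle, sketched:
+ *
+ *   uintptr_t as = vmm_as_create_kernel_clone(); // shares the kernel half
+ *   vmm_as_map_page(as, phys, virt, flags);      // populate the user half
+ *   vmm_as_activate(as);                         // load into CR3
+ *   ...
+ *   vmm_as_destroy(as);                          // only while inactive
+ */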
+uintptr_t vmm_as_create_kernel_clone(void);
+void vmm_as_destroy(uintptr_t as);
+void vmm_as_activate(uintptr_t as);
+void vmm_as_map_page(uintptr_t as, uint64_t phys, uint64_t virt, uint32_t flags);
+
/*
* Update flags for an already-mapped virtual page.
* Keeps the physical frame, only changes PRESENT/RW/USER bits.
#include "vga_console.h"
#include "vmm.h"
+#include "process.h"
+
#include "hal/cpu.h"
#include "hal/usermode.h"
static uint8_t ring0_trap_stack[16384] __attribute__((aligned(16)));
#endif
+#if defined(__i386__)
+static void userspace_init_thread(void) {
+ if (!fs_root) {
+ uart_print("[ELF] fs_root missing\n");
+ process_exit_notify(1);
+ schedule();
+ for (;;) hal_cpu_idle();
+ }
+
+ uintptr_t entry = 0;
+ uintptr_t user_sp = 0;
+ uintptr_t user_as = 0;
+ if (elf32_load_user_from_initrd("/bin/init.elf", &entry, &user_sp, &user_as) != 0) {
+ process_exit_notify(1);
+ schedule();
+ for (;;) hal_cpu_idle();
+ }
+
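+ // Adopt the freshly built address space before dropping to ring 3.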
+ current_process->addr_space = user_as;
+ vmm_as_activate(user_as);
+
+ uart_print("[ELF] starting /bin/init.elf\n");
+
+ uart_print("[ELF] user_range_ok(entry)=");
+ uart_put_char(user_range_ok((const void*)entry, 1) ? '1' : '0');
+ uart_print(" user_range_ok(stack)=");
+ uart_put_char(user_range_ok((const void*)(user_sp - 16), 16) ? '1' : '0');
+ uart_print("\n");
+
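+ // esp0: traps from ring 3 must land on a valid kernel stack top.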
+ hal_cpu_set_kernel_stack((uintptr_t)&ring0_trap_stack[sizeof(ring0_trap_stack)]);
+
+ if (hal_usermode_enter(entry, user_sp) < 0) {
+ uart_print("[USER] usermode enter not supported on this architecture.\n");
+ process_exit_notify(1);
+ schedule();
+ for (;;) hal_cpu_idle();
+ }
+
+ for (;;) hal_cpu_idle();
+}
+#endif
+
int arch_platform_setup(const struct boot_info* bi) {
(void)bi;
#if defined(__i386__)
int arch_platform_start_userspace(const struct boot_info* bi) {
(void)bi;
#if defined(__i386__)
- if (!fs_root) return -1;
-
- uintptr_t entry = 0;
- uintptr_t user_sp = 0;
- if (elf32_load_user_from_initrd("/bin/init.elf", &entry, &user_sp) != 0) {
- return -1;
- }
-
- uart_print("[ELF] starting /bin/init.elf\n");
-
- uart_print("[ELF] user_range_ok(entry)=");
- uart_put_char(user_range_ok((const void*)entry, 1) ? '1' : '0');
- uart_print(" user_range_ok(stack)=");
- uart_put_char(user_range_ok((const void*)(user_sp - 16), 16) ? '1' : '0');
- uart_print("\n");
-
- hal_cpu_set_kernel_stack((uintptr_t)&ring0_trap_stack[sizeof(ring0_trap_stack)]);
-
- if (hal_usermode_enter(entry, user_sp) < 0) {
- uart_print("[USER] usermode enter not supported on this architecture.\n");
- return -1;
- }
-
+ struct process* p = process_create_kernel(userspace_init_thread);
+ if (!p) return -1;
return 0;
#else
return -1;
#include "vmm.h"
#include "pmm.h"
#include "uart_console.h"
+#include "hal/cpu.h"
#include <stddef.h>
/* Constants */
So accessing boot_pd directly works fine! */
extern uint32_t boot_pd[1024];
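+/* The boot address space, recorded in vmm_init(); never destroyed. */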
+static uintptr_t g_kernel_as = 0;
+
+static inline void invlpg(uintptr_t vaddr) {
+ __asm__ volatile("invlpg (%0)" : : "r" (vaddr) : "memory");
+}
+
+static uint32_t vmm_flags_to_x86(uint32_t flags) {
+ uint32_t x86_flags = 0;
+ if (flags & VMM_FLAG_PRESENT) x86_flags |= X86_PTE_PRESENT;
+ if (flags & VMM_FLAG_RW) x86_flags |= X86_PTE_RW;
+ if (flags & VMM_FLAG_USER) x86_flags |= X86_PTE_USER;
+ return x86_flags;
+}
+
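+/*
+ * Kernel-virtual view of the active page directory. Assumes CR3 holds
+ * a page-aligned PD address with no PCD/PWT flag bits set.
+ */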
+static uint32_t* vmm_active_pd_virt(void) {
+ uintptr_t as = hal_cpu_get_address_space();
+ return (uint32_t*)P2V((uint32_t)as);
+}
+
static void* pmm_alloc_page_low(void) {
// Bring-up safety: allocate only from identity-mapped area (0-4MB)
// until we have a general phys->virt mapping.
return 0;
}
-static inline void invlpg(uintptr_t vaddr) {
- __asm__ volatile("invlpg (%0)" : : "r" (vaddr) : "memory");
-}
-
-static uint32_t vmm_flags_to_x86(uint32_t flags) {
- uint32_t x86_flags = 0;
- if (flags & VMM_FLAG_PRESENT) x86_flags |= X86_PTE_PRESENT;
- if (flags & VMM_FLAG_RW) x86_flags |= X86_PTE_RW;
- if (flags & VMM_FLAG_USER) x86_flags |= X86_PTE_USER;
- return x86_flags;
-}
-
-void vmm_map_page(uint64_t phys, uint64_t virt, uint32_t flags) {
+static void vmm_map_page_in_pd(uint32_t* pd_virt, uint64_t phys, uint64_t virt, uint32_t flags) {
uint32_t pd_index = virt >> 22;
uint32_t pt_index = (virt >> 12) & 0x03FF;
- // Check if Page Table exists
- if (!(boot_pd[pd_index] & X86_PTE_PRESENT)) {
- // Allocate a new PT
+ if (!(pd_virt[pd_index] & X86_PTE_PRESENT)) {
uint32_t pt_phys = (uint32_t)pmm_alloc_page_low();
if (!pt_phys) {
uart_print("[VMM] OOM allocating page table.\n");
return;
}
- // ACCESS SAFETY: Convert Physical to Virtual to write to it
uint32_t* pt_virt = (uint32_t*)P2V(pt_phys);
-
- // Clear table
- for(int i=0; i<1024; i++) pt_virt[i] = 0;
+ for (int i = 0; i < 1024; i++) pt_virt[i] = 0;
- // Add to Directory
uint32_t pde_flags = X86_PTE_PRESENT | X86_PTE_RW;
if (flags & VMM_FLAG_USER) pde_flags |= X86_PTE_USER;
- boot_pd[pd_index] = pt_phys | pde_flags;
+ pd_virt[pd_index] = pt_phys | pde_flags;
}
- if ((flags & VMM_FLAG_USER) && !(boot_pd[pd_index] & X86_PTE_USER)) {
- boot_pd[pd_index] |= X86_PTE_USER;
+ if ((flags & VMM_FLAG_USER) && !(pd_virt[pd_index] & X86_PTE_USER)) {
+ pd_virt[pd_index] |= X86_PTE_USER;
}
- // Get table address from Directory
- uint32_t pt_phys = boot_pd[pd_index] & 0xFFFFF000;
-
- // ACCESS SAFETY: Convert to Virtual
+ uint32_t pt_phys = pd_virt[pd_index] & 0xFFFFF000;
uint32_t* pt = (uint32_t*)P2V(pt_phys);
-
pt[pt_index] = ((uint32_t)phys) | vmm_flags_to_x86(flags);
- invlpg(virt);
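+ // invlpg affects only the current TLB; moot when pd_virt is not the active PD.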
+ invlpg((uintptr_t)virt);
+}
+
+void vmm_map_page(uint64_t phys, uint64_t virt, uint32_t flags) {
+ vmm_map_page_in_pd(vmm_active_pd_virt(), phys, virt, flags);
+}
+
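+/*
+ * Allocate a fresh page directory whose user half (PDEs 0..767) is
+ * empty and whose kernel half is shared with boot_pd. Returns the PD's
+ * physical address as the handle, or 0 on OOM.
+ */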
+uintptr_t vmm_as_create_kernel_clone(void) {
+ uint32_t pd_phys = (uint32_t)pmm_alloc_page_low();
+ if (!pd_phys) return 0;
+
+ uint32_t* pd_virt = (uint32_t*)P2V(pd_phys);
+ for (int i = 0; i < 1024; i++) pd_virt[i] = 0;
+
+ // Copy the higher-half PDEs so kernel page tables are shared (768 == 0xC0000000 >> 22).
+ for (int i = 768; i < 1024; i++) {
+ pd_virt[i] = boot_pd[i];
+ }
+
+ return (uintptr_t)pd_phys;
+}
+
+void vmm_as_activate(uintptr_t as) {
+ if (!as) return;
+ hal_cpu_set_address_space(as);
+}
+
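+/* Map a page into a (possibly inactive) address space via its P2V view. */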
+void vmm_as_map_page(uintptr_t as, uint64_t phys, uint64_t virt, uint32_t flags) {
+ if (!as) return;
+ uint32_t* pd_virt = (uint32_t*)P2V((uint32_t)as);
+ vmm_map_page_in_pd(pd_virt, phys, virt, flags);
+}
+
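+/*
+ * Tear down a user address space: free every present user frame, then
+ * its page tables, then the directory itself. The caller must ensure
+ * `as` is not the currently active address space.
+ */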
+void vmm_as_destroy(uintptr_t as) {
+ if (!as) return;
+ if (as == g_kernel_as) return;
+
+ uint32_t* pd = (uint32_t*)P2V((uint32_t)as);
+
+ // Free user page tables + frames for user space.
+ for (int pdi = 0; pdi < 768; pdi++) {
+ uint32_t pde = pd[pdi];
+ if (!(pde & X86_PTE_PRESENT)) continue;
+
+ uint32_t pt_phys = pde & 0xFFFFF000;
+ uint32_t* pt = (uint32_t*)P2V(pt_phys);
+
+ for (int pti = 0; pti < 1024; pti++) {
+ uint32_t pte = pt[pti];
+ if (!(pte & X86_PTE_PRESENT)) continue;
+ uint32_t frame = pte & 0xFFFFF000;
+ pmm_free_page((void*)(uintptr_t)frame);
+ pt[pti] = 0;
+ }
+
+ pmm_free_page((void*)(uintptr_t)pt_phys);
+ pd[pdi] = 0;
+ }
+
+ pmm_free_page((void*)(uintptr_t)as);
}
void vmm_set_page_flags(uint64_t virt, uint32_t flags) {
uint32_t pd_index = virt >> 22;
uint32_t pt_index = (virt >> 12) & 0x03FF;
- if (!(boot_pd[pd_index] & X86_PTE_PRESENT)) {
+ uint32_t* pd = vmm_active_pd_virt();
+
+ if (!(pd[pd_index] & X86_PTE_PRESENT)) {
return;
}
- uint32_t pt_phys = boot_pd[pd_index] & 0xFFFFF000;
+ uint32_t pt_phys = pd[pd_index] & 0xFFFFF000;
uint32_t* pt = (uint32_t*)P2V(pt_phys);
uint32_t pte = pt[pt_index];
void vmm_unmap_page(uint64_t virt) {
uint32_t pd_index = virt >> 22;
uint32_t pt_index = (virt >> 12) & 0x03FF;
-
- if (boot_pd[pd_index] & X86_PTE_PRESENT) {
- uint32_t pt_phys = boot_pd[pd_index] & 0xFFFFF000;
+
+ uint32_t* pd = vmm_active_pd_virt();
+ if (pd[pd_index] & X86_PTE_PRESENT) {
+ uint32_t pt_phys = pd[pd_index] & 0xFFFFF000;
uint32_t* pt = (uint32_t*)P2V(pt_phys);
pt[pt_index] = 0;
void vmm_init(void) {
uart_print("[VMM] Higher Half Kernel Active.\n");
+
+ g_kernel_as = hal_cpu_get_address_space();
// Test mapping
vmm_map_page(0xB8000, 0xC00B8000, VMM_FLAG_PRESENT | VMM_FLAG_RW);
return as;
}
+void hal_cpu_set_address_space(uintptr_t as) {
+ /* The same mov-to-CR3 sequence works on both i386 and x86_64. */
+ __asm__ volatile("mov %0, %%cr3" : : "r"(as) : "memory");
+}
+
void hal_cpu_set_kernel_stack(uintptr_t sp_top) {
tss_set_kernel_stack(sp_top);
}
return 0;
}
+void hal_cpu_set_address_space(uintptr_t as) {
+ (void)as;
+}
+
void hal_cpu_set_kernel_stack(uintptr_t sp_top) {
(void)sp_top;
}
#include "utils.h"
#include "vmm.h"
+#include "hal/cpu.h"
+
#include <stdint.h>
#if defined(__i386__)
return 0;
}
-static int elf32_map_user_range(uintptr_t vaddr, size_t len, uint32_t flags) {
+static int elf32_map_user_range(uintptr_t as, uintptr_t vaddr, size_t len, uint32_t flags) {
if (len == 0) return 0;
if (vaddr == 0) return -1;
if (vaddr >= X86_KERNEL_VIRT_BASE) return -1;
void* phys = pmm_alloc_page_low_16mb();
if (!phys) return -1;
- vmm_map_page((uint64_t)(uintptr_t)phys, (uint64_t)va, flags | VMM_FLAG_PRESENT | VMM_FLAG_USER);
+ vmm_as_map_page(as, (uint64_t)(uintptr_t)phys, (uint64_t)va, flags | VMM_FLAG_PRESENT | VMM_FLAG_USER);
if (va == end_page) break;
}
return 0;
}
-int elf32_load_user_from_initrd(const char* filename, uintptr_t* entry_out, uintptr_t* user_stack_top_out) {
- if (!filename || !entry_out || !user_stack_top_out) return -1;
+int elf32_load_user_from_initrd(const char* filename, uintptr_t* entry_out, uintptr_t* user_stack_top_out, uintptr_t* addr_space_out) {
+ if (!filename || !entry_out || !user_stack_top_out || !addr_space_out) return -1;
if (!fs_root) return -1;
+ uintptr_t new_as = vmm_as_create_kernel_clone();
+ if (!new_as) return -1;
+
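+ // Build inside the new AS so segment copies land in its user mappings.
+ // Every error path below must restore old_as and destroy new_as.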
+ uintptr_t old_as = hal_cpu_get_address_space();
+ vmm_as_activate(new_as);
+
fs_node_t* node = vfs_lookup(filename);
if (!node) {
uart_print("[ELF] file not found: ");
uint32_t rd = vfs_read(node, 0, file_len, file);
if (rd != file_len) {
kfree(file);
+ vmm_as_activate(old_as);
+ vmm_as_destroy(new_as);
return -1;
}
if (elf32_validate(eh, file_len) < 0) {
uart_print("[ELF] invalid ELF header\n");
kfree(file);
+ vmm_as_activate(old_as);
+ vmm_as_destroy(new_as);
return -1;
}
if (ph[i].p_vaddr == 0) {
uart_print("[ELF] PT_LOAD with vaddr=0 rejected\n");
kfree(file);
+ vmm_as_activate(old_as);
+ vmm_as_destroy(new_as);
return -1;
}
if (ph[i].p_vaddr >= X86_KERNEL_VIRT_BASE) {
uart_print("[ELF] PT_LOAD in kernel range rejected\n");
kfree(file);
+ vmm_as_activate(old_as);
+ vmm_as_destroy(new_as);
return -1;
}
const uint32_t map_flags = VMM_FLAG_RW;
- if (elf32_map_user_range((uintptr_t)ph[i].p_vaddr, (size_t)ph[i].p_memsz, map_flags) < 0) {
+ if (elf32_map_user_range(new_as, (uintptr_t)ph[i].p_vaddr, (size_t)ph[i].p_memsz, map_flags) < 0) {
uart_print("[ELF] OOM mapping user segment\n");
kfree(file);
+ vmm_as_activate(old_as);
+ vmm_as_destroy(new_as);
return -1;
}
const uintptr_t user_stack_base = 0x00800000U;
const size_t user_stack_size = 0x1000;
- if (elf32_map_user_range(user_stack_base, user_stack_size, VMM_FLAG_RW) < 0) {
+ if (elf32_map_user_range(new_as, user_stack_base, user_stack_size, VMM_FLAG_RW) < 0) {
uart_print("[ELF] OOM mapping user stack\n");
kfree(file);
+ vmm_as_activate(old_as);
+ vmm_as_destroy(new_as);
return -1;
}
*entry_out = (uintptr_t)eh->e_entry;
*user_stack_top_out = user_stack_base + user_stack_size;
+ *addr_space_out = new_as;
kfree(file);
+ vmm_as_activate(old_as);
return 0;
}
#else
-int elf32_load_user_from_initrd(const char* filename, uintptr_t* entry_out, uintptr_t* user_stack_top_out) {
+int elf32_load_user_from_initrd(const char* filename, uintptr_t* entry_out, uintptr_t* user_stack_top_out, uintptr_t* addr_space_out) {
(void)filename;
(void)entry_out;
(void)user_stack_top_out;
+ (void)addr_space_out;
return -1;
}
#endif
static uint32_t next_pid = 1;
static spinlock_t sched_lock = {0};
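+/* The boot/kernel address space; kernel threads share it. */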
+static uintptr_t kernel_as = 0;
static struct process* process_find_locked(uint32_t pid) {
if (!ready_queue_head) return NULL;
p->kernel_stack = NULL;
}
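+ // Release a private address space; the shared kernel AS is never freed.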
+ if (p->addr_space && p->addr_space != kernel_as) {
+ vmm_as_destroy(p->addr_space);
+ p->addr_space = 0;
+ }
+
kfree(p);
}
kernel_proc->state = PROCESS_RUNNING;
kernel_proc->wake_at_tick = 0;
kernel_proc->addr_space = hal_cpu_get_address_space();
+ kernel_as = kernel_proc->addr_space;
kernel_proc->exit_status = 0;
kernel_proc->waiting = 0;
kernel_proc->wait_pid = -1;
proc->pid = next_pid++;
proc->parent_pid = current_process ? current_process->pid : 0;
proc->state = PROCESS_READY;
- proc->addr_space = current_process->addr_space;
+ // New kernel threads start in the kernel AS; a user AS is adopted later.
+ proc->addr_space = kernel_as ? kernel_as : (current_process ? current_process->addr_space : 0);
proc->wake_at_tick = 0;
proc->exit_status = 0;
proc->waiting = 0;
current_process = next;
current_process->state = PROCESS_RUNNING;
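+ // Reload CR3 only when the address space actually changes (a CR3 write flushes the TLB).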
+ if (current_process->addr_space && current_process->addr_space != prev->addr_space) {
+ hal_cpu_set_address_space(current_process->addr_space);
+ }
+
// For ring3->ring0 transitions, esp0 must point to the top of the kernel stack.
if (current_process->kernel_stack) {
hal_cpu_set_kernel_stack((uintptr_t)current_process->kernel_stack + 4096);