From: Tulio A M Mendes Date: Sun, 8 Feb 2026 01:47:06 +0000 (-0300) Subject: D3: fork syscall + remove spawn X-Git-Url: https://projects.tadryanom.me/?a=commitdiff_plain;h=8c457f9a9d237f5ce68f808eed4abb705c59143a;p=AdrOS.git D3: fork syscall + remove spawn Implement fork() by cloning user address space and resuming child in ring3 from a saved register frame. Duplicate FD table via refcounting. Remove temporary spawn syscall and update init.elf waitpid test to use fork(). --- diff --git a/include/arch/x86/usermode.h b/include/arch/x86/usermode.h index 64fcd74..0f1b015 100644 --- a/include/arch/x86/usermode.h +++ b/include/arch/x86/usermode.h @@ -3,8 +3,11 @@ #include +#include "idt.h" + #if defined(__i386__) __attribute__((noreturn)) void x86_enter_usermode(uintptr_t user_eip, uintptr_t user_esp); +__attribute__((noreturn)) void x86_enter_usermode_regs(const struct registers* regs); #endif #endif diff --git a/include/process.h b/include/process.h index b9e8a83..4ee30de 100644 --- a/include/process.h +++ b/include/process.h @@ -3,6 +3,7 @@ #include #include "idt.h" // For struct registers +#include "fs.h" typedef enum { PROCESS_READY, @@ -12,7 +13,12 @@ typedef enum { PROCESS_ZOMBIE } process_state_t; -struct file; +struct file { + fs_node_t* node; + uint32_t offset; + uint32_t flags; + uint32_t refcount; +}; #define PROCESS_MAX_FILES 16 @@ -26,6 +32,9 @@ struct process { uint32_t wake_at_tick; // New: When to wake up (global tick count) int exit_status; + int has_user_regs; + struct registers user_regs; + int waiting; int wait_pid; int wait_result_pid; @@ -64,7 +73,7 @@ int process_waitpid(int pid, int* status_out); // Mark current process as exiting and notify/wake a waiter (if any). void process_exit_notify(int status); -// Temporary: spawn a kernel-thread child that sleeps briefly and exits. -int process_spawn_test_child(void); +// Create a child process that will resume in usermode from a saved register frame. 
+struct process* process_fork_create(uintptr_t child_as, const struct registers* child_regs); #endif diff --git a/include/syscall.h b/include/syscall.h index fada6d3..fd59cc3 100644 --- a/include/syscall.h +++ b/include/syscall.h @@ -16,9 +16,6 @@ enum { SYSCALL_WAITPID = 7, - // Temporary: spawn a kernel-thread child for waitpid testing. - SYSCALL_SPAWN = 8, - SYSCALL_LSEEK = 9, SYSCALL_FSTAT = 10, SYSCALL_STAT = 11, @@ -27,6 +24,7 @@ enum { SYSCALL_DUP2 = 13, SYSCALL_PIPE = 14, SYSCALL_EXECVE = 15, + SYSCALL_FORK = 16, }; #endif diff --git a/include/vmm.h b/include/vmm.h index 852169a..644f58d 100644 --- a/include/vmm.h +++ b/include/vmm.h @@ -27,6 +27,8 @@ void vmm_as_destroy(uintptr_t as); void vmm_as_activate(uintptr_t as); void vmm_as_map_page(uintptr_t as, uint64_t phys, uint64_t virt, uint32_t flags); +uintptr_t vmm_as_clone_user(uintptr_t src_as); + /* * Update flags for an already-mapped virtual page. * Keeps the physical frame, only changes PRESENT/RW/USER bits. diff --git a/src/arch/x86/usermode.c b/src/arch/x86/usermode.c index 1ea55a0..d80e18e 100644 --- a/src/arch/x86/usermode.c +++ b/src/arch/x86/usermode.c @@ -6,6 +6,7 @@ #include "uart_console.h" #include "utils.h" #include "arch/x86/usermode.h" +#include "idt.h" #if defined(__i386__) @@ -96,6 +97,49 @@ __attribute__((noreturn)) void x86_enter_usermode(uintptr_t user_eip, uintptr_t __builtin_unreachable(); } +__attribute__((noreturn)) void x86_enter_usermode_regs(const struct registers* regs) { + if (!regs) { + for (;;) { + __asm__ volatile("cli; hlt"); + } + } + + // Layout follows include/arch/x86/idt.h struct registers. 
+ const uint32_t eflags = (regs->eflags | 0x200U); + + __asm__ volatile( + "cli\n" + "mov %[r], %%ebp\n" + + "mov $0x23, %%ax\n" + "mov %%ax, %%ds\n" + "mov %%ax, %%es\n" + "mov %%ax, %%fs\n" + "mov %%ax, %%gs\n" + + "pushl $0x23\n" /* ss */ + "pushl 56(%%ebp)\n" /* useresp */ + "pushl %[efl]\n" /* eflags */ + "pushl $0x1B\n" /* cs */ + "pushl 44(%%ebp)\n" /* eip */ + + "mov 4(%%ebp), %%edi\n" /* edi */ + "mov 8(%%ebp), %%esi\n" /* esi */ + "mov 20(%%ebp), %%ebx\n" /* ebx */ + "mov 24(%%ebp), %%edx\n" /* edx */ + "mov 28(%%ebp), %%ecx\n" /* ecx */ + "mov 32(%%ebp), %%eax\n" /* eax */ + "mov 12(%%ebp), %%ebp\n" /* ebp */ + "iret\n" + : + : [r] "r"(regs), + [efl] "r"(eflags) + : "memory", "cc", "ax", "ebp" + ); + + __builtin_unreachable(); +} + void x86_usermode_test_start(void) { uart_print("[USER] Starting ring3 test...\n"); diff --git a/src/arch/x86/vmm.c b/src/arch/x86/vmm.c index e671b0b..3aa1590 100644 --- a/src/arch/x86/vmm.c +++ b/src/arch/x86/vmm.c @@ -1,6 +1,7 @@ #include "vmm.h" #include "pmm.h" #include "uart_console.h" +#include "utils.h" #include "hal/cpu.h" #include @@ -106,6 +107,62 @@ uintptr_t vmm_as_create_kernel_clone(void) { return (uintptr_t)pd_phys; } +uintptr_t vmm_as_clone_user(uintptr_t src_as) { + if (!src_as) return 0; + + uintptr_t new_as = vmm_as_create_kernel_clone(); + if (!new_as) return 0; + + uint32_t* src_pd = (uint32_t*)P2V((uint32_t)src_as); + const uint32_t* const boot_pd_virt = boot_pd; + + // Best-effort clone: copy present user mappings, ignore kernel half. + for (uint32_t pdi = 0; pdi < 768; pdi++) { + uint32_t pde = src_pd[pdi]; + if (!(pde & X86_PTE_PRESENT)) continue; + + // Skip if this PDE looks like a kernel mapping (shouldn't happen for pdi<768). 
+ if (boot_pd_virt[pdi] == pde) continue; + + uint32_t src_pt_phys = pde & 0xFFFFF000; + uint32_t* src_pt = (uint32_t*)P2V(src_pt_phys); + + for (uint32_t pti = 0; pti < 1024; pti++) { + uint32_t pte = src_pt[pti]; + if (!(pte & X86_PTE_PRESENT)) continue; + const uint32_t x86_flags = pte & 0xFFF; + + // Derive VMM flags. + uint32_t flags = VMM_FLAG_PRESENT; + if (x86_flags & X86_PTE_RW) flags |= VMM_FLAG_RW; + if (x86_flags & X86_PTE_USER) flags |= VMM_FLAG_USER; + + void* dst_frame = pmm_alloc_page_low(); + if (!dst_frame) { + vmm_as_destroy(new_as); + return 0; + } + + uintptr_t va = ((uintptr_t)pdi << 22) | ((uintptr_t)pti << 12); + vmm_as_map_page(new_as, (uint64_t)(uintptr_t)dst_frame, (uint64_t)va, flags); + + // Copy contents by temporarily switching address spaces. + uintptr_t old_as = hal_cpu_get_address_space(); + uint8_t tmp[4096]; + + vmm_as_activate(src_as); + memcpy(tmp, (const void*)va, sizeof(tmp)); + + vmm_as_activate(new_as); + memcpy((void*)va, tmp, sizeof(tmp)); + + vmm_as_activate(old_as); + } + } + + return new_as; +} + void vmm_as_activate(uintptr_t as) { if (!as) return; hal_cpu_set_address_space(as); diff --git a/src/kernel/scheduler.c b/src/kernel/scheduler.c index 83aab6e..f36f76a 100644 --- a/src/kernel/scheduler.c +++ b/src/kernel/scheduler.c @@ -7,6 +7,9 @@ #include "spinlock.h" #include "utils.h" #include "hal/cpu.h" +#if defined(__i386__) +#include "arch/x86/usermode.h" +#endif #include struct process* current_process = NULL; @@ -17,6 +20,8 @@ static uint32_t next_pid = 1; static spinlock_t sched_lock = {0}; static uintptr_t kernel_as = 0; +void thread_wrapper(void (*fn)(void)); + static struct process* process_find_locked(uint32_t pid) { if (!ready_queue_head) return NULL; @@ -150,18 +155,81 @@ void process_exit_notify(int status) { spin_unlock_irqrestore(&sched_lock, flags); } -static void spawn_test_child_entry(void) { - process_exit_notify(42); - schedule(); - for(;;) { - hal_cpu_idle(); + +static void 
fork_child_trampoline(void) { +#if defined(__i386__) + if (!current_process || !current_process->has_user_regs) { + process_exit_notify(1); + schedule(); + for (;;) hal_cpu_idle(); } + + if (current_process->addr_space) { + vmm_as_activate(current_process->addr_space); + } + + x86_enter_usermode_regs(&current_process->user_regs); +#else + process_exit_notify(1); + schedule(); + for (;;) hal_cpu_idle(); +#endif } -int process_spawn_test_child(void) { - struct process* p = process_create_kernel(spawn_test_child_entry); - if (!p) return -1; - return (int)p->pid; +struct process* process_fork_create(uintptr_t child_as, const struct registers* child_regs) { + if (!child_as || !child_regs) return NULL; + + uintptr_t flags = spin_lock_irqsave(&sched_lock); + + struct process* proc = (struct process*)kmalloc(sizeof(*proc)); + if (!proc) { + spin_unlock_irqrestore(&sched_lock, flags); + return NULL; + } + memset(proc, 0, sizeof(*proc)); + + proc->pid = next_pid++; + proc->parent_pid = current_process ? 
current_process->pid : 0; + proc->state = PROCESS_READY; + proc->addr_space = child_as; + proc->wake_at_tick = 0; + proc->exit_status = 0; + + proc->waiting = 0; + proc->wait_pid = -1; + proc->wait_result_pid = -1; + proc->wait_result_status = 0; + + proc->has_user_regs = 1; + proc->user_regs = *child_regs; + + for (int i = 0; i < PROCESS_MAX_FILES; i++) { + proc->files[i] = NULL; + } + + void* stack = kmalloc(4096); + if (!stack) { + kfree(proc); + spin_unlock_irqrestore(&sched_lock, flags); + return NULL; + } + proc->kernel_stack = (uint32_t*)stack; + + uint32_t* sp = (uint32_t*)((uint8_t*)stack + 4096); + *--sp = (uint32_t)fork_child_trampoline; + *--sp = 0; + *--sp = (uint32_t)thread_wrapper; + *--sp = 0; *--sp = 0; *--sp = 0; *--sp = 0; + proc->sp = (uintptr_t)sp; + + proc->next = ready_queue_head; + proc->prev = ready_queue_tail; + ready_queue_tail->next = proc; + ready_queue_head->prev = proc; + ready_queue_tail = proc; + + spin_unlock_irqrestore(&sched_lock, flags); + return proc; } void process_init(void) { diff --git a/src/kernel/syscall.c b/src/kernel/syscall.c index dfa9d8d..0baf296 100644 --- a/src/kernel/syscall.c +++ b/src/kernel/syscall.c @@ -17,16 +17,35 @@ #include -struct file { - fs_node_t* node; - uint32_t offset; - uint32_t flags; - uint32_t refcount; -}; - static int fd_alloc(struct file* f); static int fd_close(int fd); +static int syscall_fork_impl(struct registers* regs) { + if (!regs) return -EINVAL; + if (!current_process) return -EINVAL; + + uintptr_t child_as = vmm_as_clone_user(current_process->addr_space); + if (!child_as) return -ENOMEM; + + struct registers child_regs = *regs; + child_regs.eax = 0; + + struct process* child = process_fork_create(child_as, &child_regs); + if (!child) { + vmm_as_destroy(child_as); + return -ENOMEM; + } + + for (int fd = 0; fd < PROCESS_MAX_FILES; fd++) { + struct file* f = current_process->files[fd]; + if (!f) continue; + f->refcount++; + child->files[fd] = f; + } + + return (int)child->pid; +} + 
struct pipe_state { uint8_t* buf; uint32_t cap; @@ -600,12 +619,6 @@ static void syscall_handler(struct registers* regs) { return; } - if (syscall_no == SYSCALL_SPAWN) { - int pid = process_spawn_test_child(); - regs->eax = (pid < 0) ? (uint32_t)-1 : (uint32_t)pid; - return; - } - if (syscall_no == SYSCALL_LSEEK) { int fd = (int)regs->ebx; int32_t off = (int32_t)regs->ecx; @@ -655,6 +668,11 @@ static void syscall_handler(struct registers* regs) { return; } + if (syscall_no == SYSCALL_FORK) { + regs->eax = (uint32_t)syscall_fork_impl(regs); + return; + } + regs->eax = (uint32_t)-1; } diff --git a/user/init.c b/user/init.c index b79d0e5..683d173 100644 --- a/user/init.c +++ b/user/init.c @@ -7,7 +7,6 @@ enum { SYSCALL_READ = 5, SYSCALL_CLOSE = 6, SYSCALL_WAITPID = 7, - SYSCALL_SPAWN = 8, SYSCALL_LSEEK = 9, SYSCALL_FSTAT = 10, SYSCALL_STAT = 11, @@ -16,6 +15,7 @@ enum { SYSCALL_DUP2 = 13, SYSCALL_PIPE = 14, SYSCALL_EXECVE = 15, + SYSCALL_FORK = 16, }; enum { @@ -45,6 +45,17 @@ static int sys_write(int fd, const void* buf, uint32_t len) { return ret; } +static int sys_fork(void) { + int ret; + __asm__ volatile( + "int $0x80" + : "=a"(ret) + : "a"(SYSCALL_FORK) + : "memory" + ); + return ret; +} + static int sys_execve(const char* path, const char* const* argv, const char* const* envp) { int ret; __asm__ volatile( @@ -100,17 +111,6 @@ static int sys_waitpid(int pid, int* status, uint32_t options) { return ret; } -static int sys_spawn(void) { - int ret; - __asm__ volatile( - "int $0x80" - : "=a"(ret) - : "a"(SYSCALL_SPAWN) - : "memory" - ); - return ret; -} - static int sys_open(const char* path, uint32_t flags) { int ret; __asm__ volatile( @@ -566,12 +566,16 @@ void _start(void) { enum { NCHILD = 100 }; int children[NCHILD]; for (int i = 0; i < NCHILD; i++) { - children[i] = sys_spawn(); - if (children[i] < 0) { - static const char smsg[] = "[init] spawn failed\n"; + int pid = sys_fork(); + if (pid < 0) { + static const char smsg[] = "[init] fork failed\n"; 
(void)sys_write(1, smsg, (uint32_t)(sizeof(smsg) - 1)); sys_exit(2); } + if (pid == 0) { + sys_exit(42); + } + children[i] = pid; } int ok = 1;