From: Tulio A M Mendes Date: Wed, 20 May 2026 12:00:45 +0000 (-0300) Subject: kernel: add SOCK_RAW support and ICMP ping test X-Git-Url: https://projects.tadryanom.me/?a=commitdiff_plain;h=73200941bfd8cce6e0c610dea0a06872ad6537fc;p=AdrOS.git kernel: add SOCK_RAW support and ICMP ping test - Add SOCK_RAW/IPPROTO_ICMP/IPPROTO_RAW constants to socket.h - Extend ksocket struct with protocol field and raw_pcb pointer - Add raw_recv_cb callback for receiving raw IP packets into ring buffer - Extend ksocket_create/bind/send/sendto/recv/recvfrom/close/getsockname to handle SOCK_RAW using lwIP raw API - Add O_NONBLOCK support to ksocket_recv (returns -EAGAIN instead of blocking forever) and merge fd flags in RECV/RECVFROM syscall handlers - Add sys_sendto/sys_recvfrom wrappers and ICMP ping test (I7b) to fulltest - Add ICMP ping test pattern to smoke_test.exp and test_battery.exp - Migrate fstab parsing from kernel to userspace /sbin/init - Auto-mount /disk and /persist in kernel init for init= binaries --- diff --git a/Makefile b/Makefile index 2a1a4a23..a5f10589 100644 --- a/Makefile +++ b/Makefile @@ -52,13 +52,15 @@ ifeq ($(ARCH),x86) # Default Toolchain Prefix (can be overridden) ifdef CROSS - TOOLPREFIX ?= i686-elf- + TOOLPREFIX = i686-elf- + CC = $(TOOLPREFIX)gcc + AS = $(TOOLPREFIX)as + LD = $(TOOLPREFIX)ld + else + CC ?= $(TOOLPREFIX)gcc + AS ?= $(TOOLPREFIX)as + LD ?= $(TOOLPREFIX)ld endif - - # Toolchain tools (Allow user override via make CC=...) 
- CC ?= $(TOOLPREFIX)gcc - AS ?= $(TOOLPREFIX)as - LD ?= $(TOOLPREFIX)ld # lwIP sources (NO_SYS=0, IPv4, threaded API + sockets) LWIPDIR := third_party/lwip/src @@ -144,9 +146,9 @@ endif # --- ARM64 Configuration --- ifeq ($(ARCH),arm) - CC := aarch64-linux-gnu-gcc - AS := aarch64-linux-gnu-as - LD := aarch64-linux-gnu-ld + CC ?= aarch64-linux-gnu-gcc + AS ?= aarch64-linux-gnu-as + LD ?= aarch64-linux-gnu-ld OBJCOPY := aarch64-linux-gnu-objcopy CFLAGS := -ffreestanding -O2 -Wall -Wextra -Werror -Wno-error=cpp -mno-outline-atomics -Iinclude LDFLAGS := -T $(SRC_DIR)/arch/arm/linker.ld @@ -157,9 +159,9 @@ endif # --- RISC-V 64 Configuration --- ifeq ($(ARCH),riscv) - CC := riscv64-linux-gnu-gcc - AS := riscv64-linux-gnu-as - LD := riscv64-linux-gnu-ld + CC ?= riscv64-linux-gnu-gcc + AS ?= riscv64-linux-gnu-as + LD ?= riscv64-linux-gnu-ld OBJCOPY := riscv64-linux-gnu-objcopy CFLAGS := -ffreestanding -O2 -Wall -Wextra -Werror -Wno-error=cpp -Iinclude -mcmodel=medany LDFLAGS := -T $(SRC_DIR)/arch/riscv/linker.ld @@ -170,9 +172,9 @@ endif # --- MIPS 32 Configuration --- ifeq ($(ARCH),mips) - CC := mipsel-linux-gnu-gcc - AS := mipsel-linux-gnu-as - LD := mipsel-linux-gnu-ld + CC ?= mipsel-linux-gnu-gcc + AS ?= mipsel-linux-gnu-as + LD ?= mipsel-linux-gnu-ld CFLAGS := -ffreestanding -O2 -Wall -Wextra -Werror -Wno-error=cpp -Iinclude -mabi=32 -march=mips32r2 -mno-abicalls -fno-pic -G0 LDFLAGS := -T $(SRC_DIR)/arch/mips/linker.ld ASFLAGS := -march=mips32r2 diff --git a/include/arch/arch_platform.h b/include/arch/arch_platform.h index 7c3b550c..0f4a8291 100644 --- a/include/arch/arch_platform.h +++ b/include/arch/arch_platform.h @@ -14,6 +14,5 @@ int arch_platform_setup(const struct boot_info* bi); int arch_platform_start_userspace(const struct boot_info* bi); -void arch_platform_usermode_test_start(void); #endif diff --git a/include/hal/usermode.h b/include/hal/usermode.h index 47e6fea1..9c5d8aee 100644 --- a/include/hal/usermode.h +++ b/include/hal/usermode.h @@ -16,8 +16,4 
@@ int hal_usermode_enter(uintptr_t user_eip, uintptr_t user_esp); void hal_usermode_enter_regs(const void* regs); -#if defined(__i386__) -void x86_usermode_test_start(void); -#endif - #endif diff --git a/include/socket.h b/include/socket.h index 206838b2..de692087 100644 --- a/include/socket.h +++ b/include/socket.h @@ -19,10 +19,13 @@ /* Socket types */ #define SOCK_STREAM 1 /* TCP */ #define SOCK_DGRAM 2 /* UDP */ +#define SOCK_RAW 3 /* Raw IP */ /* Protocols */ -#define IPPROTO_TCP 6 -#define IPPROTO_UDP 17 +#define IPPROTO_ICMP 1 +#define IPPROTO_TCP 6 +#define IPPROTO_UDP 17 +#define IPPROTO_RAW 255 /* Shutdown how */ #define SHUT_RD 0 diff --git a/src/arch/arm/arch_platform.c b/src/arch/arm/arch_platform.c index 04b095db..6d1fe145 100644 --- a/src/arch/arm/arch_platform.c +++ b/src/arch/arm/arch_platform.c @@ -18,6 +18,3 @@ int arch_platform_start_userspace(const struct boot_info* bi) { (void)bi; return -1; } - -void arch_platform_usermode_test_start(void) { -} diff --git a/src/arch/mips/arch_platform.c b/src/arch/mips/arch_platform.c index 04b095db..6d1fe145 100644 --- a/src/arch/mips/arch_platform.c +++ b/src/arch/mips/arch_platform.c @@ -18,6 +18,3 @@ int arch_platform_start_userspace(const struct boot_info* bi) { (void)bi; return -1; } - -void arch_platform_usermode_test_start(void) { -} diff --git a/src/arch/riscv/arch_platform.c b/src/arch/riscv/arch_platform.c index 04b095db..6d1fe145 100644 --- a/src/arch/riscv/arch_platform.c +++ b/src/arch/riscv/arch_platform.c @@ -18,6 +18,3 @@ int arch_platform_start_userspace(const struct boot_info* bi) { (void)bi; return -1; } - -void arch_platform_usermode_test_start(void) { -} diff --git a/src/arch/x86/arch_platform.c b/src/arch/x86/arch_platform.c index 4b394d87..17befa80 100644 --- a/src/arch/x86/arch_platform.c +++ b/src/arch/x86/arch_platform.c @@ -36,10 +36,6 @@ #include "arch/x86/percpu.h" #endif -#if defined(__i386__) -extern void x86_usermode_test_start(void); -#endif - #if defined(__i386__) static 
uint8_t ring0_trap_stack[16384] __attribute__((aligned(16))); #endif @@ -255,14 +251,3 @@ int arch_platform_start_userspace(const struct boot_info* bi) { return -1; #endif } - -static void ring3_test_thread(void) { - x86_usermode_test_start(); - for (;;) hal_cpu_idle(); -} - -void arch_platform_usermode_test_start(void) { -#if defined(__i386__) - process_create_kernel(ring3_test_thread); -#endif -} diff --git a/src/arch/x86/usermode.c b/src/arch/x86/usermode.c index 0810f2cb..dec3728b 100644 --- a/src/arch/x86/usermode.c +++ b/src/arch/x86/usermode.c @@ -10,66 +10,13 @@ #include #include -#include "pmm.h" -#include "vmm.h" #include "console.h" -#include "process.h" -#include "utils.h" #include "arch/x86/usermode.h" #include "hal/usermode.h" #include "arch/x86/idt.h" #if defined(__i386__) -enum { - SYSCALL_WRITE_NO = 1, - SYSCALL_EXIT_NO = 2, -}; - -struct emitter { - uint8_t* buf; - size_t pos; -}; - -struct patch { - size_t at; - size_t target; -}; - -static void emit8(struct emitter* e, uint8_t v) { e->buf[e->pos++] = v; } -static void emit32(struct emitter* e, uint32_t v) { - e->buf[e->pos++] = (uint8_t)(v & 0xFF); - e->buf[e->pos++] = (uint8_t)((v >> 8) & 0xFF); - e->buf[e->pos++] = (uint8_t)((v >> 16) & 0xFF); - e->buf[e->pos++] = (uint8_t)((v >> 24) & 0xFF); -} - -static void emit_mov_eax_imm(struct emitter* e, uint32_t imm) { emit8(e, 0xB8); emit32(e, imm); } -static void emit_mov_ebx_imm(struct emitter* e, uint32_t imm) { emit8(e, 0xBB); emit32(e, imm); } -static void emit_mov_ecx_imm(struct emitter* e, uint32_t imm) { emit8(e, 0xB9); emit32(e, imm); } -static void emit_mov_edx_imm(struct emitter* e, uint32_t imm) { emit8(e, 0xBA); emit32(e, imm); } -static void emit_int80(struct emitter* e) { emit8(e, 0xCD); emit8(e, 0x80); } -static void emit_cmp_eax_imm(struct emitter* e, uint32_t imm) { emit8(e, 0x3D); emit32(e, imm); } - -static void emit_jne_rel8_patch(struct emitter* e, struct patch* p, size_t target) { - emit8(e, 0x75); - p->at = e->pos; - p->target 
= target; - emit8(e, 0x00); -} - -static void emit_jmp_rel8_patch(struct emitter* e, struct patch* p, size_t target) { - emit8(e, 0xEB); - p->at = e->pos; - p->target = target; - emit8(e, 0x00); -} - -static void patch_rel8(uint8_t* buf, size_t at, size_t target) { - int32_t rel = (int32_t)target - (int32_t)(at + 1); - buf[at] = (uint8_t)(int8_t)rel; -} - /* User pages can be anywhere in physical memory on 32-bit PAE. */ __attribute__((noreturn)) void x86_enter_usermode(uintptr_t user_eip, uintptr_t user_esp) { @@ -147,150 +94,4 @@ __attribute__((noreturn)) void x86_enter_usermode_regs(const struct registers* r __builtin_unreachable(); } -void x86_usermode_test_start(void) { - kprintf("[USER] Starting ring3 test...\n"); - - const uintptr_t user_code_vaddr = 0x00400000U; - const uintptr_t user_stack_vaddr = 0x40000000U; - - void* code_phys = pmm_alloc_page(); - void* stack_phys = pmm_alloc_page(); - if (!code_phys || !stack_phys) { - kprintf("[USER] OOM allocating user pages.\n"); - return; - } - - const uintptr_t base = user_code_vaddr; - const uint32_t addr_t1_ok = (uint32_t)(base + 0x200); - const uint32_t addr_t1_fail = (uint32_t)(base + 0x210); - const uint32_t addr_t2_ok = (uint32_t)(base + 0x220); - const uint32_t addr_t2_fail = (uint32_t)(base + 0x230); - const uint32_t addr_t3_ok = (uint32_t)(base + 0x240); - const uint32_t addr_t3_fail = (uint32_t)(base + 0x250); - const uint32_t addr_msg = (uint32_t)(base + 0x300); - - const uint32_t t1_ok_len = 6; - const uint32_t t1_fail_len = 8; - const uint32_t t2_ok_len = 6; - const uint32_t t2_fail_len = 8; - const uint32_t t3_ok_len = 6; - const uint32_t t3_fail_len = 8; - const uint32_t msg_len = 18; - - /* Access the physical page via the kernel higher-half mapping (P2V) - * instead of relying on an identity mapping that may not exist. 
*/ - const uintptr_t code_kva = (uintptr_t)code_phys + 0xC0000000U; - struct emitter e = { .buf = (uint8_t*)code_kva, .pos = 0 }; - - /* T1: write(valid buf) -> t1_ok_len */ - emit_mov_eax_imm(&e, SYSCALL_WRITE_NO); - emit_mov_ebx_imm(&e, 1); - emit_mov_ecx_imm(&e, addr_t1_ok); - emit_mov_edx_imm(&e, t1_ok_len); - emit_int80(&e); - emit_cmp_eax_imm(&e, t1_ok_len); - struct patch t1_fail_jne = {0}; - emit_jne_rel8_patch(&e, &t1_fail_jne, 0); - struct patch t1_to_t2 = {0}; - emit_jmp_rel8_patch(&e, &t1_to_t2, 0); - /* FAIL label */ - size_t t1_fail_pos = e.pos; - emit_mov_eax_imm(&e, SYSCALL_WRITE_NO); - emit_mov_ebx_imm(&e, 1); - emit_mov_ecx_imm(&e, addr_t1_fail); - emit_mov_edx_imm(&e, t1_fail_len); - emit_int80(&e); - size_t t2_pos = e.pos; - - /* T2: write(valid buf) -> t2_ok_len */ - emit_mov_eax_imm(&e, SYSCALL_WRITE_NO); - emit_mov_ebx_imm(&e, 1); - emit_mov_ecx_imm(&e, addr_t2_ok); - emit_mov_edx_imm(&e, t2_ok_len); - emit_int80(&e); - emit_cmp_eax_imm(&e, t2_ok_len); - struct patch t2_fail_jne = {0}; - emit_jne_rel8_patch(&e, &t2_fail_jne, 0); - struct patch t2_to_t3 = {0}; - emit_jmp_rel8_patch(&e, &t2_to_t3, 0); - /* FAIL label */ - size_t t2_fail_pos = e.pos; - emit_mov_eax_imm(&e, SYSCALL_WRITE_NO); - emit_mov_ebx_imm(&e, 1); - emit_mov_ecx_imm(&e, addr_t2_fail); - emit_mov_edx_imm(&e, t2_fail_len); - emit_int80(&e); - size_t t3_pos = e.pos; - - /* T3: write(valid buf) -> msg_len */ - emit_mov_eax_imm(&e, SYSCALL_WRITE_NO); - emit_mov_ebx_imm(&e, 1); - emit_mov_ecx_imm(&e, addr_msg); - emit_mov_edx_imm(&e, msg_len); - emit_int80(&e); - emit_cmp_eax_imm(&e, msg_len); - struct patch t3_fail_jne = {0}; - emit_jne_rel8_patch(&e, &t3_fail_jne, 0); - /* OK print */ - emit_mov_eax_imm(&e, SYSCALL_WRITE_NO); - emit_mov_ebx_imm(&e, 1); - emit_mov_ecx_imm(&e, addr_t3_ok); - emit_mov_edx_imm(&e, t3_ok_len); - emit_int80(&e); - struct patch t3_to_exit = {0}; - emit_jmp_rel8_patch(&e, &t3_to_exit, 0); - /* FAIL label */ - size_t t3_fail_pos = e.pos; - 
emit_mov_eax_imm(&e, SYSCALL_WRITE_NO); - emit_mov_ebx_imm(&e, 1); - emit_mov_ecx_imm(&e, addr_t3_fail); - emit_mov_edx_imm(&e, t3_fail_len); - emit_int80(&e); - size_t exit_pos = e.pos; - emit_mov_eax_imm(&e, SYSCALL_EXIT_NO); - emit_mov_ebx_imm(&e, 0); - emit_int80(&e); - emit8(&e, 0xEB); - emit8(&e, 0xFE); - - patch_rel8(e.buf, t1_fail_jne.at, t1_fail_pos); - patch_rel8(e.buf, t1_to_t2.at, t2_pos); - patch_rel8(e.buf, t2_fail_jne.at, t2_fail_pos); - patch_rel8(e.buf, t2_to_t3.at, t3_pos); - patch_rel8(e.buf, t3_fail_jne.at, t3_fail_pos); - patch_rel8(e.buf, t3_to_exit.at, exit_pos); - - memcpy((void*)(code_kva + 0x200), "T1 OK\n", t1_ok_len); - memcpy((void*)(code_kva + 0x210), "T1 FAIL\n", t1_fail_len); - memcpy((void*)(code_kva + 0x220), "T2 OK\n", t2_ok_len); - memcpy((void*)(code_kva + 0x230), "T2 FAIL\n", t2_fail_len); - memcpy((void*)(code_kva + 0x240), "T3 OK\n", t3_ok_len); - memcpy((void*)(code_kva + 0x250), "T3 FAIL\n", t3_fail_len); - memcpy((void*)(code_kva + 0x300), "Hello from ring3!\n", msg_len); - - /* Create a private address space so the ring3 user pages do NOT - * pollute kernel_as (which is shared by all kernel threads). - * Code/data was emitted above via P2V (kernel higher-half mapping); - * now we switch to the new AS and map the physical pages at their - * user virtual addresses. 
*/ - uintptr_t ring3_as = vmm_as_create_kernel_clone(); - if (!ring3_as) { - kprintf("[USER] Failed to create ring3 address space.\n"); - pmm_free_page(code_phys); - pmm_free_page(stack_phys); - return; - } - - current_process->addr_space = ring3_as; - vmm_as_activate(ring3_as); - - vmm_map_page((uint64_t)(uintptr_t)code_phys, (uint64_t)user_code_vaddr, - VMM_FLAG_PRESENT | VMM_FLAG_RW | VMM_FLAG_USER); - vmm_map_page((uint64_t)(uintptr_t)stack_phys, (uint64_t)user_stack_vaddr, - VMM_FLAG_PRESENT | VMM_FLAG_RW | VMM_FLAG_USER); - - uintptr_t user_esp = user_stack_vaddr + 4096; - x86_enter_usermode(user_code_vaddr, user_esp); -} - #endif diff --git a/src/kernel/init.c b/src/kernel/init.c index de289e9a..7725d59e 100644 --- a/src/kernel/init.c +++ b/src/kernel/init.c @@ -90,101 +90,6 @@ int init_mount_fs(const char* fstype, int drive, uint32_t lba, const char* mount return 0; } -/* ---- /etc/fstab parser ---- */ - -/* fstab format (one entry per line, '#' comments): - * [options] - * Example: - * /dev/hda /disk diskfs defaults - * /dev/hda /persist persistfs defaults - * /dev/hdb /ext2 ext2 defaults - */ -static void init_parse_fstab(void) { - fs_node_t* fstab = vfs_lookup("/etc/fstab"); - if (!fstab) return; - - uint32_t len = fstab->length; - if (len == 0 || len > 4096) return; - - uint8_t* buf = (uint8_t*)kmalloc(len + 1); - if (!buf) return; - - uint32_t rd = vfs_read(fstab, 0, len, buf); - buf[rd] = '\0'; - - kprintf("[FSTAB] Parsing /etc/fstab (%u bytes)\n", rd); - - /* Parse line by line */ - char* p = (char*)buf; - while (*p) { - /* Skip leading whitespace */ - while (*p == ' ' || *p == '\t') p++; - if (*p == '\0') break; - if (*p == '#' || *p == '\n') { - while (*p && *p != '\n') p++; - if (*p == '\n') p++; - continue; - } - - /* Extract device field */ - char* dev_start = p; - while (*p && *p != ' ' && *p != '\t' && *p != '\n') p++; - char dev_end_ch = *p; *p = '\0'; - char device[32]; - strncpy(device, dev_start, sizeof(device) - 1); - 
device[sizeof(device) - 1] = '\0'; - *p = dev_end_ch; - if (*p == '\n' || *p == '\0') { if (*p == '\n') p++; continue; } - - /* Skip whitespace */ - while (*p == ' ' || *p == '\t') p++; - - /* Extract mountpoint field */ - char* mp_start = p; - while (*p && *p != ' ' && *p != '\t' && *p != '\n') p++; - char mp_end_ch = *p; *p = '\0'; - char mountpoint[64]; - strncpy(mountpoint, mp_start, sizeof(mountpoint) - 1); - mountpoint[sizeof(mountpoint) - 1] = '\0'; - *p = mp_end_ch; - if (*p == '\n' || *p == '\0') { if (*p == '\n') p++; continue; } - - /* Skip whitespace */ - while (*p == ' ' || *p == '\t') p++; - - /* Extract fstype field */ - char* fs_start = p; - while (*p && *p != ' ' && *p != '\t' && *p != '\n') p++; - char fs_end_ch = *p; *p = '\0'; - char fstype[16]; - strncpy(fstype, fs_start, sizeof(fstype) - 1); - fstype[sizeof(fstype) - 1] = '\0'; - *p = fs_end_ch; - - /* Skip rest of line */ - while (*p && *p != '\n') p++; - if (*p == '\n') p++; - - /* Parse device: expect /dev/hdX */ - int drive = -1; - if (strncmp(device, "/dev/", 5) == 0) { - drive = ata_name_to_drive(device + 5); - } - if (drive < 0) { - kprintf("[FSTAB] Unknown device: %s\n", device); - continue; - } - if (!ata_pio_drive_present(drive)) { - kprintf("[FSTAB] Device %s not present, skipping\n", device); - continue; - } - - (void)init_mount_fs(fstype, drive, 0, mountpoint); - } - - kfree(buf); -} - int init_start(const struct boot_info* bi) { /* Parse kernel command line (Linux-like triaging) */ cmdline_parse(bi ? 
bi->cmdline : NULL); @@ -245,8 +150,6 @@ int init_start(const struct boot_info* bi) { fs_node_t* tmp = tmpfs_create_root(); if (tmp) { - static const uint8_t hello[] = "hello from tmpfs\n"; - (void)tmpfs_add_file(tmp, "hello.txt", hello, (uint32_t)(sizeof(hello) - 1)); (void)vfs_mount_full("/tmp", tmp, "tmpfs", "none", 0); } @@ -258,7 +161,6 @@ int init_start(const struct boot_info* bi) { hal_drivers_init_all(); net_init(); - net_ping_test(); ksocket_init(); vbe_init(bi); @@ -293,7 +195,11 @@ int init_start(const struct boot_info* bi) { /* If root= is specified on the kernel command line, mount that device * as the disk root filesystem. The filesystem type is auto-detected * by trying each supported type in order. - * Example: root=/dev/hda or root=/dev/hdb */ + * Example: root=/dev/hda or root=/dev/hdb + * + * If no root= is given but the primary master (hda) is present, + * auto-mount it on /disk so that any init= binary (including fulltest) + * has disk access. /etc/fstab parsing is now done by /sbin/init. */ const char* root_dev = cmdline_get("root"); if (root_dev) { int drive = -1; @@ -316,11 +222,24 @@ int init_start(const struct boot_info* bi) { } else { kprintf("[INIT] root=%s: device not found\n", root_dev); } + } else if (ata_pio_drive_present(0)) { + /* No root= on cmdline, but primary master is present — auto-mount */ + static const char* fstypes[] = { "diskfs", "fat", "ext2", NULL }; + for (int i = 0; fstypes[i]; i++) { + if (init_mount_fs(fstypes[i], 0, 0, "/disk") == 0) { + kprintf("[INIT] /dev/hda auto-mounted as %s on /disk\n", fstypes[i]); + break; + } + } + /* Also mount persistfs on /persist (was previously in /etc/fstab) */ + if (init_mount_fs("persistfs", 0, 0, "/persist") == 0) { + kprintf("[INIT] /dev/hda auto-mounted as persistfs on /persist\n"); + } } /* Disk-based filesystems can also be mounted via /etc/fstab entries - * or manually via the kconsole 'mount' command. 
*/ - init_parse_fstab(); + * (parsed by userspace /sbin/init) or manually via the kconsole + * 'mount' command. */ if (!fs_root) { kprintf("[INIT] No root filesystem -- cannot start userspace.\n"); @@ -329,9 +248,5 @@ int init_start(const struct boot_info* bi) { int user_ret = arch_platform_start_userspace(bi); - if (cmdline_has("ring3")) { - arch_platform_usermode_test_start(); - } - return user_ret; } diff --git a/src/kernel/socket.c b/src/kernel/socket.c index 2c66bea3..f3e3ddd4 100644 --- a/src/kernel/socket.c +++ b/src/kernel/socket.c @@ -18,8 +18,10 @@ #include "lwip/tcp.h" #include "lwip/udp.h" +#include "lwip/raw.h" #include "lwip/ip_addr.h" #include "lwip/pbuf.h" +#include "lwip/prot/icmp.h" #include @@ -29,11 +31,13 @@ struct ksocket { int in_use; - int type; /* SOCK_STREAM or SOCK_DGRAM */ + int type; /* SOCK_STREAM, SOCK_DGRAM, or SOCK_RAW */ + int protocol; /* IPPROTO_ICMP, IPPROTO_TCP, etc. */ int state; union { struct tcp_pcb* tcp; struct udp_pcb* udp; + struct raw_pcb* raw; } pcb; /* Receive ring buffer */ @@ -213,30 +217,59 @@ static void udp_recv_cb(void* arg, struct udp_pcb* upcb, struct pbuf* p, wq_wake_all(&s->rx_wq); } +/* ------------------------------------------------------------------ */ +/* lwIP RAW callback: arg carries the socket id; each packet is copied into that socket's rx buffer */ +/* ------------------------------------------------------------------ */ + +static u8_t raw_recv_cb(void* arg, struct raw_pcb* pcb, struct pbuf* p, + const ip_addr_t* addr) { + (void)pcb; + int sid = (int)(uintptr_t)arg; + struct ksocket* s = get_socket(sid); + if (!s || !p) return 0; /* not eaten: lwIP keeps ownership of p, so it must not be freed here */ + + /* Store source IP for recvfrom */ + s->last_remote_ip = ip_addr_get_ip4_u32(addr); + s->last_remote_port = 0; /* raw sockets have no ports */ + + for (struct pbuf* q = p; q != NULL; q = q->next) { + rxbuf_write(s, q->payload, q->len); + } + pbuf_free(p); + + wq_wake_all(&s->rx_wq); + return 1; /* eaten: p was freed above, lwIP must not process it further */ +} + /* ------------------------------------------------------------------ */ /* Public API */ /* 
------------------------------------------------------------------ */ int ksocket_create(int domain, int type, int protocol) { - (void)protocol; if (domain != AF_INET) return -EAFNOSUPPORT; - if (type != SOCK_STREAM && type != SOCK_DGRAM) return -EPROTONOSUPPORT; + if (type != SOCK_STREAM && type != SOCK_DGRAM && type != SOCK_RAW) return -EPROTONOSUPPORT; int sid = alloc_socket(); if (sid < 0) return sid; struct ksocket* s = &sockets[sid]; s->type = type; + s->protocol = protocol; if (type == SOCK_STREAM) { s->pcb.tcp = tcp_new(); if (!s->pcb.tcp) { s->in_use = 0; return -ENOMEM; } tcp_arg(s->pcb.tcp, (void*)(uintptr_t)sid); tcp_recv(s->pcb.tcp, tcp_recv_cb); - } else { + } else if (type == SOCK_DGRAM) { s->pcb.udp = udp_new(); if (!s->pcb.udp) { s->in_use = 0; return -ENOMEM; } udp_recv(s->pcb.udp, udp_recv_cb, (void*)(uintptr_t)sid); + } else { + /* SOCK_RAW */ + s->pcb.raw = raw_new((u8_t)protocol); + if (!s->pcb.raw) { s->in_use = 0; return -ENOMEM; } + raw_recv(s->pcb.raw, raw_recv_cb, (void*)(uintptr_t)sid); } return sid; @@ -254,8 +287,11 @@ int ksocket_bind(int sid, const struct sockaddr_in* addr) { err_t err; if (s->type == SOCK_STREAM) { err = tcp_bind(s->pcb.tcp, &ip, port); - } else { + } else if (s->type == SOCK_DGRAM) { err = udp_bind(s->pcb.udp, &ip, port); + } else { + /* SOCK_RAW — bind ignores port */ + err = raw_bind(s->pcb.raw, &ip); } if (err != ERR_OK) return -EADDRINUSE; @@ -354,7 +390,7 @@ int ksocket_send(int sid, const void* buf, size_t len, int flags) { if (err != ERR_OK) return -EIO; tcp_output(s->pcb.tcp); return (int)snd_len; - } else { + } else if (s->type == SOCK_DGRAM) { /* UDP connected send */ if (s->state != KSOCK_CONNECTED) return -ENOTCONN; struct pbuf* p = pbuf_alloc(PBUF_TRANSPORT, (u16_t)len, PBUF_RAM); @@ -363,16 +399,27 @@ int ksocket_send(int sid, const void* buf, size_t len, int flags) { err_t err = udp_send(s->pcb.udp, p); pbuf_free(p); return (err == ERR_OK) ? 
(int)len : -EIO; + } else { + /* SOCK_RAW connected send */ + if (!s->pcb.raw) return -ENOTCONN; + struct pbuf* p = (len <= 0xFFFFu) ? pbuf_alloc(PBUF_IP, (u16_t)len, PBUF_RAM) : NULL; /* pbuf lengths are u16_t: refuse sizes the cast would truncate, or the memcpy below overruns the pbuf */ + if (!p) return -ENOMEM; + memcpy(p->payload, buf, len); + err_t err = raw_send(s->pcb.raw, p); + pbuf_free(p); + return (err == ERR_OK) ? (int)len : -EIO; } } int ksocket_recv(int sid, void* buf, size_t len, int flags) { - (void)flags; struct ksocket* s = get_socket(sid); if (!s) return -EBADF; + int nonblock = flags & 0x800; /* O_NONBLOCK */ + /* Block until data available or peer closed */ while (s->rx_count == 0 && s->state != KSOCK_PEER_CLOSED && s->state != KSOCK_CLOSED) { + if (nonblock) return -EAGAIN; wq_push(&s->rx_wq, current_process); current_process->state = PROCESS_BLOCKED; schedule(); @@ -389,19 +436,31 @@ int ksocket_sendto(int sid, const void* buf, size_t len, int flags, (void)flags; struct ksocket* s = get_socket(sid); if (!s) return -EBADF; - if (s->type != SOCK_DGRAM) return -EOPNOTSUPP; ip_addr_t ip; ip_addr_set_zero_ip4(&ip); ip4_addr_set_u32(ip_2_ip4(&ip), dest->sin_addr); uint16_t port = ntohs(dest->sin_port); - struct pbuf* p = pbuf_alloc(PBUF_TRANSPORT, (u16_t)len, PBUF_RAM); - if (!p) return -ENOMEM; - memcpy(p->payload, buf, len); - err_t err = udp_sendto(s->pcb.udp, p, &ip, port); - pbuf_free(p); - return (err == ERR_OK) ? (int)len : -EIO; + if (s->type == SOCK_DGRAM) { + struct pbuf* p = (len <= 0xFFFFu) ? pbuf_alloc(PBUF_TRANSPORT, (u16_t)len, PBUF_RAM) : NULL; /* reject lengths the u16_t cast would truncate */ + if (!p) return -ENOMEM; + memcpy(p->payload, buf, len); + err_t err = udp_sendto(s->pcb.udp, p, &ip, port); + pbuf_free(p); + return (err == ERR_OK) ? (int)len : -EIO; + } else if (s->type == SOCK_RAW) { + /* For raw sockets, buf contains the IP payload (e.g. ICMP header+data). + * lwIP raw_sendto adds the IP header automatically. */ + struct pbuf* p = (len <= 0xFFFFu) ? pbuf_alloc(PBUF_IP, (u16_t)len, PBUF_RAM) : NULL; /* reject lengths the u16_t cast would truncate */ + if (!p) return -ENOMEM; + memcpy(p->payload, buf, len); + err_t err = raw_sendto(s->pcb.raw, p, &ip); + pbuf_free(p); + return (err == ERR_OK) ? 
(int)len : -EIO; + } + + return -EOPNOTSUPP; } int ksocket_recvfrom(int sid, void* buf, size_t len, int flags, @@ -414,6 +473,8 @@ int ksocket_recvfrom(int sid, void* buf, size_t len, int flags, src->sin_port = htons(s->last_remote_port); src->sin_addr = s->last_remote_ip; } + } else if (ret == -EAGAIN && src) { + /* Non-blocking with nothing pending: this branch is intentionally empty, src is left untouched */ } return ret; } @@ -429,6 +490,8 @@ int ksocket_close(int sid) { tcp_close(s->pcb.tcp); } else if (s->type == SOCK_DGRAM && s->pcb.udp) { udp_remove(s->pcb.udp); + } else if (s->type == SOCK_RAW && s->pcb.raw) { + raw_remove(s->pcb.raw); } /* Free any pending accepted sockets */ @@ -540,6 +603,12 @@ int ksocket_getsockname(int sid, struct sockaddr_in* addr) { addr->sin_addr = ip_addr_get_ip4_u32(&s->pcb.udp->local_ip); return 0; } + if (s->type == SOCK_RAW && s->pcb.raw) { + addr->sin_family = AF_INET; + addr->sin_port = 0; + addr->sin_addr = ip_addr_get_ip4_u32(&s->pcb.raw->local_ip); + return 0; + } return -EINVAL; } diff --git a/src/kernel/syscall.c b/src/kernel/syscall.c index 6b199892..1571e089 100644 --- a/src/kernel/syscall.c +++ b/src/kernel/syscall.c @@ -4656,13 +4656,18 @@ static void socket_syscall_dispatch(struct registers* regs, uint32_t syscall_no) } if (syscall_no == SYSCALL_RECV) { - int sid = sock_fd_get_sid((int)sc_arg0(regs)); + int fd = (int)sc_arg0(regs); + int sid = sock_fd_get_sid(fd); if (sid < 0) { sc_ret(regs) = (uint32_t)-EBADF; return; } size_t len = (size_t)sc_arg2(regs); if (!user_range_ok((void*)sc_arg1(regs), len)) { sc_ret(regs) = (uint32_t)-EFAULT; return; } - sc_ret(regs) = (uint32_t)ksocket_recv(sid, (void*)sc_arg1(regs), len, (int)sc_arg3(regs)); + /* Merge O_NONBLOCK from fd flags into recv flags */ + int rflags = (int)sc_arg3(regs); + struct file* rf = fd_get(fd); + if (rf && (rf->flags & O_NONBLOCK)) rflags |= O_NONBLOCK; + sc_ret(regs) = (uint32_t)ksocket_recv(sid, (void*)sc_arg1(regs), len, rflags); return; } @@ -4683,15 +4688,20 @@ static 
void socket_syscall_dispatch(struct registers* regs, uint32_t syscall_no) } if (syscall_no == SYSCALL_RECVFROM) { - int sid = sock_fd_get_sid((int)sc_arg0(regs)); + int fd = (int)sc_arg0(regs); + int sid = sock_fd_get_sid(fd); if (sid < 0) { sc_ret(regs) = (uint32_t)-EBADF; return; } size_t len = (size_t)sc_arg2(regs); if (!user_range_ok((void*)sc_arg1(regs), len)) { sc_ret(regs) = (uint32_t)-EFAULT; return; } + /* Merge O_NONBLOCK from fd flags into recv flags */ + int rflags = (int)sc_arg3(regs); + struct file* rf = fd_get(fd); + if (rf && (rf->flags & O_NONBLOCK)) rflags |= O_NONBLOCK; struct sockaddr_in src; memset(&src, 0, sizeof(src)); - int ret = ksocket_recvfrom(sid, (void*)sc_arg1(regs), len, (int)sc_arg3(regs), &src); + int ret = ksocket_recvfrom(sid, (void*)sc_arg1(regs), len, rflags, &src); if (ret > 0 && sc_arg4(regs)) { (void)copy_to_user((void*)sc_arg4(regs), &src, sizeof(src)); } diff --git a/tests/smoke_test.exp b/tests/smoke_test.exp index 999380b3..43f1c7db 100755 --- a/tests/smoke_test.exp +++ b/tests/smoke_test.exp @@ -48,7 +48,6 @@ set tests { {"ATA DMA init" "\\[ATA-DMA\\] Ch0 initialized"} {"ATA DMA mode" "\\[ATA\\] Channel 0: DMA mode."} {"SMP CPUs active" "CPU\\(s\\) active."} - {"User ring3 entry" "\\[USER\\] enter ring3"} {"fulltest.elf hello" "\\[test\\] hello from fulltest.elf"} {"open/read/close" "\\[test\\] open/read/close OK"} {"overlay copy-up" "\\[test\\] overlay copy-up OK"} @@ -124,7 +123,6 @@ set tests { {"waitpid 100 children" "\\[test\\] waitpid OK \\(100 children"} {"lazy PLT" "\\[test\\] lazy PLT OK"} {"PLT cached" "\\[test\\] PLT cached OK"} - {"PING network" "\\[PING\\] .*received.*network OK"} {"echo execve" "\\[echo\\] hello from echo"} {"setuid/setgid" "\\[test\\] setuid/setgid OK"} {"fcntl F_GETFL/SETFL" "\\[test\\] fcntl F_GETFL/F_SETFL OK"} @@ -153,6 +151,7 @@ set tests { {"futex" "\\[test\\] futex OK"} {"sigaltstack" "\\[test\\] sigaltstack OK"} {"socket API" "\\[test\\] socket API OK"} + {"ICMP ping" "\\[test\\] 
ICMP ping (OK|timeout \\(non-fatal\\))"} {"mqueue" "\\[test\\] mqueue OK"} {"named semaphore" "\\[test\\] named semaphore OK"} {"getrusage" "\\[test\\] getrusage OK"} diff --git a/tests/test_battery.exp b/tests/test_battery.exp index 5ffa4023..6c1e1d3c 100644 --- a/tests/test_battery.exp +++ b/tests/test_battery.exp @@ -153,7 +153,6 @@ set patterns { {"ATA DMA init" "\\[ATA-DMA\\] Ch0 initialized"} {"ATA DMA mode" "\\[ATA\\] Channel 0: DMA mode."} {"SMP CPUs active" "CPU\\(s\\) active."} - {"User ring3 entry" "\\[USER\\] enter ring3"} {"fulltest.elf hello" "\\[test\\] hello from fulltest.elf"} {"open/read/close" "\\[test\\] open/read/close OK"} {"overlay copy-up" "\\[test\\] overlay copy-up OK"} @@ -229,7 +228,6 @@ set patterns { {"waitpid 100 children" "\\[test\\] waitpid OK \\(100 children"} {"lazy PLT" "\\[test\\] lazy PLT OK"} {"PLT cached" "\\[test\\] PLT cached OK"} - {"PING network" "\\[PING\\] .*received.*network OK"} {"echo execve" "\\[echo\\] hello from echo"} {"setuid/setgid" "\\[test\\] setuid/setgid OK"} {"fcntl F_GETFL/SETFL" "\\[test\\] fcntl F_GETFL/F_SETFL OK"} @@ -252,7 +250,6 @@ set patterns { {"SMP parallel fork" "\\[test\\] SMP parallel fork OK"} {"LZ4 Frame decomp" "\\[INITRD\\] LZ4"} {"NET lwIP init" "\\[NET\\] lwIP initialized"} - {"PING network OK" "\\[PING\\] .*received.*network OK"} {"ATA /dev/hda" "\\[ATA\\] /dev/hda detected"} {"INITRD found" "\\[INITRD\\] Found"} {"diskfs mount /disk" "\\[MOUNT\\] diskfs on /dev/hda"} @@ -263,6 +260,7 @@ set patterns { {"futex" "\\[test\\] futex OK"} {"sigaltstack" "\\[test\\] sigaltstack OK"} {"socket API" "\\[test\\] socket API OK"} + {"ICMP ping" "\\[test\\] ICMP ping (OK|timeout \\(non-fatal\\))"} {"mqueue" "\\[test\\] mqueue OK"} {"named semaphore" "\\[test\\] named semaphore OK"} {"getrusage" "\\[test\\] getrusage OK"} @@ -278,7 +276,7 @@ set patterns { set res [wait_for_patterns $serial_log $timeout_sec $patterns] kill_qemu $iso -report_section "Full smoke + Ping + diskfs (1 disk, SMP=4)" 
[lindex $res 0] [lindex $res 1] +report_section "Full smoke + diskfs (1 disk, SMP=4)" [lindex $res 0] [lindex $res 1] # ================================================================ # TEST 2: Multi-disk ATA detection (hda + hdb + hdd) @@ -374,7 +372,7 @@ set pid [run_qemu $iso 1 $serial_log $timeout_sec \ {{-drive file=smp1_disk.img,if=ide,format=raw}}] set patterns { - {"SMP1 boot" "\\[USER\\] enter ring3"} + {"SMP1 boot" "CPU\\(s\\) active."} {"SMP1 fulltest" "\\[test\\] hello from fulltest.elf"} {"SMP1 open/read" "\\[test\\] open/read/close OK"} {"SMP1 brk" "\\[test\\] brk OK"} @@ -385,7 +383,6 @@ set patterns { {"SMP1 signal" "\\[test\\] sigaction/kill\\(SIGUSR1\\) OK"} {"SMP1 pipe" "\\[test\\] poll\\(pipe\\) OK"} {"SMP1 diskfs" "\\[test\\] /disk/test prev="} - {"SMP1 PING" "\\[PING\\] .*received.*network OK"} } set res [wait_for_patterns $serial_log $timeout_sec $patterns] @@ -403,12 +400,11 @@ set pid [run_qemu $iso 2 $serial_log $timeout_sec \ {{-drive file=smp2_disk.img,if=ide,format=raw}}] set patterns { - {"SMP2 boot" "\\[USER\\] enter ring3"} + {"SMP2 boot" "CPU\\(s\\) active."} {"SMP2 fulltest" "\\[test\\] hello from fulltest.elf"} {"SMP2 CoW fork" "\\[test\\] CoW fork OK"} {"SMP2 parallel fork" "\\[test\\] SMP parallel fork OK"} {"SMP2 diskfs" "\\[test\\] /disk/test prev="} - {"SMP2 PING" "\\[PING\\] .*received.*network OK"} } set res [wait_for_patterns $serial_log $timeout_sec $patterns] diff --git a/user/cmds/fulltest/fulltest.c b/user/cmds/fulltest/fulltest.c index 8fdc5aa5..0aa5c20f 100644 --- a/user/cmds/fulltest/fulltest.c +++ b/user/cmds/fulltest/fulltest.c @@ -170,6 +170,8 @@ enum { SYSCALL_CONNECT = 62, SYSCALL_SEND = 63, SYSCALL_RECV = 64, + SYSCALL_SENDTO = 65, + SYSCALL_RECVFROM = 66, SYSCALL_SHUTDOWN = 133, SYSCALL_GETSOCKNAME = 135, @@ -265,11 +267,12 @@ enum { }; enum { - O_CREAT = 0x40, - O_TRUNC = 0x200, + O_WRONLY = 0x01, + O_RDWR = 0x02, + O_CREAT = 0x40, + O_TRUNC = 0x200, + O_APPEND = 0x400, O_NONBLOCK = 0x800, - O_APPEND = 
0x400, - O_RDWR = 0x02, }; enum { @@ -328,7 +331,10 @@ enum { enum { AF_INET = 2, SOCK_STREAM = 1, - IPPROTO_TCP = 6, + SOCK_DGRAM = 2, + SOCK_RAW = 3, + IPPROTO_ICMP = 1, + IPPROTO_TCP = 6, }; #define RUSAGE_SELF 0 @@ -1500,6 +1506,20 @@ static int sys_recv(int sockfd, void* buf, uint32_t len, int flags) { return __syscall_fix(ret); } +static int sys_sendto(int sockfd, const void* buf, uint32_t len, int flags, + const void* dest_addr) { + int ret; + __asm__ volatile("int $0x80" : "=a"(ret) : "a"(SYSCALL_SENDTO), "b"(sockfd), "c"(buf), "d"(len), "S"(flags), "D"(dest_addr) : "memory"); + return __syscall_fix(ret); +} + +static int sys_recvfrom(int sockfd, void* buf, uint32_t len, int flags, + void* src_addr) { + int ret; + __asm__ volatile("int $0x80" : "=a"(ret) : "a"(SYSCALL_RECVFROM), "b"(sockfd), "c"(buf), "d"(len), "S"(flags), "D"(src_addr) : "memory"); + return __syscall_fix(ret); +} + static int sys_shutdown(int sockfd, int how) { int ret; __asm__ volatile("int $0x80" : "=a"(ret) : "a"(SYSCALL_SHUTDOWN), "b"(sockfd), "c"(how) : "memory"); @@ -1724,6 +1744,16 @@ void _start(void) { static const char msg[] = "[test] hello from fulltest.elf\n"; (void)sys_write(1, msg, (uint32_t)(sizeof(msg) - 1)); + /* Create /tmp/hello.txt — previously done by kernel, now in userspace */ + { + int hfd = sys_open("/tmp/hello.txt", O_CREAT | O_WRONLY | O_TRUNC); + if (hfd >= 0) { + static const uint8_t hello[] = "hello from tmpfs\n"; + (void)sys_write(hfd, hello, (uint32_t)(sizeof(hello) - 1)); + (void)sys_close(hfd); + } + } + static const char path[] = "/sbin/fulltest"; int fd = sys_open(path, 0); @@ -4913,6 +4943,102 @@ void _start(void) { (uint32_t)(sizeof("[test] socket API OK\n") - 1)); } + // I7b: ICMP ping via SOCK_RAW — send echo request to QEMU gateway (10.0.2.2) + { + int pfd = sys_socket(AF_INET, SOCK_RAW, IPPROTO_ICMP); + if (pfd < 0) { + sys_write(1, "[test] SOCK_RAW socket failed\n", + (uint32_t)(sizeof("[test] SOCK_RAW socket failed\n") - 1)); + sys_exit(1); + } + 
+ /* Set non-blocking so recvfrom returns -EAGAIN instead of blocking */ + (void)sys_fcntl(pfd, 4 /* F_SETFL */, 0x800U /* O_NONBLOCK */); + + struct sockaddr_in dst; + dst.sin_family = AF_INET; + dst.sin_port = 0; + /* 10.0.2.2 in network byte order (big-endian) = 0x0A000202 */ + dst.sin_addr = 0x0202000AU; /* little-endian representation of 10.0.2.2 */ + for (int i = 0; i < 8; i++) dst.sin_zero[i] = 0; + + int got_reply = 0; + for (int seq = 1; seq <= 3; seq++) { + /* Build ICMP echo request header (8 bytes) */ + uint8_t icmp_req[8]; + icmp_req[0] = 8; /* type = echo request */ + icmp_req[1] = 0; /* code = 0 */ + icmp_req[2] = 0; /* checksum high (calculated below) */ + icmp_req[3] = 0; /* checksum low */ + icmp_req[4] = 0xAD; icmp_req[5] = 0x05; /* ID = 0xAD05 */ + icmp_req[6] = (uint8_t)(seq >> 8); /* seq high */ + icmp_req[7] = (uint8_t)(seq & 0xFF); /* seq low */ + + /* Calculate checksum */ + uint32_t sum = 0; + for (int i = 0; i < 8; i += 2) { + sum += (uint32_t)((uint16_t)icmp_req[i] << 8 | icmp_req[i + 1]); + } + while (sum >> 16) sum = (sum & 0xFFFF) + (sum >> 16); + icmp_req[2] = (uint8_t)(~(sum >> 8) & 0xFF); + icmp_req[3] = (uint8_t)(~sum & 0xFF); + + int sent = sys_sendto(pfd, icmp_req, 8, 0, &dst); + if (sent < 8) { + sys_write(1, "[test] ping sendto failed\n", + (uint32_t)(sizeof("[test] ping sendto failed\n") - 1)); + (void)sys_close(pfd); + sys_exit(1); + } + + /* Wait for reply with timeout using non-blocking recvfrom */ + struct timespec ts_now; + sys_clock_gettime(CLOCK_MONOTONIC, &ts_now); + uint32_t deadline = ts_now.tv_sec + 3; /* 3 second timeout */ + + while (1) { + uint8_t rbuf[128]; + struct sockaddr_in src; + int n = sys_recvfrom(pfd, rbuf, sizeof(rbuf), 0, &src); + if (n < 0) { + /* -EAGAIN: no data yet, check timeout */ + sys_clock_gettime(CLOCK_MONOTONIC, &ts_now); + if (ts_now.tv_sec >= deadline) break; + struct timespec ys = {0, 50000000}; /* 50ms */ + (void)sys_nanosleep(&ys, 0); + continue; + } + if (n >= 28) { + /* IP header 
(20) + ICMP header (8) minimum */ + uint8_t ihl = (uint8_t)(rbuf[0] & 0x0F); + int ip_hdr_len = ihl * 4; + if (n >= ip_hdr_len + 8) { + uint8_t type = rbuf[ip_hdr_len]; + uint8_t code = rbuf[ip_hdr_len + 1]; + if (type == 0 && code == 0) { /* ICMP echo reply */ + got_reply = 1; + break; + } + } + } + sys_clock_gettime(CLOCK_MONOTONIC, &ts_now); + if (ts_now.tv_sec >= deadline) break; + } + + if (got_reply) break; + } + + (void)sys_close(pfd); + + if (got_reply) { + sys_write(1, "[test] ICMP ping OK\n", + (uint32_t)(sizeof("[test] ICMP ping OK\n") - 1)); + } else { + sys_write(1, "[test] ICMP ping timeout (non-fatal)\n", + (uint32_t)(sizeof("[test] ICMP ping timeout (non-fatal)\n") - 1)); + } + } + // I8: mqueue — open/send/receive/close/unlink { int mqd = sys_mq_open("/test_mq", O_CREAT | O_RDWR); diff --git a/user/cmds/init/init.c b/user/cmds/init/init.c index b8bc4f58..427030a5 100644 --- a/user/cmds/init/init.c +++ b/user/cmds/init/init.c @@ -293,10 +293,78 @@ static void mount_virtual_fs(void) { fprintf(stderr, "init: mount tmpfs on /tmp failed\n"); } +/* Parse /etc/fstab and mount disk-based filesystems. + * Format: device mountpoint fstype options + * Example: /dev/hda /disk diskfs defaults + * Migrated from kernel init_parse_fstab() to userspace. 
*/ +static void parse_fstab(void) { + int fd = open("/etc/fstab", O_RDONLY); + if (fd < 0) return; + + char buf[2048]; + int total = 0, r; + while ((r = read(fd, buf + total, (size_t)(sizeof(buf) - (size_t)total - 1))) > 0) + total += r; + buf[total] = '\0'; + close(fd); + + char* p = buf; + while (*p) { + while (*p == ' ' || *p == '\t') p++; + if (*p == '\0') break; + if (*p == '#' || *p == '\n') { + while (*p && *p != '\n') p++; + if (*p == '\n') p++; + continue; + } + + /* device */ + char device[64] = {0}; + { char* s = p; while (*p && *p != ' ' && *p != '\t' && *p != '\n') p++; + int len = (int)(p - s); if (len > 63) len = 63; memcpy(device, s, (size_t)len); } + if (*p == '\n' || *p == '\0') { if (*p == '\n') p++; continue; } + while (*p == ' ' || *p == '\t') p++; + + /* mountpoint */ + char mountpoint[64] = {0}; + { char* s = p; while (*p && *p != ' ' && *p != '\t' && *p != '\n') p++; + int len = (int)(p - s); if (len > 63) len = 63; memcpy(mountpoint, s, (size_t)len); } + if (*p == '\n' || *p == '\0') { if (*p == '\n') p++; continue; } + while (*p == ' ' || *p == '\t') p++; + + /* fstype */ + char fstype[32] = {0}; + { char* s = p; while (*p && *p != ' ' && *p != '\t' && *p != '\n') p++; + int len = (int)(p - s); if (len > 31) len = 31; memcpy(fstype, s, (size_t)len); } + + /* Skip rest of line */ + while (*p && *p != '\n') p++; + if (*p == '\n') p++; + + /* Skip virtual FS — already mounted by mount_virtual_fs() */ + if (strcmp(fstype, "devfs") == 0 || strcmp(fstype, "procfs") == 0 + || strcmp(fstype, "tmpfs") == 0 || strcmp(fstype, "overlayfs") == 0) + continue; + + /* Skip if already mounted */ + if (is_mounted(mountpoint)) continue; + + if (mount(device, mountpoint, fstype, 0, NULL) < 0) { + fprintf(stderr, "init: mount %s on %s (%s) failed\n", + device, mountpoint, fstype); + } else { + printf("init: mounted %s on %s (%s)\n", device, mountpoint, fstype); + } + } +} + static void default_init(void) { /* Mount virtual filesystems before anything else */ 
mount_virtual_fs(); + /* Mount disk filesystems from /etc/fstab */ + parse_fstab(); + /* Run /etc/init.d/rcS if it exists */ if (access("/etc/init.d/rcS", 0) == 0) { run_and_wait("/etc/init.d/rcS"); @@ -356,6 +424,9 @@ int main(int argc, char** argv) { /* Mount virtual filesystems before running any inittab entries */ mount_virtual_fs(); + /* Mount disk filesystems from /etc/fstab */ + parse_fstab(); + /* Phase 1: sysinit entries */ run_action(ACT_SYSINIT, 1);