From e93e60ca6ca9695e60f8b3bce027b9d74e1b5e77 Mon Sep 17 00:00:00 2001 From: Tulio A M Mendes Date: Mon, 4 May 2026 01:41:13 -0300 Subject: [PATCH] kernel: add VFS spinlock (g_vfs_lock) to fix SMP mount-table races The VFS mount table (g_mounts[], g_mount_count, fs_root) had zero locking protection. On SMP (4 CPUs), concurrent access during pivot_root/mount/umount2 corrupted the mount table, causing: - Mount points disappearing: vfs_umount shifts array entries while another CPU's vfs_lookup iterates, skipping/duplicating entries - Sporadic QEMU reboot: corrupted fs_node_t* pointer from torn reads causes kernel page fault -> triple fault -> reboot Fix: - Add spinlock_t g_vfs_lock in fs.c protecting fs_root, g_mounts[], and g_mount_count - Create _nolock variants (vfs_mount_nolock, vfs_umount_nolock) for compound operations that need atomicity across multiple mutations - Public vfs_mount/vfs_umount acquire the lock automatically - vfs_lookup_depth snapshots mount table under lock before traversal - SYSCALL_PIVOT_ROOT holds g_vfs_lock across the entire compound mutation (fs_root update + two vfs_mount_nolock calls) - Export g_vfs_lock and _nolock variants in fs.h Also: - fulltest pivot_root test: undo pivot_root in child before exit, verify /dev/null accessible after undo - ulibc strrchr: add null-pointer guard (cppcheck ctunullpointer) - fulltest: struct init {0} instead of {{0}} (cppcheck uninitvar) --- include/fs.h | 11 +++++++++++ src/kernel/fs.c | 34 +++++++++++++++++++++++++++++++--- src/kernel/syscall.c | 9 +++++++-- user/cmds/fulltest/fulltest.c | 32 ++++++++++++++++++++++++++++++-- user/ulibc/src/string.c | 1 + 5 files changed, 80 insertions(+), 7 deletions(-) diff --git a/include/fs.h b/include/fs.h index ad378aac..ded10d7a 100644 --- a/include/fs.h +++ b/include/fs.h @@ -12,6 +12,7 @@ #include #include +#include "spinlock.h" #define FS_FILE 0x01 #define FS_DIRECTORY 0x02 @@ -101,6 +102,16 @@ int vfs_link(const char* old_path, const char* new_path); int vfs_mount(const char* mountpoint, fs_node_t* root); int vfs_umount(const char* mountpoint); +/* _nolock variants — caller must already hold g_vfs_lock. + * Used by compound operations like pivot_root that need atomicity + * across multiple mount-table modifications. */ +int vfs_mount_nolock(const char* mountpoint, fs_node_t* root); +int vfs_umount_nolock(const char* mountpoint); + +/* Global VFS spinlock — protects fs_root, g_mounts[], g_mount_count. + * Acquire via spin_lock_irqsave() for any compound VFS mutation. */ +extern spinlock_t g_vfs_lock; + // Global root of the filesystem extern fs_node_t* fs_root; diff --git a/src/kernel/fs.c b/src/kernel/fs.c index 84379527..8aa7545b 100644 --- a/src/kernel/fs.c +++ b/src/kernel/fs.c @@ -11,10 +11,16 @@ #include "utils.h" #include "errno.h" +#include "spinlock.h" fs_node_t* fs_root = NULL; static fs_node_t* g_initrd_root = NULL; +/* Global VFS spinlock — protects fs_root, g_mounts[], and g_mount_count. + * Must be held across any compound operation (e.g. pivot_root) that + * modifies more than one of these fields. */ +spinlock_t g_vfs_lock; + struct vfs_mount { char mountpoint[128]; fs_node_t* root; @@ -59,7 +65,7 @@ static void normalize_mountpoint(const char* in, char* out, size_t out_sz) { } } -int vfs_mount(const char* mountpoint, fs_node_t* root) { +int vfs_mount_nolock(const char* mountpoint, fs_node_t* root) { if (!root) return -EINVAL; if (g_mount_count >= (int)(sizeof(g_mounts) / sizeof(g_mounts[0]))) return -ENOSPC; @@ -79,7 +85,14 @@ int vfs_mount(const char* mountpoint, fs_node_t* root) { return 0; } -int vfs_umount(const char* mountpoint) { +int vfs_mount(const char* mountpoint, fs_node_t* root) { + uintptr_t fl = spin_lock_irqsave(&g_vfs_lock); + int ret = vfs_mount_nolock(mountpoint, root); + spin_unlock_irqrestore(&g_vfs_lock, fl); + return ret; +} + +int vfs_umount_nolock(const char* mountpoint) { char mp[128]; normalize_mountpoint(mountpoint, mp, sizeof(mp)); @@ -96,6 +109,13 @@ int vfs_umount(const char* mountpoint) { return -EINVAL; } +int vfs_umount(const char* mountpoint) { + uintptr_t fl = spin_lock_irqsave(&g_vfs_lock); + int ret = vfs_umount_nolock(mountpoint); + spin_unlock_irqrestore(&g_vfs_lock, fl); + return ret; +} + uint32_t vfs_read(fs_node_t* node, uint32_t offset, uint32_t size, uint8_t* buffer) { if (node->f_ops && node->f_ops->read) return node->f_ops->read(node, offset, size, buffer); @@ -160,9 +180,15 @@ fs_node_t* vfs_lookup_initrd(const char* path) { } static fs_node_t* vfs_lookup_depth(const char* path, int depth) { - if (!path || !fs_root) return NULL; + if (!path) return NULL; if (depth > 8) return NULL; + /* Snapshot mount-table state under the VFS lock so that concurrent + * mount/umount/pivot_root on another CPU cannot corrupt our view. */ + uintptr_t fl = spin_lock_irqsave(&g_vfs_lock); + + if (!fs_root) { spin_unlock_irqrestore(&g_vfs_lock, fl); return NULL; } + fs_node_t* base = fs_root; const char* rel = path; size_t best_len = 0; @@ -181,6 +207,8 @@ static fs_node_t* vfs_lookup_depth(const char* path, int depth) { } } + spin_unlock_irqrestore(&g_vfs_lock, fl); + if (!rel) return NULL; while (*rel == '/') rel++; if (*rel == 0) return base; diff --git a/src/kernel/syscall.c b/src/kernel/syscall.c index 78ca1120..5017eb83 100644 --- a/src/kernel/syscall.c +++ b/src/kernel/syscall.c @@ -4878,12 +4878,17 @@ static void socket_syscall_dispatch(struct registers* regs, uint32_t syscall_no) sc_ret(regs) = (uint32_t)-EINVAL; return; } + /* Hold g_vfs_lock across the compound mutation so that + * concurrent vfs_lookup() on other CPUs never sees an + * inconsistent (fs_root, mount-table) pair. */ + uintptr_t vfs_fl = spin_lock_irqsave(&g_vfs_lock); fs_node_t* old_root = fs_root; fs_root = new_root; - (void)vfs_mount("/", new_root); + (void)vfs_mount_nolock("/", new_root); if (old_root) { - (void)vfs_mount(kput, old_root); + (void)vfs_mount_nolock(kput, old_root); } + spin_unlock_irqrestore(&g_vfs_lock, vfs_fl); sc_ret(regs) = 0; return; } diff --git a/user/cmds/fulltest/fulltest.c b/user/cmds/fulltest/fulltest.c index f5b82bf4..8fdc5aa5 100644 --- a/user/cmds/fulltest/fulltest.c +++ b/user/cmds/fulltest/fulltest.c @@ -606,7 +606,7 @@ static int sys_sigaction(int sig, void (*handler)(int), uintptr_t* old_out) { act.sa_mask = 0; act.sa_flags = 0; - struct sigaction oldact = {{0}}; + struct sigaction oldact = {0}; struct sigaction* oldp = old_out ? &oldact : 0; int r = sys_sigaction2(sig, &act, oldp); @@ -5218,7 +5218,7 @@ void _start(void) { (uint32_t)(sizeof("[test] execveat OK\n") - 1)); } - // I17: pivot_root — mount tmpfs + pivot_root + verify (isolated fork) + // I17: pivot_root — mount tmpfs + pivot_root + undo (isolated fork) { int pid = sys_fork(); if (pid < 0) { @@ -5243,6 +5243,34 @@ void _start(void) { sys_exit(1); } + /* Undo pivot_root to restore global VFS state. + * Without per-process mount namespaces, pivot_root modifies + * the global fs_root and "/" mount entry. After the child + * exits the parent (and the shell) would see an empty tmpfs + * as "/", breaking all command lookups via access()/vfs_lookup(). + * Pivot back so the overlayfs root is restored. */ + (void)sys_mkdir("/undo_dir"); + if (sys_pivot_root("/tmp/pivot_old", "/undo_dir") < 0) { + sys_write(1, "[test] pivot_root undo failed\n", + (uint32_t)(sizeof("[test] pivot_root undo failed\n") - 1)); + sys_exit(1); + } + (void)sys_umount2("/undo_dir"); + (void)sys_umount2("/tmp/pivot_old"); + (void)sys_umount2("/tmp/pivot_new"); + + /* Verify that the original mount points are accessible + * after undoing pivot_root. */ + { + int vfd = sys_open("/dev/null", 0); + if (vfd < 0) { + sys_write(1, "[test] pivot_root undo verify /dev failed\n", + (uint32_t)(sizeof("[test] pivot_root undo verify /dev failed\n") - 1)); + sys_exit(1); + } + (void)sys_close(vfd); + } + sys_exit(0); } int st = 0; diff --git a/user/ulibc/src/string.c b/user/ulibc/src/string.c index 9fd94154..94460915 100644 --- a/user/ulibc/src/string.c +++ b/user/ulibc/src/string.c @@ -84,6 +84,7 @@ char* strchr(const char* s, int c) { } char* strrchr(const char* s, int c) { + if (!s) return (void*)0; const char* last = (void*)0; while (*s) { if (*s == (char)c) last = s; -- 2.43.0