From 6774a3ad13d8874cae2001e976046323affaaafe Mon Sep 17 00:00:00 2001 From: Tulio A M Mendes Date: Fri, 13 Feb 2026 15:37:02 -0300 Subject: [PATCH] refactor: add VFS poll callback to fs_node_t, eliminate abstraction leaks from syscall.c - Add int (*poll)(struct fs_node*, int events) to fs_node_t in fs.h - Define VFS_POLL_IN/OUT/ERR/HUP constants in fs.h (shared) - Implement poll callbacks: pipe_poll, tty_devfs_poll, pty_master/slave_poll_fn, dev_null_poll, dev_always_ready_poll, kbd_dev_poll - Wire poll into all device nodes: /dev/null, /dev/zero, /dev/random, /dev/urandom, /dev/tty, /dev/console, /dev/ptmx, /dev/pts/N, /dev/kbd, pipe nodes - Refactor poll_wait_kfds: dispatch through node->poll instead of hardcoded pipe name prefix, tty inode==3, pty_is_master/slave_ino checks - Refactor non-blocking read/write: use node->poll instead of pipe name checks and tty/pty inode checks - syscall.c no longer references tty_can_read/write, pty_*_can_read/write_idx, pty_is_master_ino, pty_ino_to_idx for poll/nonblock purposes --- include/fs.h | 7 +++ src/drivers/keyboard.c | 13 +++++ src/kernel/devfs.c | 20 +++++++ src/kernel/pty.c | 21 ++++++++ src/kernel/syscall.c | 118 +++++++++++++++-------------------------- src/kernel/tty.c | 10 ++++ 6 files changed, 114 insertions(+), 75 deletions(-) diff --git a/include/fs.h b/include/fs.h index 1c8d8961..0d9af7bc 100644 --- a/include/fs.h +++ b/include/fs.h @@ -19,6 +19,12 @@ #define FS_BLOCKDEVICE 0x04 #define FS_SYMLINK 0x05 +/* poll() event flags — shared between kernel VFS and syscall layer */ +#define VFS_POLL_IN 0x0001 +#define VFS_POLL_OUT 0x0004 +#define VFS_POLL_ERR 0x0008 +#define VFS_POLL_HUP 0x0010 + typedef struct fs_node { char name[128]; uint32_t flags; @@ -38,6 +44,7 @@ typedef struct fs_node { int (*readdir)(struct fs_node* node, uint32_t* inout_index, void* buf, uint32_t buf_len); int (*ioctl)(struct fs_node* node, uint32_t cmd, void* arg); uintptr_t (*mmap)(struct fs_node* node, uintptr_t addr, uint32_t length, uint32_t prot, uint32_t offset); + int (*poll)(struct fs_node* node, int events); // Directory mutation operations (called on the parent directory node) int (*create)(struct fs_node* dir, const char* name, uint32_t flags, struct fs_node** out); diff --git a/src/drivers/keyboard.c b/src/drivers/keyboard.c index 7b31431f..623134a9 100644 --- a/src/drivers/keyboard.c +++ b/src/drivers/keyboard.c @@ -94,6 +94,18 @@ static uint32_t kbd_dev_read(fs_node_t* node, uint32_t offset, uint32_t size, ui return count; } +static int kbd_dev_poll(fs_node_t* node, int events) { + (void)node; + int revents = 0; + if (events & VFS_POLL_IN) { + uintptr_t flags = spin_lock_irqsave(&scan_lock); + if (scan_head != scan_tail) revents |= VFS_POLL_IN; + spin_unlock_irqrestore(&scan_lock, flags); + } + if (events & VFS_POLL_OUT) revents |= VFS_POLL_OUT; + return revents; +} + static fs_node_t g_dev_kbd_node; void keyboard_init(void) { @@ -115,6 +127,7 @@ void keyboard_register_devfs(void) { g_dev_kbd_node.flags = FS_CHARDEVICE; g_dev_kbd_node.inode = 21; g_dev_kbd_node.read = &kbd_dev_read; + g_dev_kbd_node.poll = &kbd_dev_poll; devfs_register_device(&g_dev_kbd_node); } diff --git a/src/kernel/devfs.c b/src/kernel/devfs.c index dad898a3..cf73b125 100644 --- a/src/kernel/devfs.c +++ b/src/kernel/devfs.c @@ -104,6 +104,22 @@ static uint32_t dev_random_write(fs_node_t* node, uint32_t offset, uint32_t size return size; } +static int dev_null_poll(fs_node_t* node, int events) { + (void)node; + int revents = 0; + if (events & VFS_POLL_IN) revents |= VFS_POLL_IN | VFS_POLL_HUP; + if (events & VFS_POLL_OUT) revents |= VFS_POLL_OUT; + return revents; +} + +static int dev_always_ready_poll(fs_node_t* node, int events) { + (void)node; + int revents = 0; + if (events & VFS_POLL_IN) revents |= VFS_POLL_IN; + if (events & VFS_POLL_OUT) revents |= VFS_POLL_OUT; + return revents; +} + static struct fs_node* devfs_finddir_impl(struct fs_node* node, const char* name) { (void)node; if (!name || name[0] == 0) return 0; @@ -196,6 +212,7 @@ static void devfs_init_once(void) { g_dev_null.length = 0; g_dev_null.read = &dev_null_read; g_dev_null.write = &dev_null_write; + g_dev_null.poll = &dev_null_poll; g_dev_null.open = 0; g_dev_null.close = 0; g_dev_null.finddir = 0; @@ -206,6 +223,7 @@ static void devfs_init_once(void) { g_dev_zero.inode = 7; g_dev_zero.read = &dev_zero_read; g_dev_zero.write = &dev_zero_write; + g_dev_zero.poll = &dev_always_ready_poll; memset(&g_dev_random, 0, sizeof(g_dev_random)); strcpy(g_dev_random.name, "random"); @@ -213,6 +231,7 @@ static void devfs_init_once(void) { g_dev_random.inode = 8; g_dev_random.read = &dev_random_read; g_dev_random.write = &dev_random_write; + g_dev_random.poll = &dev_always_ready_poll; memset(&g_dev_urandom, 0, sizeof(g_dev_urandom)); strcpy(g_dev_urandom.name, "urandom"); @@ -220,6 +239,7 @@ static void devfs_init_once(void) { g_dev_urandom.inode = 9; g_dev_urandom.read = &dev_random_read; g_dev_urandom.write = &dev_random_write; + g_dev_urandom.poll = &dev_always_ready_poll; } fs_node_t* devfs_create_root(void) { diff --git a/src/kernel/pty.c b/src/kernel/pty.c index 0b199329..75ff7033 100644 --- a/src/kernel/pty.c +++ b/src/kernel/pty.c @@ -91,6 +91,24 @@ static uint32_t pty_slave_read_fn(fs_node_t* node, uint32_t offset, uint32_t siz static uint32_t pty_slave_write_fn(fs_node_t* node, uint32_t offset, uint32_t size, const uint8_t* buffer); static int pty_slave_ioctl_fn(fs_node_t* node, uint32_t cmd, void* arg); +static int pty_master_poll_fn(fs_node_t* node, int events) { + int idx = pty_ino_to_idx(node->inode); + if (idx < 0) return 0; + int revents = 0; + if ((events & VFS_POLL_IN) && pty_master_can_read_idx(idx)) revents |= VFS_POLL_IN; + if ((events & VFS_POLL_OUT) && pty_master_can_write_idx(idx)) revents |= VFS_POLL_OUT; + return revents; +} + +static int pty_slave_poll_fn(fs_node_t* node, int events) { + int idx = pty_ino_to_idx(node->inode); + if (idx < 0) return 0; + int revents = 0; + if ((events & VFS_POLL_IN) && pty_slave_can_read_idx(idx)) revents |= VFS_POLL_IN; + if ((events & VFS_POLL_OUT) && pty_slave_can_write_idx(idx)) revents |= VFS_POLL_OUT; + return revents; +} + static void pty_init_pair(int idx) { struct pty_pair* p = &g_ptys[idx]; memset(p, 0, sizeof(*p)); @@ -104,6 +122,7 @@ static void pty_init_pair(int idx) { p->master_node.inode = PTY_MASTER_INO_BASE + (uint32_t)idx; p->master_node.read = &pty_master_read_fn; p->master_node.write = &pty_master_write_fn; + p->master_node.poll = &pty_master_poll_fn; memset(&p->slave_node, 0, sizeof(p->slave_node)); name[0] = '0' + (char)idx; @@ -114,6 +133,7 @@ static void pty_init_pair(int idx) { p->slave_node.read = &pty_slave_read_fn; p->slave_node.write = &pty_slave_write_fn; p->slave_node.ioctl = &pty_slave_ioctl_fn; + p->slave_node.poll = &pty_slave_poll_fn; } /* --- DevFS pts directory callbacks --- */ @@ -206,6 +226,7 @@ void pty_init(void) { g_dev_ptmx_node.inode = PTY_MASTER_INO_BASE; g_dev_ptmx_node.read = &pty_ptmx_read_fn; g_dev_ptmx_node.write = &pty_ptmx_write_fn; + g_dev_ptmx_node.poll = &pty_master_poll_fn; devfs_register_device(&g_dev_ptmx_node); /* Register /dev/pts directory */ diff --git a/src/kernel/syscall.c b/src/kernel/syscall.c index 331a3ebe..43f55814 100644 --- a/src/kernel/syscall.c +++ b/src/kernel/syscall.c @@ -255,10 +255,6 @@ static int poll_wait_kfds(struct pollfd* kfds, uint32_t nfds, int32_t timeout) { if (!kfds) return -EINVAL; if (nfds > 64U) return -EINVAL; - // timeout semantics (minimal): - // - timeout == 0 : non-blocking - // - timeout < 0 : block forever - // - timeout > 0 : treated as "ticks" (best-effort) extern uint32_t get_tick_count(void); uint32_t start_tick = get_tick_count(); @@ -272,52 +268,25 @@ static int poll_wait_kfds(struct pollfd* kfds, uint32_t nfds, int32_t timeout) { struct file* f = fd_get(fd); if (!f || !f->node) { kfds[i].revents |= POLLERR; + ready++; continue; } fs_node_t* n = f->node; - // Pipes (identified by node name prefix). - if (n->name[0] == 'p' && n->name[1] == 'i' && n->name[2] == 'p' && n->name[3] == 'e' && n->name[4] == ':') { - struct pipe_node* pn = (struct pipe_node*)n; - struct pipe_state* ps = pn->ps; - if (!ps) { - kfds[i].revents |= POLLERR; - } else if (pn->is_read_end) { - if ((kfds[i].events & POLLIN) && (ps->count > 0 || ps->writers == 0)) { - kfds[i].revents |= POLLIN; - if (ps->writers == 0) kfds[i].revents |= POLLHUP; - } - } else { - if (ps->readers == 0) { - if (kfds[i].events & POLLOUT) kfds[i].revents |= POLLERR; - } else { - uint32_t free = ps->cap - ps->count; - if ((kfds[i].events & POLLOUT) && free > 0) { - kfds[i].revents |= POLLOUT; - } - } - } - } else if (n->flags == FS_CHARDEVICE) { - // devfs devices: inode 2=/dev/null, 3=/dev/tty - if (n->inode == 2) { - if (kfds[i].events & POLLIN) kfds[i].revents |= POLLIN | POLLHUP; - if (kfds[i].events & POLLOUT) kfds[i].revents |= POLLOUT; - } else if (n->inode == 3) { - if ((kfds[i].events & POLLIN) && tty_can_read()) kfds[i].revents |= POLLIN; - if ((kfds[i].events & POLLOUT) && tty_can_write()) kfds[i].revents |= POLLOUT; - } else if (pty_is_master_ino(n->inode)) { - int pi = pty_ino_to_idx(n->inode); - if ((kfds[i].events & POLLIN) && pty_master_can_read_idx(pi)) kfds[i].revents |= POLLIN; - if ((kfds[i].events & POLLOUT) && pty_master_can_write_idx(pi)) kfds[i].revents |= POLLOUT; - } else if (pty_is_slave_ino(n->inode)) { - int pi = pty_ino_to_idx(n->inode); - if ((kfds[i].events & POLLIN) && pty_slave_can_read_idx(pi)) kfds[i].revents |= POLLIN; - if ((kfds[i].events & POLLOUT) && pty_slave_can_write_idx(pi)) kfds[i].revents |= POLLOUT; - } + if (n->poll) { + int vfs_events = 0; + if (kfds[i].events & POLLIN) vfs_events |= VFS_POLL_IN; + if (kfds[i].events & POLLOUT) vfs_events |= VFS_POLL_OUT; + + int vfs_rev = n->poll(n, vfs_events); + + if (vfs_rev & VFS_POLL_IN) kfds[i].revents |= POLLIN; + if (vfs_rev & VFS_POLL_OUT) kfds[i].revents |= POLLOUT; + if (vfs_rev & VFS_POLL_ERR) kfds[i].revents |= POLLERR; + if (vfs_rev & VFS_POLL_HUP) kfds[i].revents |= POLLHUP; } else { - // Regular files are always readable/writable (best-effort). - if (kfds[i].events & POLLIN) kfds[i].revents |= POLLIN; + if (kfds[i].events & POLLIN) kfds[i].revents |= POLLIN; if (kfds[i].events & POLLOUT) kfds[i].revents |= POLLOUT; } @@ -418,6 +387,27 @@ static void pipe_close(fs_node_t* n) { } } +static int pipe_poll(fs_node_t* n, int events) { + struct pipe_node* pn = (struct pipe_node*)n; + if (!pn || !pn->ps) return VFS_POLL_ERR; + struct pipe_state* ps = pn->ps; + int revents = 0; + if (pn->is_read_end) { + if ((events & VFS_POLL_IN) && (ps->count > 0 || ps->writers == 0)) { + revents |= VFS_POLL_IN; + if (ps->writers == 0) revents |= VFS_POLL_HUP; + } + } else { + if (ps->readers == 0) { + if (events & VFS_POLL_OUT) revents |= VFS_POLL_ERR; + } else { + uint32_t free = ps->cap - ps->count; + if ((events & VFS_POLL_OUT) && free > 0) revents |= VFS_POLL_OUT; + } + } + return revents; +} + static int pipe_node_create(struct pipe_state* ps, int is_read_end, fs_node_t** out_node) { if (!ps || !out_node) return -EINVAL; struct pipe_node* pn = (struct pipe_node*)kmalloc(sizeof(*pn)); @@ -431,6 +421,7 @@ static int pipe_node_create(struct pipe_state* ps, int is_read_end, fs_node_t** pn->node.open = NULL; pn->node.finddir = NULL; pn->node.close = pipe_close; + pn->node.poll = pipe_poll; if (pn->is_read_end) { strcpy(pn->node.name, "pipe:r"); pn->node.read = pipe_read; @@ -1229,26 +1220,10 @@ static int syscall_read_impl(int fd, void* user_buf, uint32_t len) { if (!f || !f->node) return -EBADF; int nonblock = (f->flags & O_NONBLOCK) ? 1 : 0; - if (nonblock) { - // Non-blocking pipes: if empty but writers exist, return -EAGAIN. - if (f->node->name[0] == 'p' && f->node->name[1] == 'i' && f->node->name[2] == 'p' && f->node->name[3] == 'e' && f->node->name[4] == ':') { - struct pipe_node* pn = (struct pipe_node*)f->node; - struct pipe_state* ps = pn ? pn->ps : 0; - if (pn && ps && pn->is_read_end && ps->count == 0 && ps->writers != 0) { - return -EAGAIN; - } - } - - // Non-blocking char devices (tty/pty) need special handling, since devfs read blocks. - if (f->node->flags == FS_CHARDEVICE) { - if (f->node->inode == 3) { - if (!tty_can_read()) return -EAGAIN; - } else if (pty_is_master_ino(f->node->inode)) { - if (!pty_master_can_read_idx(pty_ino_to_idx(f->node->inode))) return -EAGAIN; - } else if (pty_is_slave_ino(f->node->inode)) { - if (!pty_slave_can_read_idx(pty_ino_to_idx(f->node->inode))) return -EAGAIN; - } - } + if (nonblock && f->node->poll) { + int rev = f->node->poll(f->node, VFS_POLL_IN); + if (!(rev & (VFS_POLL_IN | VFS_POLL_ERR | VFS_POLL_HUP))) + return -EAGAIN; } if (f->node->flags == FS_CHARDEVICE) { @@ -1311,17 +1286,10 @@ static int syscall_write_impl(int fd, const void* user_buf, uint32_t len) { if (!f || !f->node) return -EBADF; int nonblock = (f->flags & O_NONBLOCK) ? 1 : 0; - if (nonblock) { - // Non-blocking pipe write: if full but readers exist, return -EAGAIN. - if (f->node->name[0] == 'p' && f->node->name[1] == 'i' && f->node->name[2] == 'p' && f->node->name[3] == 'e' && f->node->name[4] == ':') { - struct pipe_node* pn = (struct pipe_node*)f->node; - struct pipe_state* ps = pn ? pn->ps : 0; - if (pn && ps && !pn->is_read_end) { - if (ps->readers != 0 && (ps->cap - ps->count) == 0) { - return -EAGAIN; - } - } - } + if (nonblock && f->node->poll) { + int rev = f->node->poll(f->node, VFS_POLL_OUT); + if (!(rev & (VFS_POLL_OUT | VFS_POLL_ERR))) + return -EAGAIN; } if (!f->node->write) return -ESPIPE; if (((f->node->flags & FS_FILE) == 0) && f->node->flags != FS_CHARDEVICE) return -ESPIPE; diff --git a/src/kernel/tty.c b/src/kernel/tty.c index eae85457..5f3f70ca 100644 --- a/src/kernel/tty.c +++ b/src/kernel/tty.c @@ -420,6 +420,14 @@ static int tty_devfs_ioctl(fs_node_t* node, uint32_t cmd, void* arg) { return tty_ioctl(cmd, arg); } +static int tty_devfs_poll(fs_node_t* node, int events) { + (void)node; + int revents = 0; + if ((events & VFS_POLL_IN) && tty_can_read()) revents |= VFS_POLL_IN; + if ((events & VFS_POLL_OUT) && tty_can_write()) revents |= VFS_POLL_OUT; + return revents; +} + void tty_init(void) { spinlock_init(&tty_lock); line_len = 0; @@ -438,6 +446,7 @@ void tty_init(void) { g_dev_console_node.read = &tty_devfs_read; g_dev_console_node.write = &tty_devfs_write; g_dev_console_node.ioctl = &tty_devfs_ioctl; + g_dev_console_node.poll = &tty_devfs_poll; devfs_register_device(&g_dev_console_node); /* Register /dev/tty */ @@ -448,6 +457,7 @@ void tty_init(void) { g_dev_tty_node.read = &tty_devfs_read; g_dev_tty_node.write = &tty_devfs_write; g_dev_tty_node.ioctl = &tty_devfs_ioctl; + g_dev_tty_node.poll = &tty_devfs_poll; devfs_register_device(&g_dev_tty_node); } -- 2.43.0