From: Tulio A M Mendes Date: Sat, 14 Feb 2026 05:56:56 +0000 (-0300) Subject: perf: VGA shadow buffer + batched TTY output — eliminates MMIO bottleneck X-Git-Url: https://projects.tadryanom.me/docs/static/git-logo.png?a=commitdiff_plain;h=619cf907568dc2f996cf3364687c55e0f488cd19;p=AdrOS.git perf: VGA shadow buffer + batched TTY output — eliminates MMIO bottleneck VGA console was extremely slow in QEMU because every character caused: - 4 outb I/O port writes for cursor update - Direct writes to VGA MMIO (0xB8000) which QEMU traps per-access - Full-screen memmove on MMIO for each scroll Three-layer optimization: 1. Shadow buffer: all VGA writes target a RAM shadow[] array. Only the range from the first to the last dirty cell is flushed to VGA MMIO. Scrolling uses RAM-speed memmove instead of MMIO memmove. 2. Batched TTY output: tty_write_kbuf/tty_write now OPOST-expand into a local buffer and call console_write_buf() once per chunk instead of console_put_char() per character. VGA cursor is updated at the next flush, not per character. 3. Deferred flush: vga_write_buf() (bulk TTY path) does NOT flush to VGA MMIO at all. Screen is refreshed at 50Hz via vga_flush() called from the timer tick. Single-char paths (echo, kprintf) still flush immediately for responsiveness. Result: 20/20 smoke tests in 8s WITHOUT console=serial (was timing out at 90s before). The console=serial workaround is no longer needed. 
Files changed: - src/drivers/vga_console.c: shadow buffer, dirty tracking, flush - src/drivers/timer.c: periodic vga_flush() on tick - src/kernel/tty.c: tty_opost_expand + console_write_buf batching - src/kernel/console.c: new console_write_buf() - include/vga_console.h: vga_write_buf, vga_flush declarations - include/console.h: console_write_buf declaration - iso/boot/grub/grub.cfg: removed console=serial workaround --- diff --git a/include/console.h b/include/console.h index 7829470..1fbdbae 100644 --- a/include/console.h +++ b/include/console.h @@ -2,6 +2,7 @@ #define CONSOLE_H #include +#include #include void console_init(void); @@ -9,6 +10,7 @@ void console_enable_uart(int enabled); void console_enable_vga(int enabled); void console_write(const char* s); +void console_write_buf(const char* buf, uint32_t len); void console_put_char(char c); int kvsnprintf(char* out, size_t out_size, const char* fmt, va_list ap); diff --git a/include/vga_console.h b/include/vga_console.h index c4d3728..841cdb0 100644 --- a/include/vga_console.h +++ b/include/vga_console.h @@ -5,8 +5,10 @@ void vga_init(void); void vga_put_char(char c); +void vga_write_buf(const char* buf, uint32_t len); void vga_print(const char* str); void vga_set_color(uint8_t fg, uint8_t bg); +void vga_flush(void); void vga_clear(void); void vga_scroll_back(void); void vga_scroll_fwd(void); diff --git a/iso/boot/grub/grub.cfg b/iso/boot/grub/grub.cfg index 7de57c2..de40d36 100644 --- a/iso/boot/grub/grub.cfg +++ b/iso/boot/grub/grub.cfg @@ -12,7 +12,7 @@ menuentry "AdrOS (x86)" { } menuentry "AdrOS (x86) - ring3 test" { - multiboot2 /boot/adros-x86.bin ring3 console=serial + multiboot2 /boot/adros-x86.bin ring3 module2 /boot/initrd.img boot } diff --git a/src/drivers/timer.c b/src/drivers/timer.c index d830139..12137e2 100644 --- a/src/drivers/timer.c +++ b/src/drivers/timer.c @@ -2,6 +2,7 @@ #include "console.h" #include "process.h" #include "vdso.h" +#include "vga_console.h" #include "hal/timer.h" @@ -14,6 
+15,7 @@ uint32_t get_tick_count(void) { static void hal_tick_bridge(void) { tick++; vdso_update_tick(tick); + vga_flush(); process_wake_check(tick); schedule(); } diff --git a/src/drivers/vga_console.c b/src/drivers/vga_console.c index fe1f080..3b550d3 100644 --- a/src/drivers/vga_console.c +++ b/src/drivers/vga_console.c @@ -7,6 +7,7 @@ static volatile uint16_t* VGA_BUFFER = 0; #define VGA_WIDTH 80 #define VGA_HEIGHT 25 +#define VGA_CELLS (VGA_WIDTH * VGA_HEIGHT) static int term_col = 0; static int term_row = 0; @@ -14,6 +15,27 @@ static uint8_t term_color = 0x0F; // White on Black static spinlock_t vga_lock = {0}; +/* Shadow buffer in RAM — all writes target this, flushed to VGA MMIO lazily */ +static uint16_t shadow[VGA_CELLS]; +static int dirty_lo = VGA_CELLS; /* first dirty cell index */ +static int dirty_hi = -1; /* last dirty cell index */ + +static void dirty_mark(int lo, int hi) { + if (lo < dirty_lo) dirty_lo = lo; + if (hi > dirty_hi) dirty_hi = hi; +} + +static void vga_flush_to_hw(void) { + if (dirty_lo <= dirty_hi && VGA_BUFFER) { + for (int i = dirty_lo; i <= dirty_hi; i++) { + VGA_BUFFER[i] = shadow[i]; + } + dirty_lo = VGA_CELLS; + dirty_hi = -1; + } + hal_video_set_cursor(term_row, term_col); +} + /* --- Scrollback buffer --- */ #define SB_LINES 200 static uint16_t sb_buf[SB_LINES * VGA_WIDTH]; @@ -21,38 +43,32 @@ static int sb_head = 0; /* next write line (circular) */ static int sb_count = 0; /* stored lines (max SB_LINES) */ static int view_offset = 0; /* 0 = live view, >0 = scrolled back N lines */ -static uint16_t live_buf[VGA_HEIGHT * VGA_WIDTH]; /* saved live VGA when scrolled */ - -static void vga_update_hw_cursor(void) { - hal_video_set_cursor(term_row, term_col); -} +static uint16_t live_buf[VGA_CELLS]; /* saved live screen when scrolled */ static void vga_scroll(void) { /* Save row 0 (about to be lost) into scrollback ring */ - for (int x = 0; x < VGA_WIDTH; x++) { - sb_buf[sb_head * VGA_WIDTH + x] = VGA_BUFFER[x]; - } + 
__builtin_memcpy(&sb_buf[sb_head * VGA_WIDTH], &shadow[0], + VGA_WIDTH * sizeof(uint16_t)); sb_head = (sb_head + 1) % SB_LINES; if (sb_count < SB_LINES) sb_count++; - /* Shift visible content up */ - for (int y = 1; y < VGA_HEIGHT; y++) { - for (int x = 0; x < VGA_WIDTH; x++) { - VGA_BUFFER[(y - 1) * VGA_WIDTH + x] = VGA_BUFFER[y * VGA_WIDTH + x]; - } - } - for (int x = 0; x < VGA_WIDTH; x++) { - VGA_BUFFER[(VGA_HEIGHT - 1) * VGA_WIDTH + x] = (uint16_t)' ' | (uint16_t)term_color << 8; + /* Shift shadow content up (RAM speed — no MMIO) */ + __builtin_memmove(&shadow[0], &shadow[VGA_WIDTH], + (VGA_HEIGHT - 1) * VGA_WIDTH * sizeof(uint16_t)); + { + uint16_t blank = (uint16_t)' ' | (uint16_t)term_color << 8; + for (int x = 0; x < VGA_WIDTH; x++) + shadow[(VGA_HEIGHT - 1) * VGA_WIDTH + x] = blank; } + dirty_mark(0, VGA_CELLS - 1); term_row = VGA_HEIGHT - 1; } /* Restore live view if currently scrolled back */ static void vga_unscroll(void) { if (view_offset > 0) { - for (int i = 0; i < VGA_HEIGHT * VGA_WIDTH; i++) { - VGA_BUFFER[i] = live_buf[i]; - } + __builtin_memcpy(shadow, live_buf, sizeof(shadow)); + dirty_mark(0, VGA_CELLS - 1); view_offset = 0; } } @@ -69,22 +85,23 @@ static void render_scrollback_view(void) { if (line_from_end < VGA_HEIGHT) { /* Live content */ int live_row = VGA_HEIGHT - 1 - line_from_end; - for (int x = 0; x < VGA_WIDTH; x++) { - VGA_BUFFER[y * VGA_WIDTH + x] = live_buf[live_row * VGA_WIDTH + x]; + if (VGA_BUFFER) { + for (int x = 0; x < VGA_WIDTH; x++) + VGA_BUFFER[y * VGA_WIDTH + x] = live_buf[live_row * VGA_WIDTH + x]; } } else { /* Scrollback: sb_idx 0 = most recent scrolled-off line */ int sb_idx = line_from_end - VGA_HEIGHT; if (sb_idx < sb_count) { int buf_line = (sb_head - 1 - sb_idx + SB_LINES) % SB_LINES; - for (int x = 0; x < VGA_WIDTH; x++) { - VGA_BUFFER[y * VGA_WIDTH + x] = sb_buf[buf_line * VGA_WIDTH + x]; + if (VGA_BUFFER) { + for (int x = 0; x < VGA_WIDTH; x++) + VGA_BUFFER[y * VGA_WIDTH + x] = sb_buf[buf_line * VGA_WIDTH + 
x]; } - } else { + } else if (VGA_BUFFER) { /* Beyond scrollback — blank */ - for (int x = 0; x < VGA_WIDTH; x++) { + for (int x = 0; x < VGA_WIDTH; x++) VGA_BUFFER[y * VGA_WIDTH + x] = (uint16_t)' ' | (uint16_t)term_color << 8; - } } } } @@ -93,8 +110,6 @@ static void render_scrollback_view(void) { } static void vga_put_char_unlocked(char c) { - if (!VGA_BUFFER) return; - /* Any new output auto-returns to live view */ vga_unscroll(); @@ -124,7 +139,8 @@ static void vga_put_char_unlocked(char c) { default: if ((unsigned char)c >= ' ') { const int index = term_row * VGA_WIDTH + term_col; - VGA_BUFFER[index] = (uint16_t)(unsigned char)c | (uint16_t)term_color << 8; + shadow[index] = (uint16_t)(unsigned char)c | (uint16_t)term_color << 8; + dirty_mark(index, index); term_col++; } break; @@ -146,17 +162,18 @@ void vga_init(void) { term_row = 0; term_color = 0x07; // Light Grey on Black - if (!VGA_BUFFER) { - return; - } - - for (int y = 0; y < VGA_HEIGHT; y++) { - for (int x = 0; x < VGA_WIDTH; x++) { - const int index = y * VGA_WIDTH + x; - VGA_BUFFER[index] = (uint16_t) ' ' | (uint16_t) term_color << 8; - } - } - vga_update_hw_cursor(); + uint16_t blank = (uint16_t)' ' | (uint16_t)term_color << 8; + for (int i = 0; i < VGA_CELLS; i++) + shadow[i] = blank; + + if (!VGA_BUFFER) return; + + for (int i = 0; i < VGA_CELLS; i++) + VGA_BUFFER[i] = blank; + + dirty_lo = VGA_CELLS; + dirty_hi = -1; + hal_video_set_cursor(0, 0); } void vga_set_color(uint8_t fg, uint8_t bg) { @@ -168,43 +185,47 @@ void vga_set_color(uint8_t fg, uint8_t bg) { void vga_put_char(char c) { uintptr_t flags = spin_lock_irqsave(&vga_lock); vga_put_char_unlocked(c); - vga_update_hw_cursor(); + vga_flush_to_hw(); spin_unlock_irqrestore(&vga_lock, flags); } -void vga_print(const char* str) { +void vga_write_buf(const char* buf, uint32_t len) { uintptr_t flags = spin_lock_irqsave(&vga_lock); - - if (!VGA_BUFFER) { - spin_unlock_irqrestore(&vga_lock, flags); - return; + for (uint32_t i = 0; i < len; i++) { 
+ vga_put_char_unlocked(buf[i]); } + /* No MMIO flush here — deferred to vga_flush() on timer tick */ + spin_unlock_irqrestore(&vga_lock, flags); +} +void vga_print(const char* str) { + uintptr_t flags = spin_lock_irqsave(&vga_lock); for (int i = 0; str[i] != '\0'; i++) { vga_put_char_unlocked(str[i]); } + vga_flush_to_hw(); + spin_unlock_irqrestore(&vga_lock, flags); +} - vga_update_hw_cursor(); +void vga_flush(void) { + uintptr_t flags = spin_lock_irqsave(&vga_lock); + vga_flush_to_hw(); spin_unlock_irqrestore(&vga_lock, flags); } void vga_clear(void) { uintptr_t flags = spin_lock_irqsave(&vga_lock); - if (!VGA_BUFFER) { - spin_unlock_irqrestore(&vga_lock, flags); - return; - } - - for (int i = 0; i < VGA_HEIGHT * VGA_WIDTH; i++) { - VGA_BUFFER[i] = (uint16_t)' ' | (uint16_t)term_color << 8; - } + uint16_t blank = (uint16_t)' ' | (uint16_t)term_color << 8; + for (int i = 0; i < VGA_CELLS; i++) + shadow[i] = blank; + dirty_mark(0, VGA_CELLS - 1); term_col = 0; term_row = 0; view_offset = 0; sb_count = 0; sb_head = 0; - vga_update_hw_cursor(); + vga_flush_to_hw(); spin_unlock_irqrestore(&vga_lock, flags); } @@ -218,10 +239,8 @@ void vga_scroll_back(void) { } if (view_offset == 0) { - /* First scroll back — save current live screen */ - for (int i = 0; i < VGA_HEIGHT * VGA_WIDTH; i++) { - live_buf[i] = VGA_BUFFER[i]; - } + /* First scroll back — save current live screen from shadow */ + __builtin_memcpy(live_buf, shadow, sizeof(live_buf)); } view_offset += VGA_HEIGHT / 2; @@ -241,11 +260,10 @@ void vga_scroll_fwd(void) { if (view_offset <= VGA_HEIGHT / 2) { /* Return to live view */ - for (int i = 0; i < VGA_HEIGHT * VGA_WIDTH; i++) { - VGA_BUFFER[i] = live_buf[i]; - } + __builtin_memcpy(shadow, live_buf, sizeof(shadow)); + dirty_mark(0, VGA_CELLS - 1); view_offset = 0; - vga_update_hw_cursor(); + vga_flush_to_hw(); } else { view_offset -= VGA_HEIGHT / 2; render_scrollback_view(); diff --git a/src/kernel/console.c b/src/kernel/console.c index 10219b4..e3f5eac 100644 
--- a/src/kernel/console.c +++ b/src/kernel/console.c @@ -67,6 +67,23 @@ void console_write(const char* s) { spin_unlock_irqrestore(&g_console_lock, flags); } +void console_write_buf(const char* buf, uint32_t len) { + if (!buf || len == 0) return; + + uintptr_t flags = spin_lock_irqsave(&g_console_lock); + + if (g_console_uart_enabled) { + for (uint32_t i = 0; i < len; i++) { + hal_uart_putc(buf[i]); + } + } + if (g_console_vga_enabled) { + vga_write_buf(buf, len); + } + + spin_unlock_irqrestore(&g_console_lock, flags); +} + void console_put_char(char c) { uintptr_t flags = spin_lock_irqsave(&g_console_lock); diff --git a/src/kernel/tty.c b/src/kernel/tty.c index 16b6c26..9330811 100644 --- a/src/kernel/tty.c +++ b/src/kernel/tty.c @@ -68,6 +68,20 @@ static void tty_output_char(char c) { console_put_char(c); } +/* OPOST-expand src into obuf; return number of bytes written to obuf. */ +static uint32_t tty_opost_expand(const char* src, uint32_t slen, + char* obuf, uint32_t osize) { + uint32_t olen = 0; + int do_onlcr = (tty_oflag & TTY_OPOST) && (tty_oflag & TTY_ONLCR); + for (uint32_t i = 0; i < slen && olen < osize; i++) { + if (do_onlcr && src[i] == '\n' && olen + 1 < osize) { + obuf[olen++] = '\r'; + } + obuf[olen++] = src[i]; + } + return olen; +} + int tty_write_kbuf(const void* kbuf, uint32_t len) { if (!kbuf) return -EFAULT; if (len > 1024 * 1024) return -EINVAL; @@ -80,8 +94,15 @@ int tty_write_kbuf(const void* kbuf, uint32_t len) { } const char* p = (const char*)kbuf; - for (uint32_t i = 0; i < len; i++) { - tty_output_char(p[i]); + char obuf[512]; + uint32_t remaining = len; + while (remaining) { + uint32_t chunk = remaining; + if (chunk > 256) chunk = 256; + uint32_t olen = tty_opost_expand(p, chunk, obuf, sizeof(obuf)); + console_write_buf(obuf, olen); + p += chunk; + remaining -= chunk; } return (int)len; } @@ -509,9 +530,9 @@ int tty_write(const void* user_buf, uint32_t len) { if (copy_from_user(kbuf, (const void*)up, (size_t)chunk) < 0) return 
-EFAULT; - for (uint32_t i = 0; i < chunk; i++) { - tty_output_char(kbuf[i]); - } + char obuf[512]; + uint32_t olen = tty_opost_expand(kbuf, chunk, obuf, sizeof(obuf)); + console_write_buf(obuf, olen); up += chunk; remaining -= chunk;