From: Tulio A M Mendes Date: Mon, 16 Feb 2026 17:23:24 +0000 (-0300) Subject: feat: PLT/GOT lazy binding — userspace resolver trampoline X-Git-Url: https://projects.tadryanom.me/docs/static/gitweb.css?a=commitdiff_plain;h=fbc038db1ed35835de6a51dd9b3da8cafc8a1835;p=AdrOS.git feat: PLT/GOT lazy binding — userspace resolver trampoline Kernel (elf.c): - Skip R_386_JMP_SLOT relocations when PT_INTERP present (let ld.so resolve lazily) - Load DT_NEEDED shared libraries at SHLIB_BASE (0x11000000) - Support ET_EXEC and ET_DYN interpreters with correct base offset - Fix AT_PHDR auxv computation for PIE binaries - Store auxv in static buffer for execve to push in correct stack position - Use pmm_alloc_page() instead of restrictive low-16MB allocator Execve (syscall.c): - Push auxv entries right after envp[] (Linux stack layout convention) so ld.so can find them by walking argc → argv[] → envp[] → auxv ld.so (ldso.c): - Complete rewrite for lazy PLT/GOT binding - Parse auxv (AT_ENTRY, AT_PHDR, AT_PHNUM, AT_PHENT) - Find PT_DYNAMIC, extract DT_PLTGOT/DT_JMPREL/DT_PLTRELSZ/DT_SYMTAB/DT_STRTAB - Set GOT[1]=link_map, GOT[2]=_dl_runtime_resolve trampoline - Implement _dl_runtime_resolve asm trampoline + dl_fixup C resolver - Symbol lookup in shared library via DT_HASH at SHLIB_BASE - Compiled as non-PIC ET_EXEC at INTERP_BASE (0x12000000) VMM (vmm.c): - Use pmm_alloc_page() for page table allocation (PAE PTs can be anywhere) Test infrastructure: - PIE test binary (pie_main.c) calls test_add() from libpietest.so via PLT - Shared library (pie_func.c) provides test_add() - Smoke test patterns for lazy PLT OK + PLT cached OK - 80/83 smoke tests pass, cppcheck clean --- diff --git a/Makefile b/Makefile index f018260..7da7699 100644 --- a/Makefile +++ b/Makefile @@ -81,6 +81,8 @@ ifeq ($(ARCH),x86) MKDIR_ELF := user/mkdir.elf RM_ELF := user/rm.elf LDSO_ELF := user/ld.so + PIE_SO := user/libpietest.so + PIE_ELF := user/pie_test.elf DOOM_ELF := user/doom/doom.elf INITRD_IMG := initrd.img 
MKINITRD := tools/mkinitrd @@ -187,12 +189,20 @@ $(MKDIR_ELF): user/mkdir.c user/linker.ld $(RM_ELF): user/rm.c user/linker.ld @i686-elf-gcc -m32 -I include -ffreestanding -fno-pie -no-pie -nostdlib -Wl,-T,user/linker.ld -o $(RM_ELF) user/rm.c user/errno.c -$(LDSO_ELF): user/ldso.c user/linker.ld - @i686-elf-gcc -m32 -I include -ffreestanding -fno-pie -no-pie -nostdlib -Wl,-T,user/linker.ld -o $(LDSO_ELF) user/ldso.c +$(LDSO_ELF): user/ldso.c user/ldso_linker.ld + @i686-elf-gcc -m32 -ffreestanding -fno-pie -no-pie -nostdlib -Wl,-T,user/ldso_linker.ld -o $(LDSO_ELF) user/ldso.c + +$(PIE_SO): user/pie_func.c + @i686-elf-gcc -m32 -fPIC -fno-plt -c user/pie_func.c -o user/pie_func.o + @i686-elf-ld -m elf_i386 -shared -soname libpietest.so -o $(PIE_SO) user/pie_func.o + +$(PIE_ELF): user/pie_main.c user/pie_linker.ld $(PIE_SO) + @i686-elf-gcc -m32 -fPIC -c user/pie_main.c -o user/pie_main.o + @i686-elf-ld -m elf_i386 -pie --dynamic-linker=/lib/ld.so -T user/pie_linker.ld -o $(PIE_ELF) user/pie_main.o $(PIE_SO) -rpath /lib FSTAB := rootfs/etc/fstab -INITRD_FILES := $(USER_ELF):bin/init.elf $(ECHO_ELF):bin/echo.elf $(SH_ELF):bin/sh $(CAT_ELF):bin/cat $(LS_ELF):bin/ls $(MKDIR_ELF):bin/mkdir $(RM_ELF):bin/rm $(LDSO_ELF):lib/ld.so $(FSTAB):etc/fstab -INITRD_DEPS := $(MKINITRD) $(USER_ELF) $(ECHO_ELF) $(SH_ELF) $(CAT_ELF) $(LS_ELF) $(MKDIR_ELF) $(RM_ELF) $(LDSO_ELF) $(FSTAB) +INITRD_FILES := $(USER_ELF):bin/init.elf $(ECHO_ELF):bin/echo.elf $(SH_ELF):bin/sh $(CAT_ELF):bin/cat $(LS_ELF):bin/ls $(MKDIR_ELF):bin/mkdir $(RM_ELF):bin/rm $(LDSO_ELF):lib/ld.so $(PIE_SO):lib/libpietest.so $(PIE_ELF):bin/pie_test.elf $(FSTAB):etc/fstab +INITRD_DEPS := $(MKINITRD) $(USER_ELF) $(ECHO_ELF) $(SH_ELF) $(CAT_ELF) $(LS_ELF) $(MKDIR_ELF) $(RM_ELF) $(LDSO_ELF) $(PIE_SO) $(PIE_ELF) $(FSTAB) # Include doom.elf if it has been built ifneq ($(wildcard $(DOOM_ELF)),) diff --git a/include/elf.h b/include/elf.h index 31b474a..dfa10ee 100644 --- a/include/elf.h +++ b/include/elf.h @@ -77,6 +77,7 @@ 
typedef struct { #define DT_REL 17 #define DT_RELSZ 18 #define DT_RELENT 19 +#define DT_PLTGOT 3 #define DT_JMPREL 23 #define DT_PLTRELSZ 2 #define DT_PLTREL 20 @@ -123,4 +124,8 @@ typedef struct { int elf32_load_user_from_initrd(const char* filename, uintptr_t* entry_out, uintptr_t* user_stack_top_out, uintptr_t* addr_space_out, uintptr_t* heap_break_out); +/* Retrieve and consume pending auxv entries (set by elf32_load_user_from_initrd + * when an interpreter is present). Returns number of entries copied, 0 if none. */ +int elf32_pop_pending_auxv(elf32_auxv_t* out, int max); + #endif diff --git a/src/arch/x86/elf.c b/src/arch/x86/elf.c index c180209..afa3e9b 100644 --- a/src/arch/x86/elf.c +++ b/src/arch/x86/elf.c @@ -14,16 +14,19 @@ #include -static void* pmm_alloc_page_low_16mb(void) { - for (int tries = 0; tries < 4096; tries++) { - void* p = pmm_alloc_page(); - if (!p) return NULL; - if ((uintptr_t)p < 0x01000000U) { - return p; - } - pmm_free_page(p); - } - return NULL; +/* Pending auxv buffer — filled by elf32_load_user_from_initrd when an + * interpreter is present, consumed by execve to push onto the user stack + * in the correct position (right after envp[]). 
*/ +static elf32_auxv_t g_pending_auxv[8]; +static int g_pending_auxv_count = 0; + +int elf32_pop_pending_auxv(elf32_auxv_t* out, int max) { + int n = g_pending_auxv_count; + if (n == 0) return 0; + if (n > max) n = max; + for (int i = 0; i < n; i++) out[i] = g_pending_auxv[i]; + g_pending_auxv_count = 0; + return n; } static int elf32_validate(const elf32_ehdr_t* eh, size_t file_len) { @@ -49,8 +52,7 @@ static int elf32_validate(const elf32_ehdr_t* eh, size_t file_len) { if (ph_end < eh->e_phoff) return -EINVAL; if (ph_end > file_len) return -EINVAL; - if (eh->e_entry == 0) return -EINVAL; - if (eh->e_entry >= hal_mm_kernel_virt_base()) return -EINVAL; + if (eh->e_entry != 0 && eh->e_entry >= hal_mm_kernel_virt_base()) return -EINVAL; return 0; } @@ -85,7 +87,7 @@ static int elf32_map_user_range(uintptr_t as, uintptr_t vaddr, size_t len, uint3 } if (!already_mapped) { - void* phys = pmm_alloc_page_low_16mb(); + void* phys = pmm_alloc_page(); if (!phys) { vmm_as_activate(old_as); return -ENOMEM; @@ -142,9 +144,10 @@ static int elf32_load_segments(const uint8_t* file, uint32_t file_len, /* Process ELF relocations from PT_DYNAMIC segment. * base_offset is 0 for ET_EXEC, non-zero for PIE/shared objects. + * skip_jmpslot: if true, skip R_386_JMP_SLOT (let ld.so handle lazily). * The target address space must already be activated. 
*/ static void elf32_process_relocations(const uint8_t* file, uint32_t file_len, - uintptr_t base_offset) { + uintptr_t base_offset, int skip_jmpslot) { const elf32_ehdr_t* eh = (const elf32_ehdr_t*)file; const elf32_phdr_t* ph = (const elf32_phdr_t*)(file + eh->e_phoff); @@ -188,8 +191,10 @@ static void elf32_process_relocations(const uint8_t* file, uint32_t file_len, case R_386_RELATIVE: \ *target += (uint32_t)base_offset; \ break; \ - case R_386_GLOB_DAT: \ - case R_386_JMP_SLOT: { \ + case R_386_JMP_SLOT: \ + if (skip_jmpslot) break; \ + /* fall through */ \ + case R_386_GLOB_DAT: { \ uint32_t sym_idx = ELF32_R_SYM(r->r_info); \ if (symtab_addr && sym_idx) { \ const elf32_sym_t* sym = &((const elf32_sym_t*) \ @@ -231,9 +236,97 @@ static void elf32_process_relocations(const uint8_t* file, uint32_t file_len, #undef APPLY_REL } +/* Load a shared library ELF at the given base VA. + * Returns 0 on success, fills *loaded_end with highest mapped address. */ +static int elf32_load_shared_lib_at(const char* path, uintptr_t as, + uintptr_t base, uintptr_t* loaded_end) { + fs_node_t* node = vfs_lookup(path); + if (!node) return -ENOENT; + + uint32_t flen = node->length; + if (flen < sizeof(elf32_ehdr_t)) return -EINVAL; + + uint8_t* fbuf = (uint8_t*)kmalloc(flen); + if (!fbuf) return -ENOMEM; + + if (vfs_read(node, 0, flen, fbuf) != flen) { + kfree(fbuf); + return -EIO; + } + + const elf32_ehdr_t* eh = (const elf32_ehdr_t*)fbuf; + int vrc = elf32_validate(eh, flen); + if (vrc < 0) { kfree(fbuf); return vrc; } + + uintptr_t seg_end = 0; + int rc = elf32_load_segments(fbuf, flen, as, base, &seg_end); + if (rc < 0) { kfree(fbuf); return rc; } + + elf32_process_relocations(fbuf, flen, base, 0); + + if (loaded_end) *loaded_end = seg_end; + kfree(fbuf); + return 0; +} + +/* Load DT_NEEDED shared libraries from the main binary's PT_DYNAMIC. + * Libraries are loaded sequentially starting at *next_base. + * Returns number of libraries loaded. 
*/ +#define SHLIB_BASE 0x11000000U + +static int elf32_load_needed_libs(const uint8_t* file, uint32_t file_len, + uintptr_t as, uintptr_t base_offset) { + const elf32_ehdr_t* eh = (const elf32_ehdr_t*)file; + const elf32_phdr_t* ph = (const elf32_phdr_t*)(file + eh->e_phoff); + + const elf32_phdr_t* dyn_ph = NULL; + for (uint16_t i = 0; i < eh->e_phnum; i++) { + if (ph[i].p_type == PT_DYNAMIC) { dyn_ph = &ph[i]; break; } + } + if (!dyn_ph) return 0; + if (dyn_ph->p_offset + dyn_ph->p_filesz > file_len) return 0; + + const elf32_dyn_t* dyn = (const elf32_dyn_t*)(file + dyn_ph->p_offset); + uint32_t dyn_count = dyn_ph->p_filesz / sizeof(elf32_dyn_t); + + uint32_t strtab_addr = 0; + for (uint32_t i = 0; i < dyn_count && dyn[i].d_tag != DT_NULL; i++) { + if (dyn[i].d_tag == DT_STRTAB) { strtab_addr = dyn[i].d_val; break; } + } + if (!strtab_addr) return 0; + + const char* strtab = (const char*)(strtab_addr + base_offset); + uintptr_t lib_base = SHLIB_BASE; + int loaded = 0; + + for (uint32_t i = 0; i < dyn_count && dyn[i].d_tag != DT_NULL; i++) { + if (dyn[i].d_tag != DT_NEEDED) continue; + const char* libname = strtab + dyn[i].d_val; + + char path[128]; + int plen = 0; + const char* pfx = "/lib/"; + while (*pfx && plen < 122) path[plen++] = *pfx++; + const char* s = libname; + while (*s && plen < 127) path[plen++] = *s++; + path[plen] = '\0'; + + uintptr_t seg_end = 0; + int rc = elf32_load_shared_lib_at(path, as, lib_base, &seg_end); + if (rc == 0) { + kprintf("[ELF] loaded shared lib: %s at 0x%x\n", path, (unsigned)lib_base); + lib_base = (seg_end + 0xFFFU) & ~(uintptr_t)0xFFFU; + loaded++; + } else { + kprintf("[ELF] warning: could not load %s (%d)\n", path, rc); + } + } + return loaded; +} + /* Load an interpreter ELF (ld.so) at INTERP_BASE. * Returns 0 on success, sets *interp_entry. 
*/ -#define INTERP_BASE 0x40000000U +#define INTERP_BASE 0x12000000U static int elf32_load_interp(const char* interp_path, uintptr_t as, uintptr_t* interp_entry, uintptr_t* interp_base_out) { @@ -263,15 +356,23 @@ static int elf32_load_interp(const char* interp_path, uintptr_t as, return vrc; } + /* ET_EXEC interpreter has absolute addresses (no offset needed). + * ET_DYN interpreter is position-independent, loaded at INTERP_BASE. */ + uintptr_t base_off = (eh->e_type == ET_DYN) ? INTERP_BASE : 0; + uintptr_t dummy = 0; - int rc = elf32_load_segments(fbuf, flen, as, INTERP_BASE, &dummy); + int rc = elf32_load_segments(fbuf, flen, as, base_off, &dummy); if (rc < 0) { kfree(fbuf); return rc; } - *interp_entry = (uintptr_t)eh->e_entry + INTERP_BASE; - if (interp_base_out) *interp_base_out = INTERP_BASE; + if (eh->e_type == ET_DYN) { + elf32_process_relocations(fbuf, flen, base_off, 0); + } + + *interp_entry = (uintptr_t)eh->e_entry + base_off; + if (interp_base_out) *interp_base_out = (base_off ? base_off : (uintptr_t)eh->e_entry); kfree(fbuf); return 0; @@ -317,12 +418,12 @@ int elf32_load_user_from_initrd(const char* filename, uintptr_t* entry_out, uint const elf32_ehdr_t* eh = (const elf32_ehdr_t*)file; int vrc = elf32_validate(eh, file_len); - if (vrc < 0) { + if (vrc < 0 || eh->e_entry == 0) { kprintf("[ELF] invalid ELF header\n"); kfree(file); vmm_as_activate(old_as); vmm_as_destroy(new_as); - return vrc; + return vrc < 0 ? 
vrc : -EINVAL; } uintptr_t highest_seg_end = 0; @@ -335,10 +436,7 @@ int elf32_load_user_from_initrd(const char* filename, uintptr_t* entry_out, uint return lrc; } - /* Process relocations (R_386_RELATIVE, GLOB_DAT, JMP_SLOT, R_386_32) */ - elf32_process_relocations(file, file_len, 0); - - /* Check for PT_INTERP — if present, load the dynamic linker */ + /* Check for PT_INTERP first — determines relocation strategy */ const elf32_phdr_t* ph = (const elf32_phdr_t*)(file + eh->e_phoff); uintptr_t real_entry = (uintptr_t)eh->e_entry; int has_interp = 0; @@ -363,6 +461,14 @@ int elf32_load_user_from_initrd(const char* filename, uintptr_t* entry_out, uint break; } } + + /* Process relocations — skip JMP_SLOT when ld.so will handle them lazily */ + elf32_process_relocations(file, file_len, 0, has_interp); + + /* Load DT_NEEDED shared libraries (kernel loads segments, ld.so resolves PLT) */ + if (has_interp) { + elf32_load_needed_libs(file, file_len, new_as, 0); + } /* 32 KB user stack with a 4 KB guard page below (unmapped). * Guard page at stack_base - 0x1000 is left unmapped so stack overflow * triggers a page fault → SIGSEGV instead of silent corruption. @@ -383,18 +489,28 @@ int elf32_load_user_from_initrd(const char* filename, uintptr_t* entry_out, uint uintptr_t sp = user_stack_base + user_stack_size; - /* When an interpreter is loaded, push auxv entries onto the user stack - * so ld.so can locate the program entry point and ELF headers. */ + /* When an interpreter is loaded, save auxv entries into a static buffer. + * The execve handler will push them onto the user stack in the correct + * position (right after envp[]) so ld.so can find them. 
*/ if (has_interp) { - elf32_auxv_t auxv[6]; - auxv[0].a_type = AT_ENTRY; auxv[0].a_val = (uint32_t)eh->e_entry; - auxv[1].a_type = AT_BASE; auxv[1].a_val = INTERP_BASE; - auxv[2].a_type = AT_PAGESZ; auxv[2].a_val = 0x1000; - auxv[3].a_type = AT_PHDR; auxv[3].a_val = (uint32_t)eh->e_phoff + (uint32_t)eh->e_entry; - auxv[4].a_type = AT_PHNUM; auxv[4].a_val = eh->e_phnum; - auxv[5].a_type = AT_NULL; auxv[5].a_val = 0; - sp -= sizeof(auxv); - memcpy((void*)sp, auxv, sizeof(auxv)); + /* Compute AT_PHDR: find the first PT_LOAD that covers e_phoff */ + uint32_t phdr_va = 0; + for (uint16_t i = 0; i < eh->e_phnum; i++) { + if (ph[i].p_type == PT_LOAD && + eh->e_phoff >= ph[i].p_offset && + eh->e_phoff < ph[i].p_offset + ph[i].p_filesz) { + phdr_va = ph[i].p_vaddr + (eh->e_phoff - ph[i].p_offset); + break; + } + } + g_pending_auxv[0].a_type = AT_ENTRY; g_pending_auxv[0].a_val = (uint32_t)eh->e_entry; + g_pending_auxv[1].a_type = AT_BASE; g_pending_auxv[1].a_val = INTERP_BASE; + g_pending_auxv[2].a_type = AT_PAGESZ; g_pending_auxv[2].a_val = 0x1000; + g_pending_auxv[3].a_type = AT_PHDR; g_pending_auxv[3].a_val = phdr_va; + g_pending_auxv[4].a_type = AT_PHNUM; g_pending_auxv[4].a_val = eh->e_phnum; + g_pending_auxv[5].a_type = AT_PHENT; g_pending_auxv[5].a_val = eh->e_phentsize; + g_pending_auxv[6].a_type = AT_NULL; g_pending_auxv[6].a_val = 0; + g_pending_auxv_count = 7; } /* Map vDSO shared page read-only into user address space */ diff --git a/src/arch/x86/vmm.c b/src/arch/x86/vmm.c index 7a42ae9..1e5da38 100644 --- a/src/arch/x86/vmm.c +++ b/src/arch/x86/vmm.c @@ -126,7 +126,7 @@ static void vmm_map_page_nolock(uint64_t phys, uint64_t virt, uint32_t flags) { volatile uint64_t* pd = pae_pd_recursive(pi); if ((pd[di] & X86_PTE_PRESENT) == 0) { - uint32_t pt_phys = (uint32_t)(uintptr_t)pmm_alloc_page_low(); + uint32_t pt_phys = (uint32_t)(uintptr_t)pmm_alloc_page(); if (!pt_phys) { kprintf("[VMM] OOM allocating page table.\n"); return; diff --git a/src/kernel/syscall.c 
b/src/kernel/syscall.c index cd9ad83..5bded8d 100644 --- a/src/kernel/syscall.c +++ b/src/kernel/syscall.c @@ -1986,6 +1986,17 @@ static int syscall_execve_impl(struct registers* regs, const char* user_path, co sp &= ~(uintptr_t)0xF; + // Push auxv entries (if interpreter present) — must come right after envp[] + // so ld.so can find them by walking: argc → argv[] → envp[] → auxv. + { + elf32_auxv_t auxv_buf[8]; + int auxv_n = elf32_pop_pending_auxv(auxv_buf, 8); + if (auxv_n > 0) { + sp -= (uintptr_t)(auxv_n * sizeof(elf32_auxv_t)); + memcpy((void*)sp, auxv_buf, (size_t)auxv_n * sizeof(elf32_auxv_t)); + } + } + // Push envp[] pointers sp -= (uintptr_t)(sizeof(uintptr_t) * (envc + 1)); memcpy((void*)sp, envp_ptrs_va, sizeof(uintptr_t) * (envc + 1)); diff --git a/tests/smoke_test.exp b/tests/smoke_test.exp index a7f712b..9457718 100755 --- a/tests/smoke_test.exp +++ b/tests/smoke_test.exp @@ -122,6 +122,8 @@ set tests { {"waitpid WNOHANG" "\\[init\\] waitpid WNOHANG OK"} {"SIGSEGV handler" "\\[init\\] SIGSEGV OK"} {"waitpid 100 children" "\\[init\\] waitpid OK \\(100 children"} + {"lazy PLT" "\\[init\\] lazy PLT OK"} + {"PLT cached" "\\[init\\] PLT cached OK"} {"PING network" "\\[PING\\] .*received.*network OK"} {"echo.elf execve" "\\[echo\\] hello from echo.elf"} } diff --git a/user/init.c b/user/init.c index 53f6de8..819298a 100644 --- a/user/init.c +++ b/user/init.c @@ -3722,6 +3722,21 @@ void _start(void) { } } + // PIE lazy PLT/GOT binding test + { + int pid = sys_fork(); + if (pid == 0) { + static const char* const av[] = {"pie_test.elf", 0}; + static const char* const ev[] = {0}; + (void)sys_execve("/bin/pie_test.elf", av, ev); + sys_exit(99); + } + if (pid > 0) { + int st = 0; + (void)sys_waitpid(pid, &st, 0); + } + } + { int pid = sys_fork(); if (pid < 0) { diff --git a/user/ldso.c b/user/ldso.c index 46e1f20..e7b5803 100644 --- a/user/ldso.c +++ b/user/ldso.c @@ -1,50 +1,324 @@ -/* Minimal userspace dynamic linker (ld.so). 
+/* Userspace dynamic linker (ld.so) with lazy PLT/GOT binding. * * The kernel ELF loader pushes an auxiliary vector (auxv) onto the user - * stack when PT_INTERP is present. This linker parses auxv to find - * AT_ENTRY (the real program entry point), then jumps there. + * stack when PT_INTERP is present. This linker: + * 1. Parses auxv to find AT_PHDR, AT_PHNUM, AT_ENTRY + * 2. Walks program headers to find PT_DYNAMIC + * 3. Extracts DT_PLTGOT, DT_JMPREL, DT_PLTRELSZ, DT_SYMTAB, DT_STRTAB + * 4. Sets GOT[1] = link_map pointer, GOT[2] = _dl_runtime_resolve + * 5. Jumps to AT_ENTRY (the real program entry point) * - * The kernel already performs eager relocation of R_386_RELATIVE, - * R_386_GLOB_DAT, R_386_JMP_SLOT, and R_386_32 before transferring - * control, so no additional relocation processing is needed here. + * On first PLT call, the resolver fires: looks up the symbol, patches + * the GOT entry, and jumps to the resolved function. Subsequent calls + * go directly through the patched GOT (zero overhead). * - * Future work: lazy PLT binding via GOT[2] resolver trampoline. */ + * The kernel loads DT_NEEDED shared libraries at SHLIB_BASE (0x11000000). + * The resolver scans the .so's dynamic symtab to find undefined symbols. 
*/ +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; +typedef int int32_t; + +/* ---- Auxiliary vector types ---- */ #define AT_NULL 0 +#define AT_PHDR 3 +#define AT_PHENT 4 +#define AT_PHNUM 5 #define AT_ENTRY 9 -typedef unsigned int uint32_t; +/* ---- ELF types (minimal, matching kernel include/elf.h) ---- */ +#define PT_LOAD 1 +#define PT_DYNAMIC 2 + +#define DT_NULL 0 +#define DT_NEEDED 1 +#define DT_PLTRELSZ 2 +#define DT_PLTGOT 3 +#define DT_HASH 4 +#define DT_STRTAB 5 +#define DT_SYMTAB 6 +#define DT_STRSZ 10 +#define DT_SYMENT 11 +#define DT_REL 17 +#define DT_RELSZ 18 +#define DT_JMPREL 23 + +#define R_386_JMP_SLOT 7 + +#define ELF32_R_SYM(i) ((i) >> 8) +#define ELF32_R_TYPE(i) ((unsigned char)(i)) -struct auxv_entry { - uint32_t a_type; - uint32_t a_val; +#define STB_GLOBAL 1 +#define STB_WEAK 2 +#define ELF32_ST_BIND(i) ((i) >> 4) + +#define SHLIB_BASE 0x11000000U + +struct elf32_phdr { + uint32_t p_type, p_offset, p_vaddr, p_paddr; + uint32_t p_filesz, p_memsz, p_flags, p_align; }; -void _start(void) __attribute__((noreturn, naked, section(".text.start"))); +struct elf32_dyn { + int32_t d_tag; + uint32_t d_val; +}; + +struct elf32_rel { + uint32_t r_offset; + uint32_t r_info; +}; + +struct elf32_sym { + uint32_t st_name, st_value, st_size; + uint8_t st_info, st_other; + uint16_t st_shndx; +}; + +/* ---- Link map: per-module metadata for the resolver ---- */ +struct link_map { + uint32_t l_addr; /* base load address (0 for ET_EXEC) */ + uint32_t jmprel; /* DT_JMPREL VA (relocation table for .rel.plt) */ + uint32_t pltrelsz; /* DT_PLTRELSZ */ + uint32_t symtab; /* DT_SYMTAB VA */ + uint32_t strtab; /* DT_STRTAB VA */ + /* Shared lib symbol lookup info */ + uint32_t shlib_symtab; /* .so DT_SYMTAB VA (0 if no .so) */ + uint32_t shlib_strtab; /* .so DT_STRTAB VA */ + uint32_t shlib_base; /* .so load base */ + uint32_t shlib_hash; /* .so DT_HASH VA */ +}; + +static struct link_map g_map; + +/* ---- Minimal 
string helpers (no libc) ---- */ +static int str_eq(const char* a, const char* b) { + while (*a && *b) { if (*a++ != *b++) return 0; } + return *a == *b; +} + +/* ---- ELF hash (for DT_HASH lookup) ---- */ +static uint32_t elf_hash(const char* name) { + uint32_t h = 0, g; + while (*name) { + h = (h << 4) + (uint8_t)*name++; + g = h & 0xF0000000U; + if (g) h ^= g >> 24; + h &= ~g; + } + return h; +} + +/* ---- Symbol lookup in a shared library via DT_HASH ---- */ +static uint32_t shlib_lookup(const char* name, const struct link_map* map) { + if (!map->shlib_symtab || !map->shlib_strtab || !map->shlib_hash) + return 0; + + const uint32_t* hashtab = (const uint32_t*)(map->shlib_hash + map->shlib_base); + uint32_t nbucket = hashtab[0]; + uint32_t nchain = hashtab[1]; + const uint32_t* bucket = &hashtab[2]; + const uint32_t* chain = &hashtab[2 + nbucket]; + (void)nchain; + + uint32_t h = elf_hash(name) % nbucket; + const struct elf32_sym* symtab = (const struct elf32_sym*)(map->shlib_symtab + map->shlib_base); + const char* strtab = (const char*)(map->shlib_strtab + map->shlib_base); + + for (uint32_t i = bucket[h]; i != 0; i = chain[i]) { + const struct elf32_sym* sym = &symtab[i]; + uint8_t bind = ELF32_ST_BIND(sym->st_info); + if ((bind == STB_GLOBAL || bind == STB_WEAK) && + sym->st_shndx != 0 && sym->st_value != 0) { + if (str_eq(strtab + sym->st_name, name)) + return sym->st_value + map->shlib_base; + } + } + return 0; +} + +/* ---- dl_fixup: called by _dl_runtime_resolve trampoline ---- + * Resolves a single PLT entry: looks up the symbol, patches GOT, + * returns the resolved address. 
*/ +uint32_t dl_fixup(struct link_map* map, uint32_t reloc_offset) + __attribute__((used, visibility("hidden"))); + +uint32_t dl_fixup(struct link_map* map, uint32_t reloc_offset) { + const struct elf32_rel* rel = + (const struct elf32_rel*)(map->jmprel + reloc_offset); + + uint32_t sym_idx = ELF32_R_SYM(rel->r_info); + const struct elf32_sym* sym = + &((const struct elf32_sym*)map->symtab)[sym_idx]; + + uint32_t resolved = 0; + + if (sym->st_value != 0) { + resolved = sym->st_value + map->l_addr; + } else { + const char* name = (const char*)map->strtab + sym->st_name; + resolved = shlib_lookup(name, map); + } + + if (resolved) { + uint32_t* got_entry = (uint32_t*)(rel->r_offset + map->l_addr); + *got_entry = resolved; + } + return resolved; +} + +/* ---- _dl_runtime_resolve: PLT[0] jumps here via GOT[2] ---- + * Entry stack: [link_map*] [reloc_offset] [return_addr] + * Uses the glibc i386 convention: save eax/ecx/edx, call dl_fixup, + * restore, ret $8 to jump to resolved function. */ +void _dl_runtime_resolve(void) + __attribute__((naked, used, visibility("hidden"))); + +void _dl_runtime_resolve(void) { + __asm__ volatile( + "pushl %%eax\n" + "pushl %%ecx\n" + "pushl %%edx\n" + "movl 16(%%esp), %%edx\n" /* reloc_offset */ + "movl 12(%%esp), %%eax\n" /* link_map* */ + "pushl %%edx\n" + "pushl %%eax\n" + "call dl_fixup\n" + "addl $8, %%esp\n" + "popl %%edx\n" + "popl %%ecx\n" + "xchgl %%eax, (%%esp)\n" /* restore eax, put resolved addr on stack */ + "ret $8\n" /* jump to resolved; pop link_map + reloc_offset */ + ::: "memory" + ); +} + +/* ---- Parse a PT_DYNAMIC at the given VA to extract .so symtab info ---- */ +static void parse_shlib_dynamic(uint32_t dyn_va, uint32_t base) { + const struct elf32_dyn* d = (const struct elf32_dyn*)dyn_va; + for (; d->d_tag != DT_NULL; d++) { + switch (d->d_tag) { + case DT_SYMTAB: g_map.shlib_symtab = d->d_val; break; + case DT_STRTAB: g_map.shlib_strtab = d->d_val; break; + case DT_HASH: g_map.shlib_hash = d->d_val; break; + } + 
} + g_map.shlib_base = base; +} + +/* ---- Scan for shared library's PT_DYNAMIC at SHLIB_BASE ---- */ +static void find_shlib_info(void) { + const uint8_t* base = (const uint8_t*)SHLIB_BASE; + /* Check ELF magic at SHLIB_BASE */ + if (base[0] != 0x7F || base[1] != 'E' || base[2] != 'L' || base[3] != 'F') + return; + + uint32_t e_phoff = *(const uint32_t*)(base + 28); + uint16_t e_phnum = *(const uint16_t*)(base + 44); + uint16_t e_phentsize = *(const uint16_t*)(base + 42); + + for (uint16_t i = 0; i < e_phnum; i++) { + const struct elf32_phdr* ph = + (const struct elf32_phdr*)(base + e_phoff + i * e_phentsize); + if (ph->p_type == PT_DYNAMIC) { + parse_shlib_dynamic(ph->p_vaddr + SHLIB_BASE, SHLIB_BASE); + return; + } + } +} + +/* ---- Entry point ---- */ +static void _start_c(uint32_t* initial_sp) __attribute__((noreturn, used)); + +void _start(void) __attribute__((noreturn, naked, section(".text.start"))); void _start(void) { __asm__ volatile( - /* ESP points to the auxv array pushed by the kernel. - * Scan for AT_ENTRY (type 9) to find the real program entry. */ - "mov %%esp, %%esi\n" /* esi = auxv pointer */ - "1:\n" - "mov 0(%%esi), %%eax\n" /* eax = a_type */ - "test %%eax, %%eax\n" /* AT_NULL? */ - "jz 2f\n" - "cmp $9, %%eax\n" /* AT_ENTRY? 
*/ - "je 3f\n" - "add $8, %%esi\n" /* next entry */ - "jmp 1b\n" - "3:\n" - "mov 4(%%esi), %%eax\n" /* eax = AT_ENTRY value */ - "jmp *%%eax\n" /* jump to real program entry */ - "2:\n" - /* AT_ENTRY not found — exit(127) */ - "mov $2, %%eax\n" /* SYSCALL_EXIT */ - "mov $127, %%ebx\n" - "int $0x80\n" - "3:\n" - "jmp 3b\n" - ::: "eax", "esi", "memory" + "pushl %%esp\n" + "call _start_c\n" + ::: "memory" + ); + __builtin_unreachable(); +} + +static void _start_c(uint32_t* initial_sp) { + /* Stack layout set by execve: + * initial_sp → argc + * argv[0], argv[1], ..., NULL + * envp[0], envp[1], ..., NULL + * auxv[0], auxv[1], ..., {AT_NULL, 0} */ + uint32_t* sp = initial_sp; + + uint32_t argc = *sp++; + sp += argc + 1; /* skip argv[] + NULL terminator */ + while (*sp) sp++; /* skip envp[] entries */ + sp++; /* skip envp NULL terminator */ + + /* sp now points to auxv array */ + uint32_t at_entry = 0; + uint32_t at_phdr = 0; + uint32_t at_phnum = 0; + uint32_t at_phent = 0; + + for (uint32_t* p = sp; p[0] != AT_NULL; p += 2) { + switch (p[0]) { + case AT_ENTRY: at_entry = p[1]; break; + case AT_PHDR: at_phdr = p[1]; break; + case AT_PHNUM: at_phnum = p[1]; break; + case AT_PHENT: at_phent = p[1]; break; + } + } + + if (!at_entry) { + __asm__ volatile("mov $2, %%eax\n mov $127, %%ebx\n int $0x80" ::: "eax", "ebx"); + __builtin_unreachable(); + } + + /* Walk program headers to find PT_DYNAMIC */ + g_map.l_addr = 0; + + if (at_phdr && at_phnum && at_phent) { + for (uint32_t i = 0; i < at_phnum; i++) { + const struct elf32_phdr* ph = + (const struct elf32_phdr*)(at_phdr + i * at_phent); + if (ph->p_type == PT_DYNAMIC) { + uint32_t dyn_va = ph->p_vaddr + g_map.l_addr; + const struct elf32_dyn* d = (const struct elf32_dyn*)dyn_va; + uint32_t pltgot = 0; + + for (; d->d_tag != DT_NULL; d++) { + switch (d->d_tag) { + case DT_PLTGOT: pltgot = d->d_val; break; + case DT_JMPREL: g_map.jmprel = d->d_val; break; + case DT_PLTRELSZ: g_map.pltrelsz = d->d_val; break; + case DT_SYMTAB: 
g_map.symtab = d->d_val; break; + case DT_STRTAB: g_map.strtab = d->d_val; break; + } + } + + /* Set up GOT for lazy binding: + * GOT[0] = _DYNAMIC (already set by linker) + * GOT[1] = link_map pointer + * GOT[2] = _dl_runtime_resolve address */ + if (pltgot && g_map.jmprel) { + uint32_t* got = (uint32_t*)(pltgot + g_map.l_addr); + got[1] = (uint32_t)&g_map; + got[2] = (uint32_t)&_dl_runtime_resolve; + } + break; + } + } + } + + /* Scan for shared library info at SHLIB_BASE */ + find_shlib_info(); + + /* Jump to the real program entry point */ + __asm__ volatile( + "jmp *%0\n" + :: "r"(at_entry) + : "memory" ); + __builtin_unreachable(); } diff --git a/user/ldso_linker.ld b/user/ldso_linker.ld new file mode 100644 index 0000000..60641a7 --- /dev/null +++ b/user/ldso_linker.ld @@ -0,0 +1,22 @@ +ENTRY(_start) + +SECTIONS +{ + . = 0x12000000; + + .text : { + *(.text.start) + *(.text .text.*) + } + + .rodata : { *(.rodata*) } + + . = ALIGN(0x1000); + + .data : { *(.data*) } + + .bss : { + *(.bss*) + *(COMMON) + } +} diff --git a/user/pie_func.c b/user/pie_func.c new file mode 100644 index 0000000..8e744f8 --- /dev/null +++ b/user/pie_func.c @@ -0,0 +1,7 @@ +/* Shared library function for PLT/GOT lazy binding test. + * Compiled as a shared object (libpietest.so), loaded at SHLIB_BASE by kernel. + * The main PIE binary calls test_add() through PLT — resolved lazily by ld.so. */ + +int test_add(int a, int b) { + return a + b; +} diff --git a/user/pie_linker.ld b/user/pie_linker.ld new file mode 100644 index 0000000..a6885a1 --- /dev/null +++ b/user/pie_linker.ld @@ -0,0 +1,33 @@ +ENTRY(_start) + +SECTIONS +{ + . = 0x10000000; + + .interp : { *(.interp) } + .hash : { *(.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .rel.plt : { *(.rel.plt) } + .plt : { *(.plt) *(.plt.*) } + + .text : { + *(.text.start) + *(.text .text.*) + } + + .rodata : { *(.rodata*) } + + . 
= ALIGN(0x1000); + + .dynamic : { *(.dynamic) } + .got.plt : { *(.got.plt) } + .got : { *(.got) } + + .data : { *(.data*) } + + .bss : { + *(.bss*) + *(COMMON) + } +} diff --git a/user/pie_main.c b/user/pie_main.c new file mode 100644 index 0000000..3a6805d --- /dev/null +++ b/user/pie_main.c @@ -0,0 +1,34 @@ +/* PIE test binary for PLT/GOT lazy binding verification. + * Calls test_add() from libpietest.so through PLT — resolved lazily by ld.so. + * Built as: i686-elf-ld -pie --dynamic-linker=/lib/ld.so */ + +static inline void sys_exit(int code) { + __asm__ volatile("int $0x80" :: "a"(2), "b"(code) : "memory"); +} + +static inline int sys_write(int fd, const void* buf, unsigned len) { + int ret; + __asm__ volatile("int $0x80" : "=a"(ret) : "a"(1), "b"(fd), "c"(buf), "d"(len) : "memory"); + return ret; +} + +extern int test_add(int a, int b); + +void _start(void) { + int r = test_add(38, 4); + if (r == 42) { + sys_write(1, "[init] lazy PLT OK\n", 19); + } else { + sys_write(1, "[init] lazy PLT FAIL\n", 21); + } + + /* Call again — this time GOT is already patched, tests direct path */ + r = test_add(100, 23); + if (r == 123) { + sys_write(1, "[init] PLT cached OK\n", 21); + } else { + sys_write(1, "[init] PLT cached FAIL\n", 23); + } + + sys_exit(0); +}