MKDIR_ELF := user/mkdir.elf
RM_ELF := user/rm.elf
LDSO_ELF := user/ld.so
+ PIE_SO := user/libpietest.so
+ PIE_ELF := user/pie_test.elf
DOOM_ELF := user/doom/doom.elf
INITRD_IMG := initrd.img
MKINITRD := tools/mkinitrd
$(RM_ELF): user/rm.c user/linker.ld
@i686-elf-gcc -m32 -I include -ffreestanding -fno-pie -no-pie -nostdlib -Wl,-T,user/linker.ld -o $(RM_ELF) user/rm.c user/errno.c
-$(LDSO_ELF): user/ldso.c user/linker.ld
- @i686-elf-gcc -m32 -I include -ffreestanding -fno-pie -no-pie -nostdlib -Wl,-T,user/linker.ld -o $(LDSO_ELF) user/ldso.c
+$(LDSO_ELF): user/ldso.c user/ldso_linker.ld
+ @i686-elf-gcc -m32 -ffreestanding -fno-pie -no-pie -nostdlib -Wl,-T,user/ldso_linker.ld -o $(LDSO_ELF) user/ldso.c
+
+$(PIE_SO): user/pie_func.c
+ @i686-elf-gcc -m32 -fPIC -fno-plt -c user/pie_func.c -o user/pie_func.o
+ @i686-elf-ld -m elf_i386 -shared -soname libpietest.so -o $(PIE_SO) user/pie_func.o
+
+$(PIE_ELF): user/pie_main.c user/pie_linker.ld $(PIE_SO)
+ @i686-elf-gcc -m32 -fPIC -c user/pie_main.c -o user/pie_main.o
+ @i686-elf-ld -m elf_i386 -pie --dynamic-linker=/lib/ld.so -T user/pie_linker.ld -o $(PIE_ELF) user/pie_main.o $(PIE_SO) -rpath /lib
FSTAB := rootfs/etc/fstab
-INITRD_FILES := $(USER_ELF):bin/init.elf $(ECHO_ELF):bin/echo.elf $(SH_ELF):bin/sh $(CAT_ELF):bin/cat $(LS_ELF):bin/ls $(MKDIR_ELF):bin/mkdir $(RM_ELF):bin/rm $(LDSO_ELF):lib/ld.so $(FSTAB):etc/fstab
-INITRD_DEPS := $(MKINITRD) $(USER_ELF) $(ECHO_ELF) $(SH_ELF) $(CAT_ELF) $(LS_ELF) $(MKDIR_ELF) $(RM_ELF) $(LDSO_ELF) $(FSTAB)
+INITRD_FILES := $(USER_ELF):bin/init.elf $(ECHO_ELF):bin/echo.elf $(SH_ELF):bin/sh $(CAT_ELF):bin/cat $(LS_ELF):bin/ls $(MKDIR_ELF):bin/mkdir $(RM_ELF):bin/rm $(LDSO_ELF):lib/ld.so $(PIE_SO):lib/libpietest.so $(PIE_ELF):bin/pie_test.elf $(FSTAB):etc/fstab
+INITRD_DEPS := $(MKINITRD) $(USER_ELF) $(ECHO_ELF) $(SH_ELF) $(CAT_ELF) $(LS_ELF) $(MKDIR_ELF) $(RM_ELF) $(LDSO_ELF) $(PIE_SO) $(PIE_ELF) $(FSTAB)
# Include doom.elf if it has been built
ifneq ($(wildcard $(DOOM_ELF)),)
#define DT_REL 17
#define DT_RELSZ 18
#define DT_RELENT 19
+#define DT_PLTGOT 3
#define DT_JMPREL 23
#define DT_PLTRELSZ 2
#define DT_PLTREL 20
int elf32_load_user_from_initrd(const char* filename, uintptr_t* entry_out, uintptr_t* user_stack_top_out, uintptr_t* addr_space_out, uintptr_t* heap_break_out);
+/* Retrieve and consume pending auxv entries (set by elf32_load_user_from_initrd
+ * when an interpreter is present). Returns number of entries copied, 0 if none. */
+int elf32_pop_pending_auxv(elf32_auxv_t* out, int max);
+
#endif
#include <stdint.h>
-static void* pmm_alloc_page_low_16mb(void) {
- for (int tries = 0; tries < 4096; tries++) {
- void* p = pmm_alloc_page();
- if (!p) return NULL;
- if ((uintptr_t)p < 0x01000000U) {
- return p;
- }
- pmm_free_page(p);
- }
- return NULL;
+/* Pending auxv buffer — filled by elf32_load_user_from_initrd when an
+ * interpreter is present, consumed by execve to push onto the user stack
+ * in the correct position (right after envp[]). */
+static elf32_auxv_t g_pending_auxv[8];
+static int g_pending_auxv_count = 0;
+
+int elf32_pop_pending_auxv(elf32_auxv_t* out, int max) {
+ int n = g_pending_auxv_count;
+ if (n == 0) return 0;
+ if (n > max) n = max;
+ for (int i = 0; i < n; i++) out[i] = g_pending_auxv[i];
+ g_pending_auxv_count = 0;
+ return n;
}
static int elf32_validate(const elf32_ehdr_t* eh, size_t file_len) {
if (ph_end < eh->e_phoff) return -EINVAL;
if (ph_end > file_len) return -EINVAL;
- if (eh->e_entry == 0) return -EINVAL;
- if (eh->e_entry >= hal_mm_kernel_virt_base()) return -EINVAL;
+ if (eh->e_entry != 0 && eh->e_entry >= hal_mm_kernel_virt_base()) return -EINVAL;
return 0;
}
}
if (!already_mapped) {
- void* phys = pmm_alloc_page_low_16mb();
+ void* phys = pmm_alloc_page();
if (!phys) {
vmm_as_activate(old_as);
return -ENOMEM;
/* Process ELF relocations from PT_DYNAMIC segment.
* base_offset is 0 for ET_EXEC, non-zero for PIE/shared objects.
+ * skip_jmpslot: if true, skip R_386_JMP_SLOT (let ld.so handle lazily).
* The target address space must already be activated. */
static void elf32_process_relocations(const uint8_t* file, uint32_t file_len,
- uintptr_t base_offset) {
+ uintptr_t base_offset, int skip_jmpslot) {
const elf32_ehdr_t* eh = (const elf32_ehdr_t*)file;
const elf32_phdr_t* ph = (const elf32_phdr_t*)(file + eh->e_phoff);
case R_386_RELATIVE: \
*target += (uint32_t)base_offset; \
break; \
- case R_386_GLOB_DAT: \
- case R_386_JMP_SLOT: { \
+ case R_386_JMP_SLOT: \
+ if (skip_jmpslot) break; \
+ /* fall through */ \
+ case R_386_GLOB_DAT: { \
uint32_t sym_idx = ELF32_R_SYM(r->r_info); \
if (symtab_addr && sym_idx) { \
const elf32_sym_t* sym = &((const elf32_sym_t*) \
#undef APPLY_REL
}
+/* Load a shared library ELF at the given base VA.
+ * Returns 0 on success, fills *loaded_end with highest mapped address. */
+static int elf32_load_shared_lib_at(const char* path, uintptr_t as,
+ uintptr_t base, uintptr_t* loaded_end) {
+ fs_node_t* node = vfs_lookup(path);
+ if (!node) return -ENOENT;
+
+ uint32_t flen = node->length;
+ if (flen < sizeof(elf32_ehdr_t)) return -EINVAL;
+
+ uint8_t* fbuf = (uint8_t*)kmalloc(flen);
+ if (!fbuf) return -ENOMEM;
+
+ if (vfs_read(node, 0, flen, fbuf) != flen) {
+ kfree(fbuf);
+ return -EIO;
+ }
+
+ const elf32_ehdr_t* eh = (const elf32_ehdr_t*)fbuf;
+ int vrc = elf32_validate(eh, flen);
+ if (vrc < 0) { kfree(fbuf); return vrc; }
+
+ uintptr_t seg_end = 0;
+ int rc = elf32_load_segments(fbuf, flen, as, base, &seg_end);
+ if (rc < 0) { kfree(fbuf); return rc; }
+
+ elf32_process_relocations(fbuf, flen, base, 0);
+
+ if (loaded_end) *loaded_end = seg_end;
+ kfree(fbuf);
+ return 0;
+}
+
/* Load DT_NEEDED shared libraries from the main binary's PT_DYNAMIC.
 * Libraries are loaded sequentially starting at SHLIB_BASE, each placed
 * at the page-rounded end of the previous one.
 * Returns number of libraries loaded.
 *
 * NOTE(review): `strtab` is dereferenced through a user-space VA
 * (strtab_addr + base_offset), so the target address space `as` must
 * already be active when this runs — confirm the caller guarantees that. */
#define SHLIB_BASE 0x11000000U

static int elf32_load_needed_libs(const uint8_t* file, uint32_t file_len,
                                  uintptr_t as, uintptr_t base_offset) {
    const elf32_ehdr_t* eh = (const elf32_ehdr_t*)file;
    const elf32_phdr_t* ph = (const elf32_phdr_t*)(file + eh->e_phoff);

    /* Locate PT_DYNAMIC; absence means a static binary — nothing to do. */
    const elf32_phdr_t* dyn_ph = NULL;
    for (uint16_t i = 0; i < eh->e_phnum; i++) {
        if (ph[i].p_type == PT_DYNAMIC) { dyn_ph = &ph[i]; break; }
    }
    if (!dyn_ph) return 0;
    if (dyn_ph->p_offset + dyn_ph->p_filesz > file_len) return 0;

    const elf32_dyn_t* dyn = (const elf32_dyn_t*)(file + dyn_ph->p_offset);
    uint32_t dyn_count = dyn_ph->p_filesz / sizeof(elf32_dyn_t);

    /* Find DT_STRTAB first — DT_NEEDED entries are offsets into it. */
    uint32_t strtab_addr = 0;
    for (uint32_t i = 0; i < dyn_count && dyn[i].d_tag != DT_NULL; i++) {
        if (dyn[i].d_tag == DT_STRTAB) { strtab_addr = dyn[i].d_val; break; }
    }
    if (!strtab_addr) return 0;

    const char* strtab = (const char*)(strtab_addr + base_offset);
    uintptr_t lib_base = SHLIB_BASE;
    int loaded = 0;

    for (uint32_t i = 0; i < dyn_count && dyn[i].d_tag != DT_NULL; i++) {
        if (dyn[i].d_tag != DT_NEEDED) continue;
        const char* libname = strtab + dyn[i].d_val;

        /* Build "/lib/<libname>" with bounded copies (no libc here);
         * 122 leaves room for the 5-char prefix within path[128]. */
        char path[128];
        int plen = 0;
        const char* pfx = "/lib/";
        while (*pfx && plen < 122) path[plen++] = *pfx++;
        const char* s = libname;
        while (*s && plen < 127) path[plen++] = *s++;
        path[plen] = '\0';

        uintptr_t seg_end = 0;
        int rc = elf32_load_shared_lib_at(path, as, lib_base, &seg_end);
        if (rc == 0) {
            kprintf("[ELF] loaded shared lib: %s at 0x%x\n", path, (unsigned)lib_base);
            /* Next library starts at the next page boundary. */
            lib_base = (seg_end + 0xFFFU) & ~(uintptr_t)0xFFFU;
            loaded++;
        } else {
            /* Best-effort: a missing library is logged, not fatal — ld.so
             * will fail symbol resolution later if it was really needed. */
            kprintf("[ELF] warning: could not load %s (%d)\n", path, rc);
        }
    }
    return loaded;
}
+
/* Load an interpreter ELF (ld.so) at INTERP_BASE.
* Returns 0 on success, sets *interp_entry. */
-#define INTERP_BASE 0x40000000U
+#define INTERP_BASE 0x12000000U
static int elf32_load_interp(const char* interp_path, uintptr_t as,
uintptr_t* interp_entry, uintptr_t* interp_base_out) {
return vrc;
}
+ /* ET_EXEC interpreter has absolute addresses (no offset needed).
+ * ET_DYN interpreter is position-independent, loaded at INTERP_BASE. */
+ uintptr_t base_off = (eh->e_type == ET_DYN) ? INTERP_BASE : 0;
+
uintptr_t dummy = 0;
- int rc = elf32_load_segments(fbuf, flen, as, INTERP_BASE, &dummy);
+ int rc = elf32_load_segments(fbuf, flen, as, base_off, &dummy);
if (rc < 0) {
kfree(fbuf);
return rc;
}
- *interp_entry = (uintptr_t)eh->e_entry + INTERP_BASE;
- if (interp_base_out) *interp_base_out = INTERP_BASE;
+ if (eh->e_type == ET_DYN) {
+ elf32_process_relocations(fbuf, flen, base_off, 0);
+ }
+
+ *interp_entry = (uintptr_t)eh->e_entry + base_off;
+ if (interp_base_out) *interp_base_out = (base_off ? base_off : (uintptr_t)eh->e_entry);
kfree(fbuf);
return 0;
const elf32_ehdr_t* eh = (const elf32_ehdr_t*)file;
int vrc = elf32_validate(eh, file_len);
- if (vrc < 0) {
+ if (vrc < 0 || eh->e_entry == 0) {
kprintf("[ELF] invalid ELF header\n");
kfree(file);
vmm_as_activate(old_as);
vmm_as_destroy(new_as);
- return vrc;
+ return vrc < 0 ? vrc : -EINVAL;
}
uintptr_t highest_seg_end = 0;
return lrc;
}
- /* Process relocations (R_386_RELATIVE, GLOB_DAT, JMP_SLOT, R_386_32) */
- elf32_process_relocations(file, file_len, 0);
-
- /* Check for PT_INTERP — if present, load the dynamic linker */
+ /* Check for PT_INTERP first — determines relocation strategy */
const elf32_phdr_t* ph = (const elf32_phdr_t*)(file + eh->e_phoff);
uintptr_t real_entry = (uintptr_t)eh->e_entry;
int has_interp = 0;
break;
}
}
+
+ /* Process relocations — skip JMP_SLOT when ld.so will handle them lazily */
+ elf32_process_relocations(file, file_len, 0, has_interp);
+
+ /* Load DT_NEEDED shared libraries (kernel loads segments, ld.so resolves PLT) */
+ if (has_interp) {
+ elf32_load_needed_libs(file, file_len, new_as, 0);
+ }
/* 32 KB user stack with a 4 KB guard page below (unmapped).
* Guard page at stack_base - 0x1000 is left unmapped so stack overflow
* triggers a page fault → SIGSEGV instead of silent corruption.
uintptr_t sp = user_stack_base + user_stack_size;
- /* When an interpreter is loaded, push auxv entries onto the user stack
- * so ld.so can locate the program entry point and ELF headers. */
+ /* When an interpreter is loaded, save auxv entries into a static buffer.
+ * The execve handler will push them onto the user stack in the correct
+ * position (right after envp[]) so ld.so can find them. */
if (has_interp) {
- elf32_auxv_t auxv[6];
- auxv[0].a_type = AT_ENTRY; auxv[0].a_val = (uint32_t)eh->e_entry;
- auxv[1].a_type = AT_BASE; auxv[1].a_val = INTERP_BASE;
- auxv[2].a_type = AT_PAGESZ; auxv[2].a_val = 0x1000;
- auxv[3].a_type = AT_PHDR; auxv[3].a_val = (uint32_t)eh->e_phoff + (uint32_t)eh->e_entry;
- auxv[4].a_type = AT_PHNUM; auxv[4].a_val = eh->e_phnum;
- auxv[5].a_type = AT_NULL; auxv[5].a_val = 0;
- sp -= sizeof(auxv);
- memcpy((void*)sp, auxv, sizeof(auxv));
+ /* Compute AT_PHDR: find the first PT_LOAD that covers e_phoff */
+ uint32_t phdr_va = 0;
+ for (uint16_t i = 0; i < eh->e_phnum; i++) {
+ if (ph[i].p_type == PT_LOAD &&
+ eh->e_phoff >= ph[i].p_offset &&
+ eh->e_phoff < ph[i].p_offset + ph[i].p_filesz) {
+ phdr_va = ph[i].p_vaddr + (eh->e_phoff - ph[i].p_offset);
+ break;
+ }
+ }
+ g_pending_auxv[0].a_type = AT_ENTRY; g_pending_auxv[0].a_val = (uint32_t)eh->e_entry;
+ g_pending_auxv[1].a_type = AT_BASE; g_pending_auxv[1].a_val = INTERP_BASE;
+ g_pending_auxv[2].a_type = AT_PAGESZ; g_pending_auxv[2].a_val = 0x1000;
+ g_pending_auxv[3].a_type = AT_PHDR; g_pending_auxv[3].a_val = phdr_va;
+ g_pending_auxv[4].a_type = AT_PHNUM; g_pending_auxv[4].a_val = eh->e_phnum;
+ g_pending_auxv[5].a_type = AT_PHENT; g_pending_auxv[5].a_val = eh->e_phentsize;
+ g_pending_auxv[6].a_type = AT_NULL; g_pending_auxv[6].a_val = 0;
+ g_pending_auxv_count = 7;
}
/* Map vDSO shared page read-only into user address space */
volatile uint64_t* pd = pae_pd_recursive(pi);
if ((pd[di] & X86_PTE_PRESENT) == 0) {
- uint32_t pt_phys = (uint32_t)(uintptr_t)pmm_alloc_page_low();
+ uint32_t pt_phys = (uint32_t)(uintptr_t)pmm_alloc_page();
if (!pt_phys) {
kprintf("[VMM] OOM allocating page table.\n");
return;
sp &= ~(uintptr_t)0xF;
+ // Push auxv entries (if interpreter present) — must come right after envp[]
+ // so ld.so can find them by walking: argc → argv[] → envp[] → auxv.
+ {
+ elf32_auxv_t auxv_buf[8];
+ int auxv_n = elf32_pop_pending_auxv(auxv_buf, 8);
+ if (auxv_n > 0) {
+ sp -= (uintptr_t)(auxv_n * sizeof(elf32_auxv_t));
+ memcpy((void*)sp, auxv_buf, (size_t)auxv_n * sizeof(elf32_auxv_t));
+ }
+ }
+
// Push envp[] pointers
sp -= (uintptr_t)(sizeof(uintptr_t) * (envc + 1));
memcpy((void*)sp, envp_ptrs_va, sizeof(uintptr_t) * (envc + 1));
{"waitpid WNOHANG" "\\[init\\] waitpid WNOHANG OK"}
{"SIGSEGV handler" "\\[init\\] SIGSEGV OK"}
{"waitpid 100 children" "\\[init\\] waitpid OK \\(100 children"}
+ {"lazy PLT" "\\[init\\] lazy PLT OK"}
+ {"PLT cached" "\\[init\\] PLT cached OK"}
{"PING network" "\\[PING\\] .*received.*network OK"}
{"echo.elf execve" "\\[echo\\] hello from echo.elf"}
}
}
}
    // PIE lazy PLT/GOT binding test: child execs /bin/pie_test.elf, which
    // prints "[init] lazy PLT OK" / "[init] PLT cached OK" on success.
    // NOTE(review): sys_fork() failure (pid < 0) is silently ignored here,
    // unlike the neighboring test that checks it — confirm intentional.
    {
        int pid = sys_fork();
        if (pid == 0) {
            // Child: exec the PIE binary; exit(99) only if execve fails.
            static const char* const av[] = {"pie_test.elf", 0};
            static const char* const ev[] = {0};
            (void)sys_execve("/bin/pie_test.elf", av, ev);
            sys_exit(99);
        }
        if (pid > 0) {
            // Parent: reap the child so it doesn't linger as a zombie.
            int st = 0;
            (void)sys_waitpid(pid, &st, 0);
        }
    }
+
{
int pid = sys_fork();
if (pid < 0) {
-/* Minimal userspace dynamic linker (ld.so).
+/* Userspace dynamic linker (ld.so) with lazy PLT/GOT binding.
*
* The kernel ELF loader pushes an auxiliary vector (auxv) onto the user
- * stack when PT_INTERP is present. This linker parses auxv to find
- * AT_ENTRY (the real program entry point), then jumps there.
+ * stack when PT_INTERP is present. This linker:
+ * 1. Parses auxv to find AT_PHDR, AT_PHNUM, AT_ENTRY
+ * 2. Walks program headers to find PT_DYNAMIC
+ * 3. Extracts DT_PLTGOT, DT_JMPREL, DT_PLTRELSZ, DT_SYMTAB, DT_STRTAB
+ * 4. Sets GOT[1] = link_map pointer, GOT[2] = _dl_runtime_resolve
+ * 5. Jumps to AT_ENTRY (the real program entry point)
*
- * The kernel already performs eager relocation of R_386_RELATIVE,
- * R_386_GLOB_DAT, R_386_JMP_SLOT, and R_386_32 before transferring
- * control, so no additional relocation processing is needed here.
+ * On first PLT call, the resolver fires: looks up the symbol, patches
+ * the GOT entry, and jumps to the resolved function. Subsequent calls
+ * go directly through the patched GOT (zero overhead).
*
- * Future work: lazy PLT binding via GOT[2] resolver trampoline. */
+ * The kernel loads DT_NEEDED shared libraries at SHLIB_BASE (0x11000000).
+ * The resolver scans the .so's dynamic symtab to find undefined symbols. */
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned int uint32_t;
+typedef int int32_t;
+
+/* ---- Auxiliary vector types ---- */
#define AT_NULL 0
+#define AT_PHDR 3
+#define AT_PHENT 4
+#define AT_PHNUM 5
#define AT_ENTRY 9
-typedef unsigned int uint32_t;
+/* ---- ELF types (minimal, matching kernel include/elf.h) ---- */
+#define PT_LOAD 1
+#define PT_DYNAMIC 2
+
+#define DT_NULL 0
+#define DT_NEEDED 1
+#define DT_PLTRELSZ 2
+#define DT_PLTGOT 3
+#define DT_HASH 4
+#define DT_STRTAB 5
+#define DT_SYMTAB 6
+#define DT_STRSZ 10
+#define DT_SYMENT 11
+#define DT_REL 17
+#define DT_RELSZ 18
+#define DT_JMPREL 23
+
+#define R_386_JMP_SLOT 7
+
+#define ELF32_R_SYM(i) ((i) >> 8)
+#define ELF32_R_TYPE(i) ((unsigned char)(i))
-struct auxv_entry {
- uint32_t a_type;
- uint32_t a_val;
+#define STB_GLOBAL 1
+#define STB_WEAK 2
+#define ELF32_ST_BIND(i) ((i) >> 4)
+
+#define SHLIB_BASE 0x11000000U
+
+struct elf32_phdr {
+ uint32_t p_type, p_offset, p_vaddr, p_paddr;
+ uint32_t p_filesz, p_memsz, p_flags, p_align;
};
-void _start(void) __attribute__((noreturn, naked, section(".text.start")));
+struct elf32_dyn {
+ int32_t d_tag;
+ uint32_t d_val;
+};
+
+struct elf32_rel {
+ uint32_t r_offset;
+ uint32_t r_info;
+};
+
+struct elf32_sym {
+ uint32_t st_name, st_value, st_size;
+ uint8_t st_info, st_other;
+ uint16_t st_shndx;
+};
+
/* ---- Link map: per-module metadata for the resolver ----
 * Filled in by _start_c from the main binary's PT_DYNAMIC; the shlib_*
 * fields are filled by find_shlib_info()/parse_shlib_dynamic() from the
 * library loaded at SHLIB_BASE. A pointer to this struct is stored in
 * GOT[1] and handed back to dl_fixup by the PLT0 stub. */
struct link_map {
    uint32_t l_addr;       /* base load address (0 for ET_EXEC) */
    uint32_t jmprel;       /* DT_JMPREL VA (relocation table for .rel.plt) */
    uint32_t pltrelsz;     /* DT_PLTRELSZ */
    uint32_t symtab;       /* DT_SYMTAB VA */
    uint32_t strtab;       /* DT_STRTAB VA */
    /* Shared lib symbol lookup info */
    uint32_t shlib_symtab; /* .so DT_SYMTAB VA (0 if no .so) */
    uint32_t shlib_strtab; /* .so DT_STRTAB VA */
    uint32_t shlib_base;   /* .so load base */
    uint32_t shlib_hash;   /* .so DT_HASH VA */
};

/* Single global module map: this linker supports exactly one main object
 * plus one shared library (zero-initialized in .bss). */
static struct link_map g_map;
+
/* ---- Minimal string helpers (no libc) ---- */
/* Returns 1 iff a and b are identical NUL-terminated strings. */
static int str_eq(const char* a, const char* b) {
    while (*a == *b) {
        if (*a == '\0') return 1;   /* both ended together: equal */
        a++;
        b++;
    }
    return 0;                       /* first differing byte (or length) */
}
+
/* ---- ELF hash (for DT_HASH lookup) ----
 * Classic SysV ELF hash: fold each byte into the low nibbles, then XOR
 * the overflowing top nibble back down and clear it. */
static uint32_t elf_hash(const char* name) {
    uint32_t hash = 0;
    for (const uint8_t* p = (const uint8_t*)name; *p != 0; p++) {
        hash = (hash << 4) + *p;
        uint32_t top = hash & 0xF0000000U;
        if (top) hash ^= top >> 24;
        hash &= ~top;               /* clears the top nibble (no-op if 0) */
    }
    return hash;
}
+
/* ---- Symbol lookup in a shared library via DT_HASH ----
 * SysV ELF hash table layout: hashtab[0] = nbucket, hashtab[1] = nchain,
 * then nbucket bucket heads followed by nchain chain links (indices into
 * the symbol table).
 * Returns the symbol's absolute VA (st_value + load base), or 0 when the
 * name is not found or no shared-library info was recorded. */
static uint32_t shlib_lookup(const char* name, const struct link_map* map) {
    if (!map->shlib_symtab || !map->shlib_strtab || !map->shlib_hash)
        return 0;

    const uint32_t* hashtab = (const uint32_t*)(map->shlib_hash + map->shlib_base);
    uint32_t nbucket = hashtab[0];
    uint32_t nchain = hashtab[1];
    const uint32_t* bucket = &hashtab[2];
    const uint32_t* chain = &hashtab[2 + nbucket];
    (void)nchain;  /* chain length implied by symtab size; not range-checked here */

    uint32_t h = elf_hash(name) % nbucket;
    const struct elf32_sym* symtab = (const struct elf32_sym*)(map->shlib_symtab + map->shlib_base);
    const char* strtab = (const char*)(map->shlib_strtab + map->shlib_base);

    /* Walk the chain for this bucket; index 0 is STN_UNDEF (end marker). */
    for (uint32_t i = bucket[h]; i != 0; i = chain[i]) {
        const struct elf32_sym* sym = &symtab[i];
        uint8_t bind = ELF32_ST_BIND(sym->st_info);
        /* Only defined (st_shndx != SHN_UNDEF) global/weak symbols count. */
        if ((bind == STB_GLOBAL || bind == STB_WEAK) &&
            sym->st_shndx != 0 && sym->st_value != 0) {
            if (str_eq(strtab + sym->st_name, name))
                return sym->st_value + map->shlib_base;
        }
    }
    return 0;
}
+
+/* ---- dl_fixup: called by _dl_runtime_resolve trampoline ----
+ * Resolves a single PLT entry: looks up the symbol, patches GOT,
+ * returns the resolved address. */
+uint32_t dl_fixup(struct link_map* map, uint32_t reloc_offset)
+ __attribute__((used, visibility("hidden")));
+
+uint32_t dl_fixup(struct link_map* map, uint32_t reloc_offset) {
+ const struct elf32_rel* rel =
+ (const struct elf32_rel*)(map->jmprel + reloc_offset);
+
+ uint32_t sym_idx = ELF32_R_SYM(rel->r_info);
+ const struct elf32_sym* sym =
+ &((const struct elf32_sym*)map->symtab)[sym_idx];
+
+ uint32_t resolved = 0;
+
+ if (sym->st_value != 0) {
+ resolved = sym->st_value + map->l_addr;
+ } else {
+ const char* name = (const char*)map->strtab + sym->st_name;
+ resolved = shlib_lookup(name, map);
+ }
+
+ if (resolved) {
+ uint32_t* got_entry = (uint32_t*)(rel->r_offset + map->l_addr);
+ *got_entry = resolved;
+ }
+ return resolved;
+}
+
/* ---- _dl_runtime_resolve: PLT[0] jumps here via GOT[2] ----
 * Entry stack: [link_map*] [reloc_offset] [return_addr]
 *   (PLTn pushed reloc_offset and jumped to PLT0, which pushed GOT[1] ==
 *    link_map* and jumped through GOT[2] == this function.)
 * Uses the glibc i386 convention: save eax/ecx/edx, call dl_fixup,
 * restore, ret $8 to jump to resolved function.
 * After the three register saves, esp+12 = link_map* and esp+16 =
 * reloc_offset. The final xchg restores the caller's eax while leaving
 * the resolved address on top of the stack; `ret $8` pops it into EIP and
 * discards the link_map/reloc_offset words, so the resolved function
 * returns directly to the original caller. */
void _dl_runtime_resolve(void)
    __attribute__((naked, used, visibility("hidden")));

void _dl_runtime_resolve(void) {
    __asm__ volatile(
        "pushl %%eax\n"
        "pushl %%ecx\n"
        "pushl %%edx\n"
        "movl 16(%%esp), %%edx\n" /* reloc_offset */
        "movl 12(%%esp), %%eax\n" /* link_map* */
        "pushl %%edx\n"           /* arg 2 */
        "pushl %%eax\n"           /* arg 1 (cdecl: pushed last, popped first) */
        "call dl_fixup\n"
        "addl $8, %%esp\n"        /* drop the two cdecl args */
        "popl %%edx\n"
        "popl %%ecx\n"
        "xchgl %%eax, (%%esp)\n" /* restore eax, put resolved addr on stack */
        "ret $8\n" /* jump to resolved; pop link_map + reloc_offset */
        ::: "memory"
    );
}
+
+/* ---- Parse a PT_DYNAMIC at the given VA to extract .so symtab info ---- */
+static void parse_shlib_dynamic(uint32_t dyn_va, uint32_t base) {
+ const struct elf32_dyn* d = (const struct elf32_dyn*)dyn_va;
+ for (; d->d_tag != DT_NULL; d++) {
+ switch (d->d_tag) {
+ case DT_SYMTAB: g_map.shlib_symtab = d->d_val; break;
+ case DT_STRTAB: g_map.shlib_strtab = d->d_val; break;
+ case DT_HASH: g_map.shlib_hash = d->d_val; break;
+ }
+ }
+ g_map.shlib_base = base;
+}
+
/* ---- Scan for shared library's PT_DYNAMIC at SHLIB_BASE ----
 * The kernel loads the first DT_NEEDED library at SHLIB_BASE; parse its
 * in-memory ELF header using raw byte offsets (ELF32: e_phoff at +28,
 * e_phentsize at +40+2, e_phnum at +44).
 * NOTE(review): if no shared library was loaded, SHLIB_BASE may be an
 * unmapped page and the magic-number read itself would fault — confirm
 * the kernel guarantees this page is mapped (or ld.so is only used for
 * binaries with DT_NEEDED entries). */
static void find_shlib_info(void) {
    const uint8_t* base = (const uint8_t*)SHLIB_BASE;
    /* Check ELF magic at SHLIB_BASE */
    if (base[0] != 0x7F || base[1] != 'E' || base[2] != 'L' || base[3] != 'F')
        return;

    uint32_t e_phoff = *(const uint32_t*)(base + 28);
    uint16_t e_phnum = *(const uint16_t*)(base + 44);
    uint16_t e_phentsize = *(const uint16_t*)(base + 42);

    for (uint16_t i = 0; i < e_phnum; i++) {
        const struct elf32_phdr* ph =
            (const struct elf32_phdr*)(base + e_phoff + i * e_phentsize);
        if (ph->p_type == PT_DYNAMIC) {
            /* p_vaddr is link-time (base 0 for ET_DYN); rebase it. */
            parse_shlib_dynamic(ph->p_vaddr + SHLIB_BASE, SHLIB_BASE);
            return;
        }
    }
}
+
/* ---- Entry point ---- */
static void _start_c(uint32_t* initial_sp) __attribute__((noreturn, used));

/* Naked asm shim: capture the kernel-provided stack pointer (which points
 * at argc) and hand it to the C-level entry. `pushl %esp` pushes the
 * pre-push value of ESP, so _start_c's argument is a pointer to argc.
 * _start_c is noreturn, so control never comes back here. */
void _start(void) __attribute__((noreturn, naked, section(".text.start")));
void _start(void) {
    __asm__ volatile(
        "pushl %%esp\n"
        "call _start_c\n"
        ::: "memory"
    );
    __builtin_unreachable();
}
+
static void _start_c(uint32_t* initial_sp) {
    /* Stack layout set by execve:
     *   initial_sp → argc
     *                argv[0], argv[1], ..., NULL
     *                envp[0], envp[1], ..., NULL
     *                auxv[0], auxv[1], ..., {AT_NULL, 0} */
    uint32_t* sp = initial_sp;

    uint32_t argc = *sp++;
    sp += argc + 1; /* skip argv[] + NULL terminator */
    while (*sp) sp++; /* skip envp[] entries */
    sp++; /* skip envp NULL terminator */

    /* sp now points to auxv array: (a_type, a_val) pairs ending at AT_NULL */
    uint32_t at_entry = 0;
    uint32_t at_phdr = 0;
    uint32_t at_phnum = 0;
    uint32_t at_phent = 0;

    for (uint32_t* p = sp; p[0] != AT_NULL; p += 2) {
        switch (p[0]) {
            case AT_ENTRY: at_entry = p[1]; break;
            case AT_PHDR: at_phdr = p[1]; break;
            case AT_PHNUM: at_phnum = p[1]; break;
            case AT_PHENT: at_phent = p[1]; break;
        }
    }

    /* Without AT_ENTRY there is nowhere to transfer control — exit(127). */
    if (!at_entry) {
        __asm__ volatile("mov $2, %%eax\n mov $127, %%ebx\n int $0x80" ::: "eax", "ebx");
        __builtin_unreachable();
    }

    /* Walk the main binary's program headers to find PT_DYNAMIC.
     * l_addr stays 0: the kernel links ET_EXEC binaries at their final VAs
     * and has already applied base relocations for a PIE before handing
     * control to us, so d_val/r_offset values here are absolute. */
    g_map.l_addr = 0;

    if (at_phdr && at_phnum && at_phent) {
        for (uint32_t i = 0; i < at_phnum; i++) {
            const struct elf32_phdr* ph =
                (const struct elf32_phdr*)(at_phdr + i * at_phent);
            if (ph->p_type == PT_DYNAMIC) {
                uint32_t dyn_va = ph->p_vaddr + g_map.l_addr;
                const struct elf32_dyn* d = (const struct elf32_dyn*)dyn_va;
                uint32_t pltgot = 0;

                /* Harvest the tags the resolver needs. */
                for (; d->d_tag != DT_NULL; d++) {
                    switch (d->d_tag) {
                        case DT_PLTGOT: pltgot = d->d_val; break;
                        case DT_JMPREL: g_map.jmprel = d->d_val; break;
                        case DT_PLTRELSZ: g_map.pltrelsz = d->d_val; break;
                        case DT_SYMTAB: g_map.symtab = d->d_val; break;
                        case DT_STRTAB: g_map.strtab = d->d_val; break;
                    }
                }

                /* Set up GOT for lazy binding:
                 * GOT[0] = _DYNAMIC (already set by linker)
                 * GOT[1] = link_map pointer (pushed by PLT0 for dl_fixup)
                 * GOT[2] = _dl_runtime_resolve address (PLT0 jumps here) */
                if (pltgot && g_map.jmprel) {
                    uint32_t* got = (uint32_t*)(pltgot + g_map.l_addr);
                    got[1] = (uint32_t)&g_map;
                    got[2] = (uint32_t)&_dl_runtime_resolve;
                }
                break;
            }
        }
    }

    /* Scan for shared library info at SHLIB_BASE */
    find_shlib_info();

    /* Jump to the real program entry point; this never returns. */
    __asm__ volatile(
        "jmp *%0\n"
        :: "r"(at_entry)
        : "memory"
    );
    __builtin_unreachable();
}
--- /dev/null
/* Linker script for ld.so (user/ldso_linker.ld).
 * Links the interpreter as ET_EXEC at 0x12000000, matching INTERP_BASE in
 * the kernel ELF loader, so no load-time relocation is needed. */
ENTRY(_start)

SECTIONS
{
    . = 0x12000000;

    .text : {
        *(.text.start)      /* _start must be first: entry sits at the base */
        *(.text .text.*)
    }

    .rodata : { *(.rodata*) }

    . = ALIGN(0x1000);      /* page-align the writable half */

    .data : { *(.data*) }

    .bss : {
        *(.bss*)
        *(COMMON)
    }
}
--- /dev/null
/* Shared library function for PLT/GOT lazy binding test.
 * Compiled as a shared object (libpietest.so), loaded at SHLIB_BASE by kernel.
 * The main PIE binary calls test_add() through PLT — resolved lazily by ld.so. */

int test_add(int a, int b) {
    int sum = a;
    sum += b;
    return sum;
}
--- /dev/null
/* Linker script for the PIE test binary (user/pie_linker.ld).
 * Base 0x10000000. The dynamic-linking sections (.interp/.hash/.dynsym/
 * .dynstr/.rel.plt/.plt) are placed up front with the read-only half;
 * .dynamic and the GOT sit after the page-alignment boundary in the
 * writable half so the lazy resolver can patch GOT entries at runtime. */
ENTRY(_start)

SECTIONS
{
    . = 0x10000000;

    .interp : { *(.interp) }    /* holds the PT_INTERP path "/lib/ld.so" */
    .hash : { *(.hash) }
    .dynsym : { *(.dynsym) }
    .dynstr : { *(.dynstr) }
    .rel.plt : { *(.rel.plt) }
    .plt : { *(.plt) *(.plt.*) }

    .text : {
        *(.text.start)
        *(.text .text.*)
    }

    .rodata : { *(.rodata*) }

    . = ALIGN(0x1000);          /* read-only / writable split on a page */

    .dynamic : { *(.dynamic) }
    .got.plt : { *(.got.plt) }  /* GOT[1]/GOT[2] written by ld.so */
    .got : { *(.got) }

    .data : { *(.data*) }

    .bss : {
        *(.bss*)
        *(COMMON)
    }
}
--- /dev/null
+/* PIE test binary for PLT/GOT lazy binding verification.
+ * Calls test_add() from libpietest.so through PLT — resolved lazily by ld.so.
+ * Built as: i686-elf-ld -pie --dynamic-linker=/lib/ld.so */
+
/* exit(code) — syscall #2 via int $0x80; the kernel tears down the task,
 * so this is expected not to return. */
static inline void sys_exit(int code) {
    __asm__ volatile("int $0x80" :: "a"(2), "b"(code) : "memory");
}
+
/* write(fd, buf, len) — syscall #1 via int $0x80; returns the kernel's
 * result from eax (presumably bytes written — callers here ignore it). */
static inline int sys_write(int fd, const void* buf, unsigned len) {
    int ret;
    __asm__ volatile("int $0x80" : "=a"(ret) : "a"(1), "b"(fd), "c"(buf), "d"(len) : "memory");
    return ret;
}
+
+extern int test_add(int a, int b);
+
/* Entry point (no crt0): exercise lazy PLT binding twice.
 * The first test_add() call goes through the resolver trampoline; the
 * second must hit the GOT entry the resolver patched. */
void _start(void) {
    int r = test_add(38, 4);
    if (r == 42) {
        sys_write(1, "[init] lazy PLT OK\n", 19);
    } else {
        sys_write(1, "[init] lazy PLT FAIL\n", 21);
    }

    /* Call again — this time GOT is already patched, tests direct path */
    r = test_add(100, 23);
    if (r == 123) {
        sys_write(1, "[init] PLT cached OK\n", 21);
    } else {
        sys_write(1, "[init] PLT cached FAIL\n", 23);
    }

    sys_exit(0);
    /* sys_exit is not marked noreturn: if the exit syscall ever came back,
     * falling off the end of _start would execute whatever bytes follow.
     * Spin instead of running off the cliff. */
    for (;;) { }
}