/* Userspace dynamic linker (ld.so) with lazy PLT/GOT binding.
*
* The kernel ELF loader pushes an auxiliary vector (auxv) onto the user
* stack when PT_INTERP is present. This linker:
* 1. Parses auxv to find AT_PHDR, AT_PHNUM, AT_ENTRY
* 2. Walks program headers to find PT_DYNAMIC
* 3. Extracts DT_PLTGOT, DT_JMPREL, DT_PLTRELSZ, DT_SYMTAB, DT_STRTAB
* 4. Sets GOT[1] = link_map pointer, GOT[2] = _dl_runtime_resolve
* 5. Jumps to AT_ENTRY (the real program entry point)
*
* On first PLT call, the resolver fires: looks up the symbol, patches
* the GOT entry, and jumps to the resolved function. Subsequent calls
* go directly through the patched GOT (zero overhead).
*
* The kernel loads DT_NEEDED shared libraries at SHLIB_BASE (0x11000000,
* see the #define below). The resolver finds undefined symbols by looking
* them up in the .so's dynamic symbol table through its DT_HASH table. */
/* Fixed-width integer aliases, declared locally because no headers are
 * included in the visible part of this file.
 * NOTE(review): the widths assume an ILP32 target such as i386
 * (char = 8, short = 16, int = 32 bits) — confirm before building for
 * any other target. */
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
typedef int int32_t;
/* ---- Auxiliary vector types ----
 * Keys of the (type, value) word pairs that _start_c reads from the
 * auxiliary vector on the initial stack. */
#define AT_NULL 0 /* terminates the auxv scan */
#define AT_PHDR 3 /* address of the program header table */
#define AT_PHENT 4 /* size in bytes of one program header entry */
#define AT_PHNUM 5 /* number of program header entries */
#define AT_ENTRY 9 /* entry point that _start_c finally jumps to */
/* ---- ELF types (minimal, matching kernel include/elf.h) ---- */
/* Program header types (elf32_phdr.p_type). */
#define PT_LOAD 1
#define PT_DYNAMIC 2
/* Dynamic section tags (elf32_dyn.d_tag). */
#define DT_NULL 0
#define DT_NEEDED 1
#define DT_PLTRELSZ 2
#define DT_PLTGOT 3
#define DT_HASH 4
#define DT_STRTAB 5
#define DT_SYMTAB 6
#define DT_STRSZ 10
#define DT_SYMENT 11
#define DT_REL 17
#define DT_RELSZ 18
#define DT_JMPREL 23
/* i386 relocation types (the low byte of elf32_rel.r_info). */
#define R_386_32 1
#define R_386_COPY 5
#define R_386_GLOB_DAT 6
#define R_386_JMP_SLOT 7
/* r_info packs the symbol index above bit 8 and the relocation type in
 * the low 8 bits. */
#define ELF32_R_SYM(i) ((i) >> 8)
#define ELF32_R_TYPE(i) ((unsigned char)(i))
/* Symbol bindings; elf32_sym.st_info carries the binding above bit 4. */
#define STB_GLOBAL 1
#define STB_WEAK 2
#define ELF32_ST_BIND(i) ((i) >> 4)
/* Virtual address at which find_shlib_info() expects the shared library's
 * ELF header, and the base added to every .so-relative address.
 * NOTE(review): this value must equal the address the kernel loader maps
 * shared libraries at — confirm against the kernel sources. */
#define SHLIB_BASE 0x11000000U
/* ELF32 program header entry: eight consecutive 32-bit words (32 bytes).
 * This file only reads p_type (to spot PT_DYNAMIC) and p_vaddr. */
struct elf32_phdr {
    uint32_t p_type;    /* segment kind, compared against PT_* */
    uint32_t p_offset;
    uint32_t p_vaddr;   /* virtual address of the segment */
    uint32_t p_paddr;
    uint32_t p_filesz;
    uint32_t p_memsz;
    uint32_t p_flags;
    uint32_t p_align;
};
/* One entry of a dynamic section: a signed tag (DT_*) followed by its
 * 32-bit value or address. Arrays of these end with a DT_NULL tag. */
struct elf32_dyn {
    int32_t  d_tag;  /* which DT_* item this entry describes */
    uint32_t d_val;  /* the item's value (size or address) */
};
/* REL-style relocation record (no explicit addend). */
struct elf32_rel {
    uint32_t r_offset;  /* address of the word to patch */
    uint32_t r_info;    /* symbol index and type, see ELF32_R_SYM / ELF32_R_TYPE */
};
/* ELF32 symbol table entry (16 bytes). */
struct elf32_sym {
    uint32_t st_name;   /* offset of the name inside the string table */
    uint32_t st_value;  /* symbol address; 0 is treated as "not defined here" */
    uint32_t st_size;   /* object size in bytes (used for R_386_COPY) */
    uint8_t  st_info;   /* binding in the upper bits, see ELF32_ST_BIND */
    uint8_t  st_other;
    uint16_t st_shndx;  /* section index; 0 means undefined */
};
/* ---- Link map: per-module metadata for the resolver ----
 * The first group of fields is filled from the main executable's dynamic
 * section in _start_c; the shlib_* group is filled by parse_shlib_dynamic()
 * from the shared library's dynamic section. The shlib_* table addresses
 * are stored as found in the .so and have shlib_base added at lookup time
 * (see shlib_lookup). */
struct link_map {
uint32_t l_addr; /* base load address (0 for ET_EXEC) */
uint32_t jmprel; /* DT_JMPREL VA (relocation table for .rel.plt) */
uint32_t pltrelsz; /* DT_PLTRELSZ (size of that table in bytes) */
uint32_t symtab; /* DT_SYMTAB VA */
uint32_t strtab; /* DT_STRTAB VA */
uint32_t rel; /* DT_REL VA (eager relocations) */
uint32_t relsz; /* DT_RELSZ (size of the DT_REL table in bytes) */
/* Shared lib symbol lookup info */
uint32_t shlib_symtab; /* .so DT_SYMTAB value (0 if no .so); shlib_base is added on use */
uint32_t shlib_strtab; /* .so DT_STRTAB value; shlib_base is added on use */
uint32_t shlib_base; /* .so load base */
uint32_t shlib_hash; /* .so DT_HASH value; shlib_base is added on use */
};
/* The single link map used by this linker. It has static storage, so every
 * field starts out as 0. _start_c stores its address in GOT[1], which is
 * how the lazy resolver gets it back as dl_fixup()'s `map` argument. */
static struct link_map g_map;
/* ---- Minimal string helpers (no libc) ---- */

/* Compare two NUL-terminated strings byte by byte.
 * Returns 1 when they are identical (same bytes, same length), 0 otherwise. */
static int str_eq(const char* a, const char* b) {
    for (; *a != '\0' && *b != '\0'; a++, b++) {
        if (*a != *b) {
            return 0;
        }
    }
    /* At least one string ended; they match only if both ended here. */
    return *a == *b;
}
/* ---- ELF hash (for DT_HASH lookup) ----
 * Hashes a NUL-terminated symbol name. Each byte is shifted into the
 * accumulator four bits at a time; whenever the top nibble becomes non-zero
 * it is folded back into bits 4..7 and then cleared, so the result always
 * fits in 28 bits. */
static uint32_t elf_hash(const char* name) {
    uint32_t hash = 0;
    for (const char* p = name; *p != '\0'; p++) {
        hash = (hash << 4) + (uint8_t)*p;
        uint32_t top = hash & 0xF0000000U;
        if (top != 0) {
            hash ^= top >> 24;
        }
        hash &= ~top;
    }
    return hash;
}
/* ---- Symbol lookup in a shared library via DT_HASH ----
 * Looks `name` up in the shared library described by the shlib_* fields of
 * `map`.
 *
 * The hash table layout read here is: word 0 = nbucket, word 1 = nchain,
 * then nbucket bucket heads, then the chain array. A bucket head or chain
 * link of 0 ends the walk.
 *
 * Only symbols with GLOBAL or WEAK binding, a non-zero section index and a
 * non-zero value are accepted.
 *
 * Returns the symbol's absolute address (st_value + shlib_base), or 0 when
 * no shared library is recorded, the hash table is unusable, or the name is
 * not found.
 *
 * Hardening against malformed tables: an empty bucket array is rejected
 * (it would otherwise divide by zero), chain indices must stay below
 * nchain, and the walk is capped at nchain steps so a cyclic chain cannot
 * loop forever. */
static uint32_t shlib_lookup(const char* name, const struct link_map* map) {
    if (!map->shlib_symtab || !map->shlib_strtab || !map->shlib_hash)
        return 0;

    const uint32_t* hashtab =
        (const uint32_t*)(map->shlib_hash + map->shlib_base);
    const uint32_t nbucket = hashtab[0];
    const uint32_t nchain = hashtab[1];
    if (nbucket == 0)
        return 0;

    const uint32_t* bucket = &hashtab[2];
    const uint32_t* chain = &hashtab[2 + nbucket];
    const struct elf32_sym* symtab =
        (const struct elf32_sym*)(map->shlib_symtab + map->shlib_base);
    const char* strtab = (const char*)(map->shlib_strtab + map->shlib_base);

    uint32_t idx = bucket[elf_hash(name) % nbucket];
    for (uint32_t steps = 0;
         idx != 0 && idx < nchain && steps < nchain;
         steps++) {
        const struct elf32_sym* sym = &symtab[idx];
        uint8_t bind = ELF32_ST_BIND(sym->st_info);
        if ((bind == STB_GLOBAL || bind == STB_WEAK) &&
            sym->st_shndx != 0 && sym->st_value != 0 &&
            str_eq(strtab + sym->st_name, name)) {
            return sym->st_value + map->shlib_base;
        }
        idx = chain[idx];
    }
    return 0;
}
/* ---- dl_fixup: called by _dl_runtime_resolve trampoline ----
 * Binds one PLT slot on its first use.
 *
 *   map          - link map of the calling module
 *   reloc_offset - byte offset of the slot's relocation inside the
 *                  DT_JMPREL table
 *
 * A symbol with a non-zero st_value is taken from the module itself
 * (st_value + l_addr); otherwise its name is searched in the shared library.
 * On success the address is written to the GOT word named by the
 * relocation. Returns the resolved address, or 0 (GOT untouched) when the
 * symbol cannot be found. */
uint32_t dl_fixup(struct link_map* map, uint32_t reloc_offset)
    __attribute__((used, visibility("hidden")));
uint32_t dl_fixup(struct link_map* map, uint32_t reloc_offset) {
    const struct elf32_rel* plt_rel =
        (const struct elf32_rel*)(map->jmprel + reloc_offset);
    const struct elf32_sym* dynsym = (const struct elf32_sym*)map->symtab;
    const struct elf32_sym* wanted = &dynsym[ELF32_R_SYM(plt_rel->r_info)];

    uint32_t target;
    if (wanted->st_value == 0) {
        /* Undefined in this module: ask the shared library by name. */
        target = shlib_lookup((const char*)map->strtab + wanted->st_name, map);
    } else {
        target = wanted->st_value + map->l_addr;
    }

    if (target != 0) {
        /* Patch the GOT so later calls skip the resolver. */
        *(uint32_t*)(plt_rel->r_offset + map->l_addr) = target;
    }
    return target;
}
/* ---- _dl_runtime_resolve: PLT[0] jumps here via GOT[2] ----
 * Lazy-binding trampoline; _start_c stores its address in GOT[2].
 *
 * Entry stack (top first): [link_map*] [reloc_offset] [return_addr]
 * Uses the glibc i386 convention: save eax/ecx/edx, call dl_fixup,
 * restore, ret $8 to jump to resolved function.
 *
 * The function is declared naked, so its body consists solely of the
 * instructions below. After the three register saves the entry words sit
 * 12 bytes higher: link_map* at 12(%esp), reloc_offset at 16(%esp).
 * dl_fixup's arguments are pushed in reverse order (reloc_offset first,
 * link_map* last) so that link_map* is its first stack argument. */
void _dl_runtime_resolve(void)
__attribute__((naked, used, visibility("hidden")));
void _dl_runtime_resolve(void) {
__asm__ volatile(
/* Save the three registers that are restored before the final jump. */
"pushl %%eax\n"
"pushl %%ecx\n"
"pushl %%edx\n"
"movl 16(%%esp), %%edx\n" /* reloc_offset */
"movl 12(%%esp), %%eax\n" /* link_map* */
"pushl %%edx\n"
"pushl %%eax\n"
"call dl_fixup\n"
"addl $8, %%esp\n" /* drop the two dl_fixup arguments */
"popl %%edx\n"
"popl %%ecx\n"
"xchgl %%eax, (%%esp)\n" /* restore eax, put resolved addr on stack */
"ret $8\n" /* jump to resolved; pop link_map + reloc_offset */
::: "memory"
);
}
/* ---- Parse a PT_DYNAMIC at the given VA to extract .so symtab info ----
 *   dyn_va - address of the shared library's dynamic section, already
 *            including the load base
 *   base   - load base of the shared library
 *
 * Walks the entries up to the DT_NULL terminator and records the DT_SYMTAB,
 * DT_STRTAB and DT_HASH values in g_map exactly as stored in the .so;
 * shlib_lookup() adds the base when it uses them. Finally records `base`. */
static void parse_shlib_dynamic(uint32_t dyn_va, uint32_t base) {
    const struct elf32_dyn* entry = (const struct elf32_dyn*)dyn_va;

    while (entry->d_tag != DT_NULL) {
        if (entry->d_tag == DT_SYMTAB) {
            g_map.shlib_symtab = entry->d_val;
        } else if (entry->d_tag == DT_STRTAB) {
            g_map.shlib_strtab = entry->d_val;
        } else if (entry->d_tag == DT_HASH) {
            g_map.shlib_hash = entry->d_val;
        }
        entry++;
    }

    g_map.shlib_base = base;
}
/* ---- Scan for shared library's PT_DYNAMIC at SHLIB_BASE ----
 * Treats the memory at SHLIB_BASE as an ELF32 image. If the ELF magic is
 * present, walks its program headers and hands the first PT_DYNAMIC segment
 * to parse_shlib_dynamic(). Does nothing when the magic does not match or
 * no PT_DYNAMIC header exists. */
static void find_shlib_info(void) {
    const uint8_t* image = (const uint8_t*)SHLIB_BASE;

    /* Require the 4-byte ELF magic: 0x7F 'E' 'L' 'F'. */
    int has_magic = image[0] == 0x7F && image[1] == 'E' &&
                    image[2] == 'L' && image[3] == 'F';
    if (!has_magic) {
        return;
    }

    /* ELF32 header fields read by raw byte offset:
     * e_phoff at 28, e_phentsize at 42, e_phnum at 44. */
    uint32_t phoff = *(const uint32_t*)(image + 28);
    uint16_t phentsize = *(const uint16_t*)(image + 42);
    uint16_t phnum = *(const uint16_t*)(image + 44);

    uint16_t idx = 0;
    while (idx < phnum) {
        const struct elf32_phdr* ph =
            (const struct elf32_phdr*)(image + phoff + idx * phentsize);
        if (ph->p_type == PT_DYNAMIC) {
            parse_shlib_dynamic(ph->p_vaddr + SHLIB_BASE, SHLIB_BASE);
            return;
        }
        idx++;
    }
}
/* ---- Entry point ----
 * _start is placed in section ".text.start" and declared naked, so its body
 * is just the two instructions below: push the incoming stack pointer as
 * the single argument and call _start_c. _start_c is declared noreturn; it
 * ends by jumping to the program's entry point, so control never comes
 * back here. */
static void _start_c(uint32_t* initial_sp) __attribute__((noreturn, used));
void _start(void) __attribute__((noreturn, naked, section(".text.start")));
void _start(void) {
__asm__ volatile(
"pushl %%esp\n" /* argument for _start_c: the stack pointer at entry */
"call _start_c\n"
::: "memory"
);
__builtin_unreachable();
}
static void _start_c(uint32_t* initial_sp) {
/* Stack layout set by execve:
* initial_sp → argc
* argv[0], argv[1], ..., NULL
* envp[0], envp[1], ..., NULL
* auxv[0], auxv[1], ..., {AT_NULL, 0} */
uint32_t* sp = initial_sp;
uint32_t argc = *sp++;
sp += argc + 1; /* skip argv[] + NULL terminator */
while (*sp) sp++; /* skip envp[] entries */
sp++; /* skip envp NULL terminator */
/* sp now points to auxv array */
uint32_t at_entry = 0;
uint32_t at_phdr = 0;
uint32_t at_phnum = 0;
uint32_t at_phent = 0;
for (uint32_t* p = sp; p[0] != AT_NULL; p += 2) {
switch (p[0]) {
case AT_ENTRY: at_entry = p[1]; break;
case AT_PHDR: at_phdr = p[1]; break;
case AT_PHNUM: at_phnum = p[1]; break;
case AT_PHENT: at_phent = p[1]; break;
}
}
if (!at_entry) {
__asm__ volatile("mov $2, %%eax\n mov $127, %%ebx\n int $0x80" ::: "eax", "ebx");
__builtin_unreachable();
}
/* Walk program headers to find PT_DYNAMIC */
g_map.l_addr = 0;
if (at_phdr && at_phnum && at_phent) {
for (uint32_t i = 0; i < at_phnum; i++) {
const struct elf32_phdr* ph =
(const struct elf32_phdr*)(at_phdr + i * at_phent);
if (ph->p_type == PT_DYNAMIC) {
uint32_t dyn_va = ph->p_vaddr + g_map.l_addr;
const struct elf32_dyn* d = (const struct elf32_dyn*)dyn_va;
uint32_t pltgot = 0;
for (; d->d_tag != DT_NULL; d++) {
switch (d->d_tag) {
case DT_PLTGOT: pltgot = d->d_val; break;
case DT_JMPREL: g_map.jmprel = d->d_val; break;
case DT_PLTRELSZ: g_map.pltrelsz = d->d_val; break;
case DT_SYMTAB: g_map.symtab = d->d_val; break;
case DT_STRTAB: g_map.strtab = d->d_val; break;
case DT_REL: g_map.rel = d->d_val; break;
case DT_RELSZ: g_map.relsz = d->d_val; break;
}
}
/* Scan for shared library info BEFORE resolving relocations */
find_shlib_info();
/* Set up GOT for lazy binding:
* GOT[0] = _DYNAMIC (already set by linker)
* GOT[1] = link_map pointer
* GOT[2] = _dl_runtime_resolve address */
if (pltgot && g_map.jmprel) {
uint32_t* got = (uint32_t*)(pltgot + g_map.l_addr);
got[1] = (uint32_t)&g_map;
got[2] = (uint32_t)&_dl_runtime_resolve;
}
/* Process eager relocations (R_386_GLOB_DAT, R_386_COPY) */
if (g_map.rel && g_map.relsz) {
uint32_t nrel = g_map.relsz / sizeof(struct elf32_rel);
const struct elf32_rel* rtab =
(const struct elf32_rel*)(g_map.rel + g_map.l_addr);
for (uint32_t j = 0; j < nrel; j++) {
uint32_t type = ELF32_R_TYPE(rtab[j].r_info);
uint32_t sidx = ELF32_R_SYM(rtab[j].r_info);
uint32_t* target = (uint32_t*)(rtab[j].r_offset + g_map.l_addr);
if (type == R_386_GLOB_DAT || type == R_386_JMP_SLOT) {
const struct elf32_sym* s =
&((const struct elf32_sym*)g_map.symtab)[sidx];
uint32_t addr = 0;
if (s->st_value != 0)
addr = s->st_value + g_map.l_addr;
else {
const char* nm = (const char*)g_map.strtab + s->st_name;
addr = shlib_lookup(nm, &g_map);
}
if (addr) *target = addr;
} else if (type == R_386_COPY && sidx) {
const struct elf32_sym* s =
&((const struct elf32_sym*)g_map.symtab)[sidx];
const char* nm = (const char*)g_map.strtab + s->st_name;
uint32_t src = shlib_lookup(nm, &g_map);
if (src && s->st_size > 0) {
const uint8_t* sp = (const uint8_t*)src;
uint8_t* dp = (uint8_t*)target;
for (uint32_t k = 0; k < s->st_size; k++)
dp[k] = sp[k];
}
}
}
}
break;
}
}
}
/* Restore the original stack pointer so the real program's _start
* sees the correct layout: [argc] [argv...] [NULL] [envp...] [NULL] [auxv...]
* Then jump to the program entry point. */
__asm__ volatile(
"mov %0, %%esp\n"
"jmp *%1\n"
:: "r"(initial_sp), "r"(at_entry)
: "memory"
);
__builtin_unreachable();
}