@echo " GRUB-MKRESCUE adros-$(ARCH).iso"
@grub-mkrescue -o adros-$(ARCH).iso iso > /dev/null
-$(MKINITRD): tools/mkinitrd.c
- @gcc tools/mkinitrd.c -o $(MKINITRD)
+$(MKINITRD): tools/mkinitrd.c include/xxhash32.h
+ @gcc -Iinclude tools/mkinitrd.c -o $(MKINITRD)
ULIBC_DIR := user/ulibc
ULIBC_LIB := $(ULIBC_DIR)/libulibc.a
#include <stdint.h>
// Initialize InitRD and return the root node (directory)
-fs_node_t* initrd_init(uint32_t location);
+// location: virtual address of initrd data
+// size: total size in bytes (initrd_end - initrd_start)
+fs_node_t* initrd_init(uint32_t location, uint32_t size);
#endif
*
* Returns the number of bytes written to dst, or negative on error.
*/
-int lz4_decompress_block(const void* src, size_t src_size,
- void* dst, size_t dst_cap);
+int lz4_decompress_block(const void *src, size_t src_size,
+ void *dst, size_t dst_cap);
/*
- * InitRD LZ4 wrapper header (prepended to compressed tar):
- * [0..3] magic "LZ4B"
- * [4..7] orig_sz uint32_t LE — uncompressed size
- * [8..11] comp_sz uint32_t LE — compressed block size
- * [12..] LZ4 compressed block data
+ * LZ4 Frame decompressor (official LZ4 Frame format).
+ *
+ * Reference: https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md
+ *
+ * Parses the frame header, decompresses all data blocks, and verifies the
+ * content checksum when the frame carries one.
+ *
+ * src - pointer to LZ4 frame data (starts with magic 0x184D2204)
+ * src_size - total size of frame data in bytes
+ * dst - pointer to output buffer
+ * dst_cap - capacity of the output buffer
+ *
+ * Returns total decompressed bytes, or negative on error.
*/
-#define LZ4B_MAGIC "LZ4B"
-#define LZ4B_MAGIC_U32 0x42345A4CU /* "LZ4B" as little-endian uint32 */
-#define LZ4B_HDR_SIZE 12
+int lz4_decompress_frame(const void *src, size_t src_size,
+ void *dst, size_t dst_cap);
+
+/* Official LZ4 Frame magic number (little-endian) */
+#define LZ4_FRAME_MAGIC 0x184D2204U
+
+/* Legacy custom "LZ4B" magic (kept for backward-compat detection) */
+#define LZ4B_MAGIC_U32 0x42345A4CU /* "LZ4B" as little-endian uint32 */
+#define LZ4B_HDR_SIZE 12
#endif /* LZ4_H */
--- /dev/null
+#ifndef XXHASH32_H
+#define XXHASH32_H
+
+/*
+ * xxHash-32 — standalone, header-only implementation.
+ *
+ * Reference: https://github.com/Cyan4973/xxHash/blob/dev/doc/xxhash_spec.md
+ *
+ * Used by the LZ4 Frame format for header and content checksums.
+ * Works in both freestanding (kernel) and hosted (tools) environments.
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+
+#define XXH_PRIME32_1 0x9E3779B1U
+#define XXH_PRIME32_2 0x85EBCA77U
+#define XXH_PRIME32_3 0xC2B2AE3DU
+#define XXH_PRIME32_4 0x27D4EB2FU
+#define XXH_PRIME32_5 0x165667B1U
+
+static inline uint32_t xxh32_rotl(uint32_t x, int r) { /* rotate the 32-bit value x left by r bits */
+ return (x << r) | (x >> (32 - r)); /* both shift counts stay in range only for 0 < r < 32 */
+}
+
+static inline uint32_t xxh32_read32(const uint8_t *p) { /* load p[0..3] as a little-endian 32-bit value */
+ return (uint32_t)p[0] | ((uint32_t)p[1] << 8) | /* assembled byte by byte: p[0] is the least significant byte */
+ ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
+}
+
+/*
+ * Compute the xxHash-32 digest of a byte buffer.
+ *
+ * input - data to hash; never dereferenced or offset when len is 0
+ * len   - number of bytes to hash
+ * seed  - value mixed into the initial state
+ *
+ * Returns the 32-bit digest.
+ *
+ * The loops are driven by a count of remaining bytes rather than by
+ * comparing "p + n" against an end pointer, so no pointer past the end of
+ * the buffer is ever formed (that pointer arithmetic is undefined in C and
+ * can wrap for buffers near the top of the address space).
+ */
+static inline uint32_t xxh32(const void *input, size_t len, uint32_t seed) {
+    const uint8_t *p = (const uint8_t *)input;
+    size_t remaining = len;
+    uint32_t h32;
+
+    if (remaining >= 16) {
+        /* Four accumulators, each consuming one 32-bit lane per 16-byte stripe. */
+        uint32_t v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
+        uint32_t v2 = seed + XXH_PRIME32_2;
+        uint32_t v3 = seed + 0;
+        uint32_t v4 = seed - XXH_PRIME32_1;
+
+        do {
+            v1 += xxh32_read32(p) * XXH_PRIME32_2;
+            v1 = xxh32_rotl(v1, 13) * XXH_PRIME32_1;
+            p += 4;
+            v2 += xxh32_read32(p) * XXH_PRIME32_2;
+            v2 = xxh32_rotl(v2, 13) * XXH_PRIME32_1;
+            p += 4;
+            v3 += xxh32_read32(p) * XXH_PRIME32_2;
+            v3 = xxh32_rotl(v3, 13) * XXH_PRIME32_1;
+            p += 4;
+            v4 += xxh32_read32(p) * XXH_PRIME32_2;
+            v4 = xxh32_rotl(v4, 13) * XXH_PRIME32_1;
+            p += 4;
+            remaining -= 16;
+        } while (remaining >= 16);
+
+        /* Merge the four accumulators into one word. */
+        h32 = xxh32_rotl(v1, 1) + xxh32_rotl(v2, 7) +
+              xxh32_rotl(v3, 12) + xxh32_rotl(v4, 18);
+    } else {
+        /* Short input: no stripe processing, start from the seed alone. */
+        h32 = seed + XXH_PRIME32_5;
+    }
+
+    /* The total length (truncated to 32 bits) is part of the digest. */
+    h32 += (uint32_t)len;
+
+    /* Tail: whole 32-bit words first ... */
+    while (remaining >= 4) {
+        h32 += xxh32_read32(p) * XXH_PRIME32_3;
+        h32 = xxh32_rotl(h32, 17) * XXH_PRIME32_4;
+        p += 4;
+        remaining -= 4;
+    }
+
+    /* ... then the last 0-3 single bytes. */
+    while (remaining > 0) {
+        h32 += (uint32_t)(*p) * XXH_PRIME32_5;
+        h32 = xxh32_rotl(h32, 11) * XXH_PRIME32_1;
+        p++;
+        remaining--;
+    }
+
+    /* Final avalanche: spread every input bit across the whole result. */
+    h32 ^= h32 >> 15;
+    h32 *= XXH_PRIME32_2;
+    h32 ^= h32 >> 13;
+    h32 *= XXH_PRIME32_3;
+    h32 ^= h32 >> 16;
+
+    return h32;
+}
+
+#endif /* XXHASH32_H */
}
}
-fs_node_t* initrd_init(uint32_t location) {
+fs_node_t* initrd_init(uint32_t location, uint32_t size) {
const uint8_t* raw = (const uint8_t*)(uintptr_t)location;
uint8_t* decomp_buf = NULL;
- /* Detect LZ4B compressed initrd */
- if (raw[0] == 'L' && raw[1] == 'Z' && raw[2] == '4' && raw[3] == 'B') {
+ /* Detect LZ4-compressed initrd */
+ uint32_t magic32 = (uint32_t)raw[0] | ((uint32_t)raw[1] << 8) |
+ ((uint32_t)raw[2] << 16) | ((uint32_t)raw[3] << 24);
+
+ if (magic32 == LZ4_FRAME_MAGIC) {
+ /* Official LZ4 Frame format — extract content size from header */
+ uint8_t flg = raw[4];
+ uint32_t orig_sz = 0;
+ if (flg & 0x08) { /* Content Size flag */
+ orig_sz = (uint32_t)raw[6] | ((uint32_t)raw[7] << 8) |
+ ((uint32_t)raw[8] << 16) | ((uint32_t)raw[9] << 24);
+ } else {
+ orig_sz = 4U * 1024U * 1024U;
+ }
+
+ decomp_buf = (uint8_t*)kmalloc(orig_sz);
+ if (!decomp_buf) {
+ kprintf("[INITRD] OOM decompressing LZ4 (%u bytes)\n", orig_sz);
+ return 0;
+ }
+
+ int ret = lz4_decompress_frame(raw, size, decomp_buf, orig_sz);
+ if (ret < 0) {
+ kprintf("[INITRD] LZ4 Frame decompress failed (ret=%d)\n", ret);
+ kfree(decomp_buf);
+ return 0;
+ }
+
+ kprintf("[INITRD] LZ4: %u -> %d bytes\n", size, ret);
+ location = (uint32_t)(uintptr_t)decomp_buf;
+ } else if (magic32 == LZ4B_MAGIC_U32) {
+ /* Legacy LZ4B format (backward compatibility) */
uint32_t orig_sz = (uint32_t)raw[4] | ((uint32_t)raw[5] << 8) |
((uint32_t)raw[6] << 16) | ((uint32_t)raw[7] << 24);
uint32_t comp_sz = (uint32_t)raw[8] | ((uint32_t)raw[9] << 8) |
uintptr_t initrd_virt = 0;
if (hal_mm_map_physical_range((uintptr_t)bi->initrd_start, (uintptr_t)bi->initrd_end,
HAL_MM_MAP_RW, &initrd_virt) == 0) {
- fs_root = initrd_init((uint32_t)initrd_virt);
+ uint32_t initrd_sz = (uint32_t)(bi->initrd_end - bi->initrd_start);
+ fs_root = initrd_init((uint32_t)initrd_virt, initrd_sz);
} else {
kprintf("[INITRD] Failed to map initrd physical range.\n");
}
#include "lz4.h"
+#include "xxhash32.h"
/*
* LZ4 block decompressor — minimal, standalone, no dependencies beyond memcpy.
* [extra match length bytes if low nibble == 15]
*/
+static uint32_t read_le32(const uint8_t *p) { /* load p[0..3] as a little-endian 32-bit value */
+ return (uint32_t)p[0] | ((uint32_t)p[1] << 8) | /* assembled byte by byte: p[0] is the least significant byte */
+ ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
+}
+
+static uint64_t read_le64(const uint8_t *p) { /* load p[0..7] as a little-endian 64-bit value */
+ return (uint64_t)read_le32(p) | ((uint64_t)read_le32(p + 4) << 32); /* p[0..3] is the low word, p[4..7] the high word */
+}
+
int lz4_decompress_block(const void *src, size_t src_size,
void *dst, size_t dst_cap)
{
return (int)(op - (uint8_t *)dst);
}
+
+/*
+ * LZ4 Frame decompressor — official LZ4 Frame format.
+ *
+ * Reference: https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md
+ *
+ * Supports:
+ *   - Block-independent mode
+ *   - Content size field (optional, used for validation)
+ *   - Content checksum (optional, verified if present)
+ *   - Single and multi-block frames
+ *
+ * Does NOT support:
+ *   - Block checksums (skipped if flag set)
+ *   - Linked blocks (returns error)
+ *   - Dictionary IDs (ignored)
+ *
+ * src      - frame data, starting with the frame magic number
+ * src_size - number of readable bytes at src
+ * dst      - output buffer
+ * dst_cap  - capacity of dst in bytes
+ *
+ * Returns the total number of decompressed bytes, or -1 on any error
+ * (truncated or malformed input, unsupported feature, output buffer too
+ * small, header/content checksum mismatch, content-size mismatch).
+ *
+ * Every bounds check compares a length against the bytes still available
+ * (ip_end - ip, dst_cap - total_out) instead of forming "ip + n". Block
+ * sizes come straight from the input and can be as large as 0x7FFFFFFF, so
+ * a pointer or size sum could wrap around and slip past the check.
+ */
+int lz4_decompress_frame(const void *src, size_t src_size,
+                         void *dst, size_t dst_cap)
+{
+    const uint8_t *ip = (const uint8_t *)src;
+    const uint8_t *ip_end = ip + src_size;
+    uint8_t *op = (uint8_t *)dst;
+    size_t total_out = 0;
+
+    /* --- Magic Number (4 bytes) --- */
+    if (src_size < 7) return -1;  /* minimum: magic + FLG + BD + HC */
+    if (read_le32(ip) != LZ4_FRAME_MAGIC) return -1;
+    ip += 4;
+
+    /* --- Frame Descriptor --- */
+    const uint8_t *desc_start = ip;
+
+    uint8_t flg = *ip++;
+    uint8_t bd = *ip++;
+    (void)bd;  /* block max size — not enforced by decompressor */
+
+    /* Parse FLG */
+    int version           = (flg >> 6) & 0x03;
+    int block_indep       = (flg >> 5) & 0x01;
+    int block_checksum    = (flg >> 4) & 0x01;
+    int content_size_flag = (flg >> 3) & 0x01;
+    int content_checksum  = (flg >> 2) & 0x01;
+    /* bit 1 reserved, bit 0 = dict ID */
+    int dict_id_flag      = (flg >> 0) & 0x01;
+
+    if (version != 1) return -1;   /* only version 01 defined */
+    if (!block_indep) return -1;   /* linked blocks not supported */
+
+    uint64_t content_size = 0;
+    if (content_size_flag) {
+        if ((size_t)(ip_end - ip) < 8) return -1;
+        content_size = read_le64(ip);
+        ip += 8;
+    }
+
+    if (dict_id_flag) {
+        if ((size_t)(ip_end - ip) < 4) return -1;
+        ip += 4;  /* skip dictionary ID */
+    }
+
+    /* Header Checksum (1 byte) = (xxHash32(descriptor) >> 8) & 0xFF */
+    if ((size_t)(ip_end - ip) < 1) return -1;
+    {
+        size_t desc_len = (size_t)(ip - desc_start);
+        uint8_t expected_hc = (uint8_t)((xxh32(desc_start, desc_len, 0) >> 8) & 0xFF);
+        if (*ip != expected_hc) return -1;
+    }
+    ip++;
+
+    /* --- Data Blocks --- */
+    for (;;) {
+        if ((size_t)(ip_end - ip) < 4) return -1;
+        uint32_t block_size = read_le32(ip);
+        ip += 4;
+
+        if (block_size == 0) break;  /* EndMark */
+
+        /* Top bit flags a stored (uncompressed) block; the rest is its size. */
+        int is_uncompressed = (block_size >> 31) & 1;
+        block_size &= 0x7FFFFFFFU;
+
+        /* The whole block must lie inside the input. */
+        if (block_size > (size_t)(ip_end - ip)) return -1;
+
+        if (is_uncompressed) {
+            /* total_out never exceeds dst_cap, so the subtraction cannot wrap. */
+            if (block_size > dst_cap - total_out) return -1;
+            for (uint32_t i = 0; i < block_size; i++)
+                op[i] = ip[i];
+            op += block_size;
+            total_out += block_size;
+        } else {
+            int ret = lz4_decompress_block(ip, block_size,
+                                           op, dst_cap - total_out);
+            if (ret < 0) return -1;
+            op += ret;
+            total_out += (size_t)ret;
+        }
+        ip += block_size;
+
+        /* Skip block checksum if present */
+        if (block_checksum) {
+            if ((size_t)(ip_end - ip) < 4) return -1;
+            ip += 4;
+        }
+    }
+
+    /* --- Content Checksum (optional) --- */
+    if (content_checksum) {
+        if ((size_t)(ip_end - ip) < 4) return -1;
+        uint32_t expected = read_le32(ip);
+        uint32_t actual = xxh32(dst, total_out, 0);
+        if (expected != actual) return -1;
+    }
+
+    /* Validate content size if declared */
+    if (content_size_flag && (uint64_t)total_out != content_size)
+        return -1;
+
+    /* The byte count is reported through an int; refuse totals it cannot hold. */
+    if (total_out > (size_t)(~0U >> 1)) return -1;
+
+    return (int)total_out;
+}
#include <stdint.h>
#include <string.h>
+#include "xxhash32.h"
+
#define TAR_BLOCK 512
+/* Official LZ4 Frame magic */
+#define LZ4_FRAME_MAGIC 0x184D2204U
+
/* ---- LZ4 block compressor (standalone, no external dependency) ---- */
#define LZ4_HASH_BITS 16
#define LZ4_HASH_SIZE (1 << LZ4_HASH_BITS)
#define LZ4_MIN_MATCH 4
#define LZ4_LAST_LITERALS 5 /* last 5 bytes are always literals */
+#define LZ4_MFLIMIT 12 /* last match must start >= 12 bytes before end */
static uint32_t lz4_hash4(const uint8_t *p) {
uint32_t v;
const uint8_t *ip = src;
const uint8_t *ip_end = src + src_size;
- const uint8_t *ip_limit = ip_end - LZ4_LAST_LITERALS;
+ const uint8_t *match_limit = ip_end - LZ4_LAST_LITERALS;
+ const uint8_t *ip_limit = ip_end - LZ4_MFLIMIT;
const uint8_t *anchor = ip; /* start of pending literals */
uint8_t *op = dst;
uint8_t *op_end = dst + dst_cap;
continue;
}
- /* extend match forward */
+ /* extend match forward (stop at match_limit = srcEnd - 5) */
size_t match_len = LZ4_MIN_MATCH;
- while (ip + match_len < ip_end && ip[match_len] == ref[match_len])
+ while (ip + match_len < match_limit && ip[match_len] == ref[match_len])
match_len++;
/* emit sequence */
return (size_t)(op - dst);
}
-/* LZ4B header: magic(4) + orig_size(4) + comp_size(4) */
-#define LZ4B_HDR_SIZE 12
-
static void write_le32(uint8_t *p, uint32_t v) {
p[0] = (uint8_t)(v);
p[1] = (uint8_t)(v >> 8);
p[3] = (uint8_t)(v >> 24);
}
+static void write_le64(uint8_t *p, uint64_t v) { /* store v into p[0..7] in little-endian byte order */
+ write_le32(p, (uint32_t)v); /* low 32 bits go to p[0..3] */
+ write_le32(p + 4, (uint32_t)(v >> 32)); /* high 32 bits go to p[4..7] */
+}
+
/* ---- end LZ4 ---- */
typedef struct {
FILE* out = fopen(out_name, "wb");
if (!out) { perror("fopen"); free(tar_buf); free(comp_buf); return 1; }
- /* Write LZ4B header */
- uint8_t hdr[LZ4B_HDR_SIZE];
- memcpy(hdr, "LZ4B", 4);
- write_le32(hdr + 4, (uint32_t)tar_len);
- write_le32(hdr + 8, (uint32_t)comp_sz);
- fwrite(hdr, 1, LZ4B_HDR_SIZE, out);
+ /*
+ * Write official LZ4 Frame format:
+ * Magic(4) + FLG(1) + BD(1) + ContentSize(8) + HC(1)
+ * + BlockSize(4) + BlockData(comp_sz)
+ * + EndMark(4)
+ * + ContentChecksum(4)
+ */
+
+ /* Magic number */
+ uint8_t magic[4];
+ write_le32(magic, LZ4_FRAME_MAGIC);
+ fwrite(magic, 1, 4, out);
+
+ /* Frame descriptor: FLG + BD + ContentSize */
+ uint8_t desc[10];
+ /* FLG: version=01, B.Indep=1, B.Checksum=0,
+ * ContentSize=1, ContentChecksum=1, Reserved=0, DictID=0 */
+ desc[0] = 0x6C; /* 0b01101100 */
+ /* BD: Block MaxSize=7 (4MB) */
+ desc[1] = 0x70; /* 0b01110000 */
+ /* Content size (8 bytes LE) */
+ write_le64(desc + 2, (uint64_t)tar_len);
+
+ /* Header checksum = (xxHash32(descriptor) >> 8) & 0xFF */
+ uint8_t hc = (uint8_t)((xxh32(desc, 10, 0) >> 8) & 0xFF);
+ fwrite(desc, 1, 10, out);
+ fwrite(&hc, 1, 1, out);
+
+ /* Data block: size (4 bytes) + compressed data */
+ uint8_t bsz[4];
+ write_le32(bsz, (uint32_t)comp_sz);
+ fwrite(bsz, 1, 4, out);
fwrite(comp_buf, 1, comp_sz, out);
+
+ /* EndMark (0x00000000) */
+ uint8_t endmark[4] = {0, 0, 0, 0};
+ fwrite(endmark, 1, 4, out);
+
+ /* Content checksum (xxHash32 of original data) */
+ uint32_t content_cksum = xxh32(tar_buf, tar_len, 0);
+ uint8_t cc[4];
+ write_le32(cc, content_cksum);
+ fwrite(cc, 1, 4, out);
+
fclose(out);
- printf("Done. InitRD size: %zu bytes (LZ4B header + compressed).\n",
- LZ4B_HDR_SIZE + comp_sz);
+ size_t frame_sz = 4 + 10 + 1 + 4 + comp_sz + 4 + 4;
+ printf("Done. InitRD size: %zu bytes (LZ4 Frame).\n", frame_sz);
}
free(tar_buf);