diff --git a/CMakeLists.txt b/CMakeLists.txt
index d089cc0..ee202f5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -19,15 +19,30 @@ add_subdirectory(src)
 file(MAKE_DIRECTORY ${CMAKE_SOURCE_DIR}/release)
 file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/isodir/boot/grub)
 
-# Create grub.cfg for ISO
-file(WRITE ${CMAKE_BINARY_DIR}/isodir/boot/grub/grub.cfg "set timeout=0\nset default=0\nsearch --set=root --file /boot/kernel.bin\nmenuentry \"ClaudeOS\" { multiboot2 /boot/kernel.bin }")
+# Generate CPIO initial ramdisk from apps directory.
+# All files in apps/ are packed into a newc-format CPIO archive.
+# The inputs are globbed so that editing any packed file (not only adding or
+# removing one) regenerates the archive; the generator script is an input too.
+set(INITRD_FILE ${CMAKE_BINARY_DIR}/isodir/boot/initrd.cpio)
+file(GLOB_RECURSE INITRD_INPUTS CONFIGURE_DEPENDS ${CMAKE_SOURCE_DIR}/apps/*)
+add_custom_command(
+    OUTPUT ${INITRD_FILE}
+    COMMAND ${CMAKE_SOURCE_DIR}/scripts/gen_initrd.sh ${CMAKE_SOURCE_DIR}/apps ${INITRD_FILE}
+    DEPENDS ${CMAKE_SOURCE_DIR}/scripts/gen_initrd.sh ${CMAKE_SOURCE_DIR}/apps ${INITRD_INPUTS}
+    COMMENT "Generating CPIO initial ramdisk"
+    VERBATIM
+)
+add_custom_target(initrd DEPENDS ${INITRD_FILE})
+
+# Create grub.cfg for ISO - includes module2 for the initrd
+file(WRITE ${CMAKE_BINARY_DIR}/isodir/boot/grub/grub.cfg "set timeout=0\nset default=0\nsearch --set=root --file /boot/kernel.bin\nmenuentry \"ClaudeOS\" { multiboot2 /boot/kernel.bin\n module2 /boot/initrd.cpio }")
 
 # ISO Generation
 add_custom_target(iso ALL
     COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/kernel ${CMAKE_BINARY_DIR}/isodir/boot/kernel.bin
     COMMAND grub-mkrescue -o ${CMAKE_SOURCE_DIR}/release/claude-os.iso ${CMAKE_BINARY_DIR}/isodir
-    DEPENDS kernel
+    DEPENDS kernel initrd
     COMMENT "Generating bootable ISO image"
 )
 
diff --git a/README.md b/README.md
index 0229157..76033e7 100644
--- a/README.md
+++ b/README.md
@@ -50,7 +50,7 @@ Once a task is completed, it should be checked off.
 - [x] Create an initial driver architecture, allowing different drivers included in the kernel to test whether they should load or not.
 - [x] Create a VGA driver. On startup, some memory statistics should be displayed, as well as boot progress.
 - [x] Create subsystem for loading new processes in Ring 3.
-- [ ] Update the build script to generate a ramdisk containing any applications to run. This initial ramdisk is in CPIO format.
+- [x] Update the build script to generate a ramdisk containing any applications to run. This initial ramdisk is in CPIO format.
 - [ ] Write a VFS subsystem.
 - [ ] Write a VFS driver that provides the contents of the CPIO initial ramdisk to the VFS layer.
 - [ ] Create a `hello-world` app. It should print `Hello, World` to its own stdout. The kernel should route this to Qemu and to the VGA dispaly. Ensure this work.
diff --git a/apps/README b/apps/README
new file mode 100644
index 0000000..f693f61
--- /dev/null
+++ b/apps/README
@@ -0,0 +1 @@
+This is the ClaudeOS initial ramdisk.
diff --git a/docs/cpio.md b/docs/cpio.md
new file mode 100644
index 0000000..b6f4592
--- /dev/null
+++ b/docs/cpio.md
@@ -0,0 +1,82 @@
+# CPIO Initial Ramdisk
+
+## Overview
+
+The initial ramdisk (initrd) provides files to the kernel at boot time before
+any filesystem drivers are available. It is a CPIO archive in SVR4/newc format,
+loaded by GRUB as a Multiboot2 module.
+
+## Build Process
+
+During the build, the script `scripts/gen_initrd.sh` packs all files from the
+`apps/` directory into a CPIO archive:
+
+```
+apps/
+├── README          (placeholder)
+├── hello-world     (future: flat binary)
+└── sh              (future: shell binary)
+```
+
+The archive is placed at `build/isodir/boot/initrd.cpio` and included in the
+ISO image. GRUB loads it as a module via:
+
+```
+module2 /boot/initrd.cpio
+```
+
+## CPIO Format
+
+The newc (SVR4) CPIO format uses 110-byte headers with hex ASCII fields:
+
+```
+Offset  Size  Field
+0       6     Magic ("070701")
+6       8     Inode
+14      8     Mode
+22      8     UID
+30      8     GID
+38      8     Nlink
+46      8     Mtime
+54      8     Filesize
+62      8     Devmajor
+70      8     Devminor
+78      8     Rdevmajor
+86      8     Rdevminor
+94      8     Namesize
+102     8     Check
+```
+
+After the header: filename (namesize bytes including the terminating NUL; the
+header and name together are padded to a 4-byte boundary), then file data
+(filesize bytes, padded to a 4-byte boundary). The archive ends with a
+`TRAILER!!!` entry.
+
+## Kernel Interface
+
+The kernel finds the initrd by scanning Multiboot2 boot information for
+`MULTIBOOT_TAG_TYPE_MODULE` (type 3). The module's physical memory range
+is identity-mapped, so it can be read directly.
+
+```c
+#include "cpio.h"
+
+// Find a file
+cpio_entry_t entry;
+if (cpio_find("hello-world", &entry) == 0) {
+    // entry.data = pointer to file contents
+    // entry.datasize = file size
+}
+
+// Iterate all files
+uint32_t offset = 0;
+while (cpio_next(&offset, &entry) == 0) {
+    // entry.name, entry.data, entry.datasize
+}
+```
+
+## Files
+
+- `scripts/gen_initrd.sh` — Build script to generate CPIO archive
+- `src/cpio.h` / `src/cpio.c` — CPIO parser (find, iterate, count)
+- `CMakeLists.txt` — `initrd` target and `module2` in grub.cfg
diff --git a/scripts/gen_initrd.sh b/scripts/gen_initrd.sh
new file mode 100755
index 0000000..8e212ec
--- /dev/null
+++ b/scripts/gen_initrd.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+# Generate CPIO initial ramdisk from apps directory
+# Usage: gen_initrd.sh <apps_dir> <output_file>
+#
+# Packs every entry below <apps_dir> (stored with a leading "./") into a
+# newc-format CPIO archive written to <output_file>.
+set -e
+
+if [ "$#" -ne 2 ]; then
+    echo "Usage: $0 <apps_dir> <output_file>" >&2
+    exit 1
+fi
+
+APPS_DIR="$1"
+OUTPUT="$2"
+
+# Resolve a relative output path now: the 'cd' below would otherwise
+# redirect it into the apps directory.
+case "$OUTPUT" in
+    /*) ;;
+    *) OUTPUT="$PWD/$OUTPUT" ;;
+esac
+
+# Ensure output directory exists
+mkdir -p "$(dirname "$OUTPUT")"
+
+# Build into a temporary file so a failed run never leaves a truncated
+# archive behind that the build system would treat as up to date.
+TMP="$OUTPUT.tmp.$$"
+trap 'rm -f "$TMP"' EXIT
+
+cd "$APPS_DIR"
+# Sort for a reproducible entry order. --quiet drops only cpio's block-count
+# summary, so real errors still reach stderr.
+find . ! -name '.' | LC_ALL=C sort | cpio -o -H newc --quiet > "$TMP"
+mv "$TMP" "$OUTPUT"
+echo "Generated initrd: $(wc -c < "$OUTPUT") bytes"
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 80c01f0..3e23404 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -16,6 +16,7 @@ add_executable(kernel
     tss.c
     process.c
     syscall.c
+    cpio.c
     interrupts.S
     kernel.c
 )
diff --git a/src/cpio.c b/src/cpio.c
new file mode 100644
index 0000000..474d209
--- /dev/null
+++ b/src/cpio.c
@@ -0,0 +1,183 @@
+/**
+ * @file cpio.c
+ * @brief CPIO newc archive parser implementation.
+ *
+ * Parses CPIO archives in the SVR4/newc format. The archive is expected
+ * to be loaded into memory by GRUB as a Multiboot2 module.
+ */
+
+#include "cpio.h"
+#include <string.h>
+
+/* Debug print helpers defined in kernel.c */
+extern void offset_print(const char *str);
+extern void print_hex(uint32_t val);
+
+/** Pointer to the CPIO archive in memory. */
+static const uint8_t *archive = NULL;
+
+/** Size of the archive (0 if unknown). */
+static uint32_t archive_len = 0;
+
+/**
+ * Parse an N-character hexadecimal ASCII string to uint32_t.
+ *
+ * Parsing stops at the first non-hex character; the digits read up to
+ * that point are returned.
+ *
+ * @param s Pointer to hex string.
+ * @param n Number of characters to parse.
+ * @return Parsed value.
+ */
+static uint32_t parse_hex(const char *s, int n) {
+    uint32_t val = 0;
+    for (int i = 0; i < n; i++) {
+        char c = s[i];
+        uint32_t digit;
+        if (c >= '0' && c <= '9') {
+            digit = (uint32_t)(c - '0');
+        } else if (c >= 'a' && c <= 'f') {
+            digit = (uint32_t)(c - 'a' + 10);
+        } else if (c >= 'A' && c <= 'F') {
+            digit = (uint32_t)(c - 'A' + 10);
+        } else {
+            break;
+        }
+        val = (val << 4) | digit;
+    }
+    return val;
+}
+
+/**
+ * Round up to 4-byte boundary.
+ *
+ * Operates on 64-bit values so that offsets built from untrusted header
+ * fields cannot wrap around before they are range-checked.
+ */
+static inline uint64_t align4(uint64_t v) {
+    return (v + 3) & ~(uint64_t)3;
+}
+
+/**
+ * Parse a CPIO entry at the given offset.
+ *
+ * When the archive size is known (archive_len != 0), the header, name and
+ * data must all lie inside the archive. The name must be NUL-terminated so
+ * that callers can treat it as a C string.
+ *
+ * @param offset Byte offset into the archive.
+ * @param entry  Output entry information (left untouched on failure).
+ * @return Offset of the next entry, or 0 on error/end.
+ */
+static uint32_t parse_entry(uint32_t offset, cpio_entry_t *entry) {
+    if (!archive) return 0;
+
+    /* Number of addressable bytes; unbounded when the size is unknown. */
+    uint64_t limit = (archive_len != 0) ? archive_len : UINT32_MAX;
+
+    /* The fixed-size header must fit before it is read. */
+    uint64_t name_offset = (uint64_t)offset + CPIO_HEADER_SIZE;
+    if (name_offset > limit) return 0;
+
+    const cpio_newc_header_t *hdr = (const cpio_newc_header_t *)(archive + offset);
+
+    /* Verify magic */
+    if (memcmp(hdr->magic, CPIO_MAGIC, 6) != 0) {
+        return 0;
+    }
+
+    uint32_t namesize = parse_hex(hdr->namesize, 8);
+    uint32_t filesize = parse_hex(hdr->filesize, 8);
+    uint32_t mode = parse_hex(hdr->mode, 8);
+
+    /* Data starts after header + name, aligned to 4 bytes */
+    uint64_t data_offset = align4(name_offset + namesize);
+    uint64_t data_end = data_offset + filesize;
+
+    /* Next entry starts after data, aligned to 4 bytes */
+    uint64_t next_offset = align4(data_end);
+
+    /* Reject entries that overrun the archive or whose successor offset
+     * would not fit the 32-bit return value. */
+    if (namesize == 0 || data_end > limit || next_offset > UINT32_MAX) {
+        return 0;
+    }
+
+    /* Filename starts right after the header */
+    const char *name = (const char *)(archive + (uint32_t)name_offset);
+    if (name[namesize - 1] != '\0') return 0;
+
+    entry->name = name;
+    entry->namesize = namesize;
+    entry->data = archive + (uint32_t)data_offset;
+    entry->datasize = filesize;
+    entry->mode = mode;
+
+    return (uint32_t)next_offset;
+}
+
+void cpio_init(const void *archive_start, uint32_t archive_size) {
+    archive = (const uint8_t *)archive_start;
+    archive_len = archive_size;
+
+    offset_print(" CPIO: archive at ");
+    print_hex((uint32_t)archive_start);
+    offset_print(" CPIO: size = ");
+    print_hex(archive_size);
+
+    /* Count and list entries */
+    uint32_t count = 0;
+    uint32_t off = 0;
+    cpio_entry_t entry;
+    while (cpio_next(&off, &entry) == 0) {
+        offset_print(" CPIO: [");
+        offset_print(entry.name);
+        offset_print("] size=");
+        print_hex(entry.datasize);
+        count++;
+    }
+
+    offset_print(" CPIO: ");
+    print_hex(count);
+    offset_print(" CPIO: files found\n");
+}
+
+int cpio_find(const char *name, cpio_entry_t *entry) {
+    if (!archive || !name || !entry) return -1;
+
+    uint32_t off = 0;
+    cpio_entry_t cur;
+    while (cpio_next(&off, &cur) == 0) {
+        /* Archives built with "find ." store names as "./name"; accept the
+         * stored name either verbatim or without that prefix. */
+        int prefixed = (cur.name[0] == '.' && cur.name[1] == '/');
+        if (strcmp(cur.name, name) == 0 ||
+            (prefixed && strcmp(cur.name + 2, name) == 0)) {
+            *entry = cur;
+            return 0;
+        }
+    }
+
+    return -1;
+}
+
+int cpio_next(uint32_t *offset, cpio_entry_t *entry) {
+    if (!archive || !offset || !entry) return -1;
+
+    uint32_t next = parse_entry(*offset, entry);
+    if (next == 0) return -1;
+    if (strcmp(entry->name, CPIO_TRAILER) == 0) return -1;
+
+    *offset = next;
+    return 0;
+}
+
+uint32_t cpio_count(void) {
+    uint32_t count = 0;
+    uint32_t off = 0;
+    cpio_entry_t entry;
+    while (cpio_next(&off, &entry) == 0) {
+        count++;
+    }
+    return count;
+}
diff --git a/src/cpio.h b/src/cpio.h
new file mode 100644
index 0000000..93a39bf
--- /dev/null
+++ b/src/cpio.h
@@ -0,0 +1,92 @@
+/**
+ * @file cpio.h
+ * @brief CPIO newc archive parser.
+ *
+ * Parses CPIO archives in the SVR4/newc format (magic "070701").
+ * Used to read files from the initial ramdisk loaded by GRUB.
+ */
+
+#ifndef CPIO_H
+#define CPIO_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+/**
+ * CPIO newc header (110 bytes).
+ * Every field after the 6-byte magic is an 8-character hexadecimal ASCII string.
+ */
+typedef struct cpio_newc_header {
+    char magic[6];      /**< Must be "070701". */
+    char ino[8];
+    char mode[8];
+    char uid[8];
+    char gid[8];
+    char nlink[8];
+    char mtime[8];
+    char filesize[8];
+    char devmajor[8];
+    char devminor[8];
+    char rdevmajor[8];
+    char rdevminor[8];
+    char namesize[8];
+    char check[8];
+} cpio_newc_header_t;
+
+/** Size of the CPIO newc header in bytes. */
+#define CPIO_HEADER_SIZE 110
+
+/** CPIO newc magic string. */
+#define CPIO_MAGIC "070701"
+
+/** Trailer entry name that marks end of archive. */
+#define CPIO_TRAILER "TRAILER!!!"
+
+/**
+ * CPIO file entry (result of iteration or lookup).
+ */
+typedef struct cpio_entry {
+    const char *name;       /**< Filename (pointer into archive). */
+    uint32_t namesize;      /**< Length of filename including NUL. */
+    const void *data;       /**< Pointer to file data within archive. */
+    uint32_t datasize;      /**< Size of file data in bytes. */
+    uint32_t mode;          /**< File mode/permissions. */
+} cpio_entry_t;
+
+/**
+ * Initialize the CPIO parser with the archive location.
+ *
+ * @param archive_start Pointer to the start of the CPIO archive in memory.
+ * @param archive_size  Size of the archive in bytes (0 if unknown).
+ */
+void cpio_init(const void *archive_start, uint32_t archive_size);
+
+/**
+ * Find a file in the CPIO archive by name.
+ *
+ * @param name Filename to search for (without leading "./").
+ * @param entry Output: filled with file information if found.
+ * @return 0 on success, -1 if not found.
+ */
+int cpio_find(const char *name, cpio_entry_t *entry);
+
+/**
+ * Iterate through all entries in the CPIO archive.
+ *
+ * Call with *offset = 0 to start. Returns 0 on success, -1 when no
+ * more entries exist or the TRAILER is reached.
+ *
+ * @param offset In/out: current position in the archive.
+ * @param entry Output: filled with the next entry's information.
+ * @return 0 on success, -1 at end of archive.
+ */
+int cpio_next(uint32_t *offset, cpio_entry_t *entry);
+
+/**
+ * Get the number of files in the CPIO archive (excluding TRAILER).
+ *
+ * @return Number of files.
+ */
+uint32_t cpio_count(void);
+
+#endif /* CPIO_H */
diff --git a/src/kernel.c b/src/kernel.c
index aba4740..58b885c 100644
--- a/src/kernel.c
+++ b/src/kernel.c
@@ -13,6 +13,7 @@
 #include "tss.h"
 #include "syscall.h"
 #include "process.h"
+#include "cpio.h"
 
 void offset_print(const char *str)
 {
@@ -54,6 +55,29 @@ void kernel_main(uint32_t magic, uint32_t addr) {
     init_pmm(addr);
     offset_print("PMM initialized\n");
 
+    /* Scan Multiboot2 tags for the initrd module */
+    /* NOTE(review): init_pmm() has already run at this point; confirm that it
+     * reserves the module's frames, otherwise the allocator may hand out the
+     * memory that holds the initrd. */
+    uint32_t initrd_start = 0, initrd_end = 0;
+    {
+        struct multiboot_tag *tag;
+        for (tag = (struct multiboot_tag *)(addr + 8);
+             tag->type != MULTIBOOT_TAG_TYPE_END;
+             tag = (struct multiboot_tag *)((uint8_t *)tag + ((tag->size + 7) & ~7u))) {
+            if (tag->type == MULTIBOOT_TAG_TYPE_MODULE) {
+                struct multiboot_tag_module *mod = (struct multiboot_tag_module *)tag;
+                initrd_start = mod->mod_start;
+                initrd_end = mod->mod_end;
+                offset_print("Initrd module at ");
+                print_hex(initrd_start);
+                offset_print(" to ");
+                print_hex(initrd_end);
+                break; /* Use first module */
+            }
+        }
+    }
+
     init_paging();
     offset_print("Paging initialized\n");
 
@@ -72,6 +96,14 @@ void kernel_main(uint32_t magic, uint32_t addr) {
     init_kmalloc();
     offset_print("Memory allocator initialized\n");
 
+    /* Initialize CPIO ramdisk if a module with a sane address range was loaded */
+    if (initrd_start != 0 && initrd_end > initrd_start) {
+        cpio_init((const void *)initrd_start, initrd_end - initrd_start);
+        offset_print("CPIO ramdisk initialized\n");
+    } else {
+        offset_print("No initrd module found\n");
+    }
+
     init_tss();
     offset_print("TSS initialized\n");
 