diff --git a/README.md b/README.md index 9e25ead..0229157 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,7 @@ Once a task is completed, it should be checked off. - [x] Create a memory allocator. This should provide the kernel with `malloc` and `free`. Internally, it should use the paging subsystem to ensure that the address it returns have actual RAM paged to them. - [x] Create an initial driver architecture, allowing different drivers included in the kernel to test whether they should load or not. - [x] Create a VGA driver. On startup, some memory statistics should be displayed, as well as boot progress. -- [ ] Create subsystem for loading new processes in Ring 3. +- [x] Create subsystem for loading new processes in Ring 3. - [ ] Update the build script to generate a ramdisk containing any applications to run. This initial ramdisk is in CPIO format. - [ ] Write a VFS subsystem. - [ ] Write a VFS driver that provides the contents of the CPIO initial ramdisk to the VFS layer. diff --git a/docs/process.md b/docs/process.md new file mode 100644 index 0000000..0005f2c --- /dev/null +++ b/docs/process.md @@ -0,0 +1,109 @@ +# Process Subsystem + +## Overview + +The process subsystem enables user-mode (Ring 3) process execution on ClaudeOS. +It provides process creation, context switching via the timer interrupt, and +system calls via `INT 0x80`. + +## Architecture + +### Ring Transition + +x86 protected mode uses privilege rings 0–3. The kernel runs in Ring 0 (full +hardware access) and user processes run in Ring 3 (restricted). The GDT +defines segment descriptors for both: + +| GDT Entry | Selector | Purpose | DPL | +|-----------|----------|-----------------|-----| +| 0 | 0x00 | Null | – | +| 1 | 0x08 | Kernel Code | 0 | +| 2 | 0x10 | Kernel Data | 0 | +| 3 | 0x18 | User Code | 3 | +| 4 | 0x20 | User Data | 3 | +| 5 | 0x28 | TSS | 0 | + +User-mode selectors include RPL=3: code = 0x1B, data = 0x23. 
+ +### Task State Segment (TSS) + +The TSS (`tss.c`) stores the kernel stack pointer (SS0:ESP0) used when the CPU +transitions from Ring 3 to Ring 0 on an interrupt. Before running each process, +the scheduler updates TSS.ESP0 to that process's kernel stack top. + +### Memory Layout + +Each process gets its own page directory, cloned from the kernel's: + +``` +0x00000000 – 0x07FFFFFF : Identity-mapped (kernel/device access) +0x08048000 – ... : User code (loaded from binary image) +0xBFFFD000 – 0xBFFFF000 : User stack (2 pages, grows downward) +0xD0000000 – 0xF0000000 : Kernel heap (shared across all processes) +``` + +### Process Control Block + +```c +typedef struct process { + uint32_t pid; + process_state_t state; // UNUSED, READY, RUNNING, BLOCKED, ZOMBIE + registers_t saved_regs; // Full interrupt frame + uint32_t kernel_stack; // Base of per-process kernel stack + uint32_t kernel_stack_top; // TSS ESP0 value + uint32_t page_directory; // Physical address of page directory + uint32_t user_stack; // User stack virtual address + uint32_t entry_point; // User code entry point + int32_t exit_code; // Set on exit + uint32_t parent_pid; + char name[32]; +} process_t; +``` + +## Context Switching + +Context switching uses the interrupt frame directly: + +1. Timer IRQ (or `INT 0x80` for SYS_YIELD) fires +2. CPU pushes SS/ESP/EFLAGS/CS/EIP onto the process's kernel stack +3. ISR stub pushes the rest (pusha + DS) forming a `registers_t` +4. `schedule_tick()` is called with a pointer to these registers +5. Current process's registers are saved into its PCB +6. Next READY process's saved registers are written over the interrupt frame +7. TSS.ESP0 is updated, CR3 is switched to the new page directory +8. ISR stub restores the (now different) registers and `iret` enters the new + process in user mode + +This avoids separate context-switch assembly — the existing ISR stub handles +everything. 
+ +## System Calls + +System calls use `INT 0x80` with the call number in EAX: + +| Number | Name | Arguments | +|--------|-------------|------------------------------| +| 0 | SYS_EXIT | EBX = exit code | +| 1 | SYS_WRITE | EBX = fd, ECX = buf, EDX = len | +| 2 | SYS_READ | (not implemented) | +| 3 | SYS_FORK | (returns child PID/0) | +| 4 | SYS_GETPID | (returns PID in EAX) | +| 5 | SYS_YIELD | (voluntary preemption) | +| 6 | SYS_WAITPID | EBX = child PID | +| 7 | SYS_EXEC | (not implemented) | + +The INT 0x80 IDT gate has DPL=3 (flags 0xEE) so user-mode code can invoke it. + +## Initial Process Entry + +`process_run_first()` performs the initial transition to user mode using an +`iret` instruction that sets up Ring 3 segment selectors, the user stack +pointer, and the entry point. This is a one-way transition — the function +does not return. + +## Files + +- `tss.h` / `tss.c` — TSS structure and initialization +- `process.h` / `process.c` — Process table, creation, scheduling, exit, fork +- `syscall.h` / `syscall.c` — System call dispatch and handlers +- `interrupts.S` — Assembly stubs: `isr128` (INT 0x80), `tss_flush`, `enter_usermode` diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 908fac1..80c01f0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -13,6 +13,9 @@ add_executable(kernel string.c driver.c vga.c + tss.c + process.c + syscall.c interrupts.S kernel.c ) diff --git a/src/gdt.c b/src/gdt.c index 4239ed2..310caa3 100644 --- a/src/gdt.c +++ b/src/gdt.c @@ -3,8 +3,8 @@ /* GDT Pointer Structure */ struct gdt_ptr gp; -/* GDT entries */ -struct gdt_entry gdt[5]; +/* GDT entries: 0=null, 1=kcode, 2=kdata, 3=ucode, 4=udata, 5=tss */ +struct gdt_entry gdt[6]; extern void gdt_flush(uint32_t); @@ -33,7 +33,7 @@ void gdt_set_gate(int32_t num, uint32_t base, uint32_t limit, uint8_t access, ui void init_gdt() { /* Setup the GDT pointer and limit */ - gp.limit = (sizeof(struct gdt_entry) * 5) - 1; + gp.limit = (sizeof(struct gdt_entry) * 6) - 1; 
gp.base = (uint32_t)&gdt; /* Our NULL descriptor */ diff --git a/src/gdt.h b/src/gdt.h index 93c1721..0c4ea73 100644 --- a/src/gdt.h +++ b/src/gdt.h @@ -22,4 +22,7 @@ struct gdt_ptr { /* Initialize GDT */ void init_gdt(void); +/* Set a GDT gate (also used by TSS setup) */ +void gdt_set_gate(int32_t num, uint32_t base, uint32_t limit, uint8_t access, uint8_t gran); + #endif // GDT_H diff --git a/src/idt.c b/src/idt.c index 4b5b981..8d53a16 100644 --- a/src/idt.c +++ b/src/idt.c @@ -15,6 +15,11 @@ static void set_idt_gate(uint8_t num, uint32_t base, uint16_t sel, uint8_t flags idt[num].flags = flags; } +// Public version for other subsystems (e.g., syscall INT 0x80) +void set_idt_gate_from_c(uint8_t num, uint32_t base, uint16_t sel, uint8_t flags) { + set_idt_gate(num, base, sel, flags); +} + // Exception Handlers (ISRs) extern void isr0(); extern void isr1(); diff --git a/src/interrupts.S b/src/interrupts.S index 6285f0a..4194734 100644 --- a/src/interrupts.S +++ b/src/interrupts.S @@ -118,3 +118,59 @@ ISR_IRQ 12, 44 ISR_IRQ 13, 45 ISR_IRQ 14, 46 ISR_IRQ 15, 47 + +/* + * INT 0x80 - System call entry point. + * Uses the same isr_common_stub so the register layout matches registers_t. + */ +.global isr128 +.type isr128, @function +isr128: + cli + push $0 /* Fake error code */ + push $0x80 /* Interrupt number 128 */ + jmp isr_common_stub + +/* + * tss_flush - Load the Task Register with the TSS selector. + * TSS is GDT entry 5, selector = 5*8 = 0x28. With RPL=0: 0x28. + */ +.global tss_flush +.type tss_flush, @function +tss_flush: + mov $0x28, %ax + ltr %ax + ret + +/* + * enter_usermode - Switch to Ring 3 user mode via iret. 
+ * void enter_usermode(uint32_t eip, uint32_t esp); + * + * Builds an iret frame on the stack: + * SS = 0x23 (user data) + * ESP = user stack pointer + * EFLAGS = IF=1 + * CS = 0x1B (user code) + * EIP = user entry point + */ +.global enter_usermode +.type enter_usermode, @function +enter_usermode: + mov 4(%esp), %ecx /* user EIP */ + mov 8(%esp), %edx /* user ESP */ + + /* Set data segment registers to user data segment */ + mov $0x23, %ax + mov %ax, %ds + mov %ax, %es + mov %ax, %fs + mov %ax, %gs + + /* Build iret frame */ + push $0x23 /* SS (user data) */ + push %edx /* ESP (user stack) */ + pushf /* EFLAGS */ + orl $0x200, (%esp) /* Ensure IF (Interrupt Flag) is set */ + push $0x1B /* CS (user code) */ + push %ecx /* EIP (entry point) */ + iret diff --git a/src/isr.c b/src/isr.c index 8786387..fac1f69 100644 --- a/src/isr.c +++ b/src/isr.c @@ -1,5 +1,7 @@ #include "isr.h" #include "pic.h" +#include "process.h" +#include "syscall.h" #include /* Forward declaration for kernel panic or similar */ @@ -44,24 +46,28 @@ char *exception_messages[] = { void isr_handler(registers_t *regs) { - // If it's a hardware interrupt (IRQ), we must acknowledge it + /* System call (INT 0x80) */ + if (regs->int_no == 0x80) { + syscall_handler(regs); + return; + } + + /* Hardware interrupts (IRQs 0-15, mapped to vectors 32-47) */ if (regs->int_no >= 32 && regs->int_no < 48) { - // Send EOI to PIC (IRQ number 0-15) + /* Send EOI to PIC (IRQ number 0-15) */ pic_send_eoi(regs->int_no - 32); - // Here we would call the registered handler for this IRQ - // For now, just print something for the timer tick so we know it works, - // but limit it to avoid flooding the log. 
if (regs->int_no == 32) { - // Timer tick - do nothing verbose - // offset_print("."); + /* Timer tick - invoke scheduler */ + schedule_tick(regs); } else if (regs->int_no == 33) { - // Keyboard + /* Keyboard */ offset_print("Keyboard IRQ!\n"); } return; } + /* CPU exceptions (vectors 0-31) */ offset_print("received interrupt: "); print_hex(regs->int_no); offset_print("\n"); @@ -70,6 +76,12 @@ void isr_handler(registers_t *regs) { offset_print(exception_messages[regs->int_no]); offset_print(" Exception. System Halted!\n"); + offset_print(" EIP: "); + print_hex(regs->eip); + offset_print(" CS: "); + print_hex(regs->cs); + offset_print(" ERR: "); + print_hex(regs->err_code); for (;;) ; } } diff --git a/src/kernel.c b/src/kernel.c index 41110a2..aba4740 100644 --- a/src/kernel.c +++ b/src/kernel.c @@ -10,6 +10,9 @@ #include "kmalloc.h" #include "driver.h" #include "vga.h" +#include "tss.h" +#include "syscall.h" +#include "process.h" void offset_print(const char *str) { @@ -69,6 +72,15 @@ void kernel_main(uint32_t magic, uint32_t addr) { init_kmalloc(); offset_print("Memory allocator initialized\n"); + init_tss(); + offset_print("TSS initialized\n"); + + init_syscalls(); + offset_print("Syscalls initialized\n"); + + init_process(); + offset_print("Process subsystem initialized\n"); + init_drivers(); offset_print("Drivers initialized\n"); @@ -88,6 +100,66 @@ void kernel_main(uint32_t magic, uint32_t addr) { offset_print("FAILED to kmalloc\n"); } + /* + * Create a minimal test user-mode program. + * This is flat binary machine code that calls SYS_WRITE then SYS_EXIT. + * + * The program writes "Hello from Ring 3!\n" to stdout (fd=1) via INT 0x80, + * then exits with code 42. 
+ * + * Assembly (i386): + * ; SYS_WRITE(1, msg, 19) + * mov eax, 1 ; SYS_WRITE + * mov ebx, 1 ; fd = stdout + * call next ; get EIP for position-independent addressing + * next: + * pop ecx ; ECX = address of 'next' + * add ecx, 25 ; ECX = address of message string (offset to msg) + * mov edx, 19 ; len = 19 + * int 0x80 + * ; SYS_EXIT(42) + * mov eax, 0 ; SYS_EXIT + * mov ebx, 42 ; code = 42 + * int 0x80 + * ; loop forever (shouldn't reach here) + * jmp $ + * msg: + * db "Hello from Ring 3!", 10 + */ + static const uint8_t user_program[] = { + 0xB8, 0x01, 0x00, 0x00, 0x00, /* mov eax, 1 (SYS_WRITE) */ + 0xBB, 0x01, 0x00, 0x00, 0x00, /* mov ebx, 1 (stdout) */ + 0xE8, 0x00, 0x00, 0x00, 0x00, /* call next (push EIP) */ + /* next: offset 15 */ + 0x59, /* pop ecx */ + 0x83, 0xC1, 0x19, /* add ecx, 25 (offset from 'next' to msg) */ + 0xBA, 0x13, 0x00, 0x00, 0x00, /* mov edx, 19 (length) */ + 0xCD, 0x80, /* int 0x80 */ + /* SYS_EXIT(42): offset 26 */ + 0xB8, 0x00, 0x00, 0x00, 0x00, /* mov eax, 0 (SYS_EXIT) */ + 0xBB, 0x2A, 0x00, 0x00, 0x00, /* mov ebx, 42 (exit code) */ + 0xCD, 0x80, /* int 0x80 */ + 0xEB, 0xFE, /* jmp $ (infinite loop safety) */ + /* msg: offset 40 */ + 'H','e','l','l','o',' ','f','r','o','m',' ', + 'R','i','n','g',' ','3','!','\n' + }; + + int32_t pid = process_create("init", user_program, sizeof(user_program)); + if (pid > 0) { + offset_print("Created init process, pid="); + print_hex((uint32_t)pid); + + /* Enable interrupts before entering user mode */ + asm volatile("sti"); + offset_print("Interrupts enabled\n"); + + /* Enter user mode - does not return */ + process_run_first(); + } else { + offset_print("FAILED to create init process\n"); + } + /* Enable interrupts */ asm volatile("sti"); offset_print("Interrupts enabled\n"); diff --git a/src/paging.c b/src/paging.c index c6c1efa..e6995dc 100644 --- a/src/paging.c +++ b/src/paging.c @@ -276,3 +276,51 @@ void init_paging(void) { offset_print(" PAGING: enabled\n"); } + +uint32_t 
paging_get_directory_phys(void) { + return (uint32_t)page_directory; +} + +uint32_t paging_clone_directory(void) { + /* Allocate a new page for the directory */ + phys_addr_t new_dir_phys = pmm_alloc_page(PMM_ZONE_NORMAL); + if (new_dir_phys == 0) { + offset_print(" PAGING: cannot allocate page directory\n"); + return 0; + } + + uint32_t *new_dir = (uint32_t *)new_dir_phys; + + /* Copy all entries from the kernel page directory. + * This shares the kernel-space mappings (identity map, kernel heap) + * with the new process. User-space mappings will be added separately. */ + memcpy(new_dir, page_directory, 4096); + + return new_dir_phys; +} + +void paging_map_page_in(uint32_t *pd, uint32_t vaddr, uint32_t paddr, uint32_t flags) { + uint32_t pd_idx = PD_INDEX(vaddr); + uint32_t pt_idx = PT_INDEX(vaddr); + + uint32_t *pt; + if (pd[pd_idx] & PAGE_PRESENT) { + pt = (uint32_t *)(pd[pd_idx] & 0xFFFFF000); + } else { + /* Allocate a new page table */ + phys_addr_t pt_phys = pmm_alloc_page(PMM_ZONE_NORMAL); + if (pt_phys == 0) { + offset_print(" PAGING: cannot allocate page table for process\n"); + return; + } + memset((void *)pt_phys, 0, 4096); + pd[pd_idx] = pt_phys | PAGE_PRESENT | PAGE_WRITE | PAGE_USER; + pt = (uint32_t *)pt_phys; + } + + pt[pt_idx] = (paddr & 0xFFFFF000) | (flags & 0xFFF); +} + +void paging_switch_directory(uint32_t phys_addr) { + __asm__ volatile("mov %0, %%cr3" : : "r"(phys_addr) : "memory"); +} diff --git a/src/paging.h b/src/paging.h index 98aa25f..22de38e 100644 --- a/src/paging.h +++ b/src/paging.h @@ -83,4 +83,36 @@ void paging_free_page(void *vaddr); */ uint32_t paging_get_physical(uint32_t vaddr); +/** + * Get the physical address of the kernel page directory. + * + * @return Physical address of the page directory. + */ +uint32_t paging_get_directory_phys(void); + +/** + * Clone the kernel page directory for a new process. + * Copies all kernel-space entries; user-space entries are empty. 
+ * + * @return Physical address of the new page directory, or 0 on failure. + */ +uint32_t paging_clone_directory(void); + +/** + * Map a page in a specific page directory (not necessarily the active one). + * + * @param pd Pointer to the page directory (virtual/identity-mapped address). + * @param vaddr Virtual address to map (page-aligned). + * @param paddr Physical address to map to (page-aligned). + * @param flags Page flags. + */ +void paging_map_page_in(uint32_t *pd, uint32_t vaddr, uint32_t paddr, uint32_t flags); + +/** + * Switch the active page directory. + * + * @param phys_addr Physical address of the page directory. + */ +void paging_switch_directory(uint32_t phys_addr); + #endif /* PAGING_H */ diff --git a/src/process.c b/src/process.c new file mode 100644 index 0000000..4d49ac1 --- /dev/null +++ b/src/process.c @@ -0,0 +1,336 @@ +/** + * @file process.c + * @brief Process management subsystem implementation. + * + * Manages process creation, context switching, and scheduling. + * Each process has its own page directory and kernel stack. + * Context switching is done by modifying the interrupt frame registers + * on the kernel stack, so the iret restores the next process's state. + */ + +#include "process.h" +#include "tss.h" +#include "paging.h" +#include "pmm.h" +#include "kmalloc.h" +#include + +/* Debug print helpers defined in kernel.c */ +extern void offset_print(const char *str); +extern void print_hex(uint32_t val); + +/** Assembly helper: enter user mode for the first process. */ +extern void enter_usermode(uint32_t eip, uint32_t esp); + +/** Process table. */ +static process_t process_table[MAX_PROCESSES]; + +/** Currently running process, or NULL if none. */ +static process_t *current_process = NULL; + +/** Next PID to assign. */ +static uint32_t next_pid = 1; + +/** + * Find a free slot in the process table. + * + * @return Index of a free slot, or -1 if full. 
+ */ +static int find_free_slot(void) { + for (int i = 0; i < MAX_PROCESSES; i++) { + if (process_table[i].state == PROCESS_UNUSED) { + return i; + } + } + return -1; +} + +void init_process(void) { + memset(process_table, 0, sizeof(process_table)); + current_process = NULL; + next_pid = 1; + offset_print(" PROCESS: subsystem initialized\n"); +} + +int32_t process_create(const char *name, const void *code, uint32_t size) { + int slot = find_free_slot(); + if (slot < 0) { + offset_print(" PROCESS: no free slots\n"); + return -1; + } + + process_t *proc = &process_table[slot]; + memset(proc, 0, sizeof(process_t)); + + proc->pid = next_pid++; + proc->state = PROCESS_READY; + + /* Copy name */ + uint32_t nlen = strlen(name); + if (nlen > 31) nlen = 31; + memcpy(proc->name, name, nlen); + proc->name[nlen] = '\0'; + + /* Allocate kernel stack (full page, not from kmalloc which has header overhead) */ + void *kstack = paging_alloc_page(); + if (!kstack) { + offset_print(" PROCESS: cannot allocate kernel stack\n"); + proc->state = PROCESS_UNUSED; + return -1; + } + proc->kernel_stack = (uint32_t)kstack; + proc->kernel_stack_top = proc->kernel_stack + 4096; + + /* Clone the kernel page directory */ + proc->page_directory = paging_clone_directory(); + if (!proc->page_directory) { + offset_print(" PROCESS: cannot clone page directory\n"); + paging_free_page((void *)proc->kernel_stack); + proc->state = PROCESS_UNUSED; + return -1; + } + + uint32_t *pd = (uint32_t *)proc->page_directory; + + /* Map user code pages */ + uint32_t code_pages = (size + 4095) / 4096; + for (uint32_t i = 0; i < code_pages; i++) { + phys_addr_t phys = pmm_alloc_page(PMM_ZONE_NORMAL); + if (phys == 0) { + offset_print(" PROCESS: cannot allocate code page\n"); + /* TODO: clean up already allocated pages */ + proc->state = PROCESS_UNUSED; + return -1; + } + + uint32_t vaddr = USER_CODE_START + i * 4096; + paging_map_page_in(pd, vaddr, phys, + PAGE_PRESENT | PAGE_WRITE | PAGE_USER); + + /* Copy code to 
the physical page (identity-mapped, so phys == virt) */ + uint32_t offset = i * 4096; + uint32_t bytes = size - offset; + if (bytes > 4096) bytes = 4096; + memcpy((void *)phys, (const uint8_t *)code + offset, bytes); + if (bytes < 4096) { + memset((void *)(phys + bytes), 0, 4096 - bytes); + } + } + + /* Map user stack pages */ + uint32_t stack_base = USER_STACK_TOP - USER_STACK_PAGES * 4096; + for (uint32_t i = 0; i < USER_STACK_PAGES; i++) { + phys_addr_t phys = pmm_alloc_page(PMM_ZONE_NORMAL); + if (phys == 0) { + offset_print(" PROCESS: cannot allocate stack page\n"); + proc->state = PROCESS_UNUSED; + return -1; + } + + uint32_t vaddr = stack_base + i * 4096; + paging_map_page_in(pd, vaddr, phys, + PAGE_PRESENT | PAGE_WRITE | PAGE_USER); + + /* Zero the stack page */ + memset((void *)phys, 0, 4096); + } + + proc->user_stack = USER_STACK_TOP; + proc->entry_point = USER_CODE_START; + + /* Set up saved registers for the first context switch. + * When the scheduler loads these into regs on the stack, the + * iret will enter user mode at the entry point. 
*/ + memset(&proc->saved_regs, 0, sizeof(registers_t)); + proc->saved_regs.ds = 0x23; /* User data segment */ + proc->saved_regs.ss = 0x23; /* User stack segment */ + proc->saved_regs.cs = 0x1B; /* User code segment */ + proc->saved_regs.eip = USER_CODE_START; + proc->saved_regs.useresp = USER_STACK_TOP; + proc->saved_regs.eflags = 0x202; /* IF=1 (enable interrupts) */ + proc->saved_regs.esp = USER_STACK_TOP; /* For pusha's ESP */ + + offset_print(" PROCESS: created '"); + offset_print(proc->name); + offset_print("' pid="); + print_hex(proc->pid); + + return (int32_t)proc->pid; +} + +void schedule_tick(registers_t *regs) { + if (!current_process && next_pid <= 1) { + return; /* No processes created yet */ + } + + /* Save current process state */ + if (current_process) { + current_process->saved_regs = *regs; + if (current_process->state == PROCESS_RUNNING) { + current_process->state = PROCESS_READY; + } + } + + /* Find next ready process (round-robin) */ + uint32_t start_idx = 0; + if (current_process) { + /* Find current process's index in the table */ + for (int i = 0; i < MAX_PROCESSES; i++) { + if (&process_table[i] == current_process) { + start_idx = (uint32_t)i; + break; + } + } + } + + process_t *next = NULL; + for (int i = 1; i <= MAX_PROCESSES; i++) { + uint32_t idx = (start_idx + (uint32_t)i) % MAX_PROCESSES; + if (process_table[idx].state == PROCESS_READY) { + next = &process_table[idx]; + break; + } + } + + if (!next) { + /* No other process ready */ + if (current_process && current_process->state == PROCESS_READY) { + current_process->state = PROCESS_RUNNING; + } + return; + } + + /* Switch to next process */ + current_process = next; + current_process->state = PROCESS_RUNNING; + + /* Update TSS kernel stack for ring transitions */ + tss_set_kernel_stack(current_process->kernel_stack_top); + + /* Switch page directory */ + paging_switch_directory(current_process->page_directory); + + /* Restore next process's registers into the interrupt frame */ + 
*regs = current_process->saved_regs; +} + +void schedule(void) { + /* Trigger a yield via software interrupt. + * This is a simplified version for voluntary preemption from kernel code. */ + __asm__ volatile("int $0x80" : : "a"(5)); /* SYS_YIELD = 5 */ +} + +void process_exit(int32_t code) { + if (!current_process) { + offset_print(" PROCESS: exit with no current process\n"); + return; + } + + offset_print(" PROCESS: pid "); + print_hex(current_process->pid); + offset_print(" PROCESS: exited with code "); + print_hex((uint32_t)code); + + current_process->state = PROCESS_ZOMBIE; + current_process->exit_code = code; + + /* Find another process to run. + * We construct a minimal register frame to pass to schedule_tick. + * schedule_tick still copies this dummy frame into the zombie's saved_regs; because the state is ZOMBIE (not RUNNING) it only skips the transition back to READY. */ + registers_t dummy; + memset(&dummy, 0, sizeof(dummy)); + schedule_tick(&dummy); + + /* NOTE(review): schedule_tick returns here whether or not it selected another process. When it did, it only updated current_process, TSS.ESP0, CR3 and the dummy frame (never the real interrupt frame), so execution still falls into the halt loop below. */ + offset_print(" PROCESS: no processes remaining, halting\n"); + for (;;) { + __asm__ volatile("hlt"); + } +} + +process_t *process_current(void) { + return current_process; +} + +process_t *process_get(uint32_t pid) { + for (int i = 0; i < MAX_PROCESSES; i++) { + if (process_table[i].state != PROCESS_UNUSED && + process_table[i].pid == pid) { + return &process_table[i]; + } + } + return NULL; +} + +int32_t process_fork(void) { + if (!current_process) { + return -1; + } + + int slot = find_free_slot(); + if (slot < 0) { + return -1; + } + + process_t *child = &process_table[slot]; + memcpy(child, current_process, sizeof(process_t)); + + child->pid = next_pid++; + child->state = PROCESS_READY; + child->parent_pid = current_process->pid; + + /* Allocate a separate kernel stack for the child */ + void *child_kstack = paging_alloc_page(); + if (!child_kstack) { + child->state = PROCESS_UNUSED; + return -1; + } + child->kernel_stack = (uint32_t)child_kstack; + child->kernel_stack_top = child->kernel_stack + 4096; + + /* Clone the page directory */ 
+ child->page_directory = paging_clone_directory(); + if (!child->page_directory) { + paging_free_page((void *)child->kernel_stack); /* stack came from paging_alloc_page(), not kmalloc(), so release it the same way process_create() does */ + child->state = PROCESS_UNUSED; + return -1; + } + + /* Child's return value is 0 (in EAX) */ + child->saved_regs.eax = 0; + + /* Parent's return value is child's PID */ + return (int32_t)child->pid; +} + +void process_run_first(void) { + /* Find the first ready process */ + process_t *first = NULL; + for (int i = 0; i < MAX_PROCESSES; i++) { + if (process_table[i].state == PROCESS_READY) { + first = &process_table[i]; + break; + } + } + + if (!first) { + offset_print(" PROCESS: no process to run\n"); + return; + } + + current_process = first; + first->state = PROCESS_RUNNING; + + /* Set up TSS for this process */ + tss_set_kernel_stack(first->kernel_stack_top); + + /* Switch to the process's page directory */ + paging_switch_directory(first->page_directory); + + offset_print(" PROCESS: entering user mode for '"); + offset_print(first->name); + offset_print("'\n"); + + /* Jump to user mode - does not return */ + enter_usermode(first->entry_point, first->user_stack); +} diff --git a/src/process.h b/src/process.h new file mode 100644 index 0000000..d4901df --- /dev/null +++ b/src/process.h @@ -0,0 +1,127 @@ +/** + * @file process.h + * @brief Process management subsystem. + * + * Manages process creation, scheduling, and context switching. + * Supports both kernel-mode and user-mode (Ring 3) processes. + */ + +#ifndef PROCESS_H +#define PROCESS_H + +#include +#include +#include "isr.h" + +/** Maximum number of concurrent processes. */ +#define MAX_PROCESSES 64 + +/** Per-process kernel stack size (4 KiB). */ +#define KERNEL_STACK_SIZE 4096 + +/** User-mode stack virtual address (top of user space). */ +#define USER_STACK_TOP 0xBFFFF000 +/** User-mode stack size (8 KiB = 2 pages). */ +#define USER_STACK_PAGES 2 + +/** User-mode code start virtual address. */ +#define USER_CODE_START 0x08048000 + +/** Process states. 
*/ +typedef enum { + PROCESS_UNUSED = 0, /**< Slot is free. */ + PROCESS_READY, /**< Ready to run. */ + PROCESS_RUNNING, /**< Currently executing. */ + PROCESS_BLOCKED, /**< Waiting for I/O or event. */ + PROCESS_ZOMBIE, /**< Finished, waiting for parent to reap. */ +} process_state_t; + +/** + * Saved CPU context for context switching. + * Uses the full interrupt frame (registers_t from isr.h) so that + * saving/restoring context works directly with the ISR stub's stack layout. + */ + +/** + * Process control block (PCB). + */ +typedef struct process { + uint32_t pid; /**< Process ID. */ + process_state_t state; /**< Current state. */ + registers_t saved_regs; /**< Saved interrupt frame for context switch. */ + uint32_t kernel_stack; /**< Base of kernel stack (virtual). */ + uint32_t kernel_stack_top; /**< Kernel stack top for TSS. */ + uint32_t page_directory; /**< Physical address of page directory. */ + uint32_t user_stack; /**< Virtual address of user stack top. */ + uint32_t entry_point; /**< User-mode entry point. */ + int32_t exit_code; /**< Exit code (if ZOMBIE). */ + uint32_t parent_pid; /**< Parent process ID. */ + char name[32]; /**< Process name (for debugging). */ +} process_t; + +/** + * Initialize the process subsystem. + * Must be called after paging and kmalloc are initialized. + */ +void init_process(void); + +/** + * Create a new user-mode process from a memory image. + * + * @param name Process name (for debugging). + * @param code Pointer to the code to load. + * @param size Size of the code in bytes. + * @return PID of the new process, or -1 on failure. + */ +int32_t process_create(const char *name, const void *code, uint32_t size); + +/** + * Yield the current process to the scheduler. + * Called from timer interrupt or voluntarily via SYS_YIELD. + * Modifies the registers on the stack to switch context. + * + * @param regs Pointer to the interrupt frame registers on the kernel stack. 
+ */ +void schedule_tick(registers_t *regs); + +/** + * Voluntary yield wrapper (triggers schedule via current context). + */ +void schedule(void); + +/** + * Exit the current process with the given exit code. + * + * @param code Exit code. + */ +void process_exit(int32_t code); + +/** + * Get the currently running process. + * + * @return Pointer to the current process PCB, or NULL if none. + */ +process_t *process_current(void); + +/** + * Get a process by PID. + * + * @param pid Process ID. + * @return Pointer to the process PCB, or NULL if not found. + */ +process_t *process_get(uint32_t pid); + +/** + * Fork the current process. + * + * @return PID of the child in the parent, 0 in the child, -1 on error. + */ +int32_t process_fork(void); + +/** + * Start the first user-mode process. Does not return if a process is ready. + * Should be called after creating at least one process. + */ +void process_run_first(void); + +#endif /* PROCESS_H */ diff --git a/src/syscall.c b/src/syscall.c new file mode 100644 index 0000000..bc32ee7 --- /dev/null +++ b/src/syscall.c @@ -0,0 +1,151 @@ +/** + * @file syscall.c + * @brief System call handler implementation. + * + * Dispatches INT 0x80 system calls to the appropriate kernel function. + * System call number is in EAX, arguments in EBX, ECX, EDX, ESI, EDI. + * Return value is placed in EAX. + */ + +#include "syscall.h" +#include "process.h" +#include "port_io.h" +#include "vga.h" +#include + +/* Debug print helpers defined in kernel.c */ +extern void offset_print(const char *str); +extern void print_hex(uint32_t val); + +/** IDT gate setup (from idt.c) */ +extern void set_idt_gate_from_c(uint8_t num, uint32_t base, uint16_t sel, uint8_t flags); + +/** INT 0x80 assembly stub */ +extern void isr128(void); + +/** + * Handle SYS_EXIT: terminate the current process. 
+ */ +static int32_t sys_exit(registers_t *regs) { + process_exit((int32_t)regs->ebx); + /* Never returns */ + return 0; +} + +/** + * Handle SYS_WRITE: write bytes to a file descriptor. + * Currently only supports fd=1 (stdout) -> debug port + VGA. + */ +static int32_t sys_write(registers_t *regs) { + int fd = (int)regs->ebx; + const char *buf = (const char *)regs->ecx; + uint32_t len = regs->edx; + + if (fd == 1 || fd == 2) { + /* stdout or stderr: write to debug port and VGA */ + for (uint32_t i = 0; i < len; i++) { + outb(0xE9, buf[i]); + vga_putchar(buf[i]); + } + return (int32_t)len; + } + + return -1; /* Invalid fd */ +} + +/** + * Handle SYS_READ: read bytes from a file descriptor. + * Stub for now. + */ +static int32_t sys_read(registers_t *regs) { + (void)regs; + return -1; /* Not implemented */ +} + +/** + * Handle SYS_FORK: fork the current process. + */ +static int32_t sys_fork(registers_t *regs) { + (void)regs; + return process_fork(); +} + +/** + * Handle SYS_GETPID: return the current process ID. + */ +static int32_t sys_getpid(registers_t *regs) { + (void)regs; + process_t *cur = process_current(); + return cur ? (int32_t)cur->pid : -1; +} + +/** + * Handle SYS_YIELD: voluntarily yield the CPU. + */ +static int32_t sys_yield(registers_t *regs) { + (void)regs; + schedule(); + return 0; +} + +/** + * Handle SYS_WAITPID: wait for a child to exit. + */ +static int32_t sys_waitpid(registers_t *regs) { + uint32_t pid = regs->ebx; + process_t *child = process_get(pid); + if (!child) { + return -1; + } + + /* Busy-wait until child is zombie */ + while (child->state != PROCESS_ZOMBIE) { + schedule(); + } + + int32_t code = child->exit_code; + child->state = PROCESS_UNUSED; + return code; +} + +/** + * Handle SYS_EXEC: placeholder. + */ +static int32_t sys_exec(registers_t *regs) { + (void)regs; + return -1; /* Not implemented yet */ +} + +/** System call dispatch table. 
*/ +typedef int32_t (*syscall_fn)(registers_t *); +static syscall_fn syscall_table[NUM_SYSCALLS] = { + [SYS_EXIT] = sys_exit, + [SYS_WRITE] = sys_write, + [SYS_READ] = sys_read, + [SYS_FORK] = sys_fork, + [SYS_GETPID] = sys_getpid, + [SYS_YIELD] = sys_yield, + [SYS_WAITPID] = sys_waitpid, + [SYS_EXEC] = sys_exec, +}; + +void syscall_handler(registers_t *regs) { + uint32_t num = regs->eax; + + if (num >= NUM_SYSCALLS || !syscall_table[num]) { + offset_print(" SYSCALL: invalid syscall "); + print_hex(num); + regs->eax = (uint32_t)-1; + return; + } + + int32_t ret = syscall_table[num](regs); + regs->eax = (uint32_t)ret; +} + +void init_syscalls(void) { + /* Install INT 0x80 as a user-callable interrupt gate. + * Flags: 0xEE = Present(1) DPL(11) 0 Type(1110) = 32-bit Interrupt Gate, Ring 3 callable */ + set_idt_gate_from_c(0x80, (uint32_t)isr128, 0x08, 0xEE); + offset_print(" SYSCALL: INT 0x80 installed\n"); +} diff --git a/src/syscall.h b/src/syscall.h new file mode 100644 index 0000000..e0344c2 --- /dev/null +++ b/src/syscall.h @@ -0,0 +1,42 @@ +/** + * @file syscall.h + * @brief System call interface. + * + * Defines system call numbers and the kernel-side handler. User-mode + * processes invoke system calls via INT 0x80 with the call number in EAX + * and arguments in EBX, ECX, EDX, ESI, EDI. + */ + +#ifndef SYSCALL_H +#define SYSCALL_H + +#include +#include "isr.h" + +/** System call numbers. */ +#define SYS_EXIT 0 /**< Exit the process. Arg: exit code in EBX. */ +#define SYS_WRITE 1 /**< Write to a file descriptor. fd=EBX, buf=ECX, len=EDX. */ +#define SYS_READ 2 /**< Read from a file descriptor. fd=EBX, buf=ECX, len=EDX. */ +#define SYS_FORK 3 /**< Fork the current process. */ +#define SYS_GETPID 4 /**< Get current process ID. */ +#define SYS_YIELD 5 /**< Yield the CPU. */ +#define SYS_WAITPID 6 /**< Wait for a child process. pid=EBX. */ +#define SYS_EXEC 7 /**< Execute a program. path=EBX, argv=ECX. */ + +/** Total number of system calls. 
*/
+#define NUM_SYSCALLS 8
+
+/**
+ * Initialize the system call handler.
+ * Installs INT 0x80 in the IDT.
+ */
+void init_syscalls(void);
+
+/**
+ * System call dispatcher (called from the INT 0x80 handler).
+ *
+ * @param regs Register state at the time of the interrupt.
+ */
+void syscall_handler(registers_t *regs);
+
+#endif /* SYSCALL_H */
diff --git a/src/tss.c b/src/tss.c
new file mode 100644
index 0000000..4052b6b
--- /dev/null
+++ b/src/tss.c
@@ -0,0 +1,47 @@
+/**
+ * @file tss.c
+ * @brief Task State Segment initialization and management.
+ *
+ * Sets up the TSS for ring 3 -> ring 0 transitions. The TSS is installed
+ * as GDT entry 5 (selector 0x28). The GDT must be expanded to 6 entries
+ * to accommodate the TSS descriptor.
+ */
+
+#include "tss.h"
+#include "gdt.h"
+#include <string.h>
+
+/** The single TSS instance; only ss0, esp0 and iomap_base are ever set. */
+static tss_entry_t tss;
+
+/** Assembly function to load the TSS register. */
+extern void tss_flush(void);
+
+void init_tss(void) {
+    uint32_t base = (uint32_t)(uintptr_t)&tss;
+    uint32_t limit = sizeof(tss) - 1; /* descriptor limit = last valid byte offset */
+
+    /* Clear the TSS */
+    memset(&tss, 0, sizeof(tss));
+
+    /* Set kernel stack segment and pointer.
+     * SS0 = kernel data segment (0x10).
+     * ESP0 will be set per-process during context switches. */
+    tss.ss0 = 0x10;
+    tss.esp0 = 0; /* Will be set before entering user mode */
+
+    /* Set the I/O map base to the size of the TSS, meaning no I/O bitmap. */
+    tss.iomap_base = sizeof(tss);
+
+    /* Install the TSS descriptor in GDT entry 5 (kernel-only, DPL 0).
+     * Access byte: 0x89 = Present(1) DPL(00) 0 Type(1001) = 32-bit TSS (Available)
+     * Granularity: 0x00 = byte granularity, 16-bit */
+    gdt_set_gate(5, base, limit, 0x89, 0x00);
+
+    /* Load the TSS register */
+    tss_flush();
+}
+
+void tss_set_kernel_stack(uint32_t esp0) {
+    tss.esp0 = esp0; /* stack top used on the next ring 3 -> ring 0 entry */
+}
diff --git a/src/tss.h b/src/tss.h
new file mode 100644
index 0000000..4c19a6b
--- /dev/null
+++ b/src/tss.h
@@ -0,0 +1,63 @@
+/**
+ * @file tss.h
+ * @brief Task State Segment (TSS) definitions.
+ *
+ * The TSS is required by x86 for ring transitions. When a user-mode process
+ * triggers an interrupt, the CPU loads the kernel stack pointer (SS0:ESP0)
+ * from the TSS before pushing the interrupt frame.
+ */
+
+#ifndef TSS_H
+#define TSS_H
+
+#include <stdint.h>
+
+/**
+ * x86 Task State Segment structure.
+ * Only SS0 and ESP0 are actively used for ring 3 -> ring 0 transitions.
+ */
+typedef struct tss_entry {
+    uint32_t prev_tss;   /**< Previous task link. */
+    uint32_t esp0;       /**< Kernel stack pointer (loaded on ring transition). */
+    uint32_t ss0;        /**< Kernel stack segment (loaded on ring transition). */
+    uint32_t esp1;       /**< Ring 1 stack pointer. */
+    uint32_t ss1;        /**< Ring 1 stack segment. */
+    uint32_t esp2;       /**< Ring 2 stack pointer. */
+    uint32_t ss2;        /**< Ring 2 stack segment. */
+    uint32_t cr3;
+    uint32_t eip;
+    uint32_t eflags;
+    uint32_t eax;
+    uint32_t ecx;
+    uint32_t edx;
+    uint32_t ebx;
+    uint32_t esp;
+    uint32_t ebp;
+    uint32_t esi;
+    uint32_t edi;
+    uint32_t es;
+    uint32_t cs;
+    uint32_t ss;
+    uint32_t ds;
+    uint32_t fs;
+    uint32_t gs;
+    uint32_t ldt;        /**< LDT segment selector. */
+    uint16_t trap;       /**< Debug trap flag (bit 0). */
+    uint16_t iomap_base; /**< Offset of the I/O permission bitmap from the TSS base. */
+} __attribute__((packed)) tss_entry_t;
+
+/**
+ * Initialize the TSS and install it as GDT entry 5 (selector 0x28).
+ * Must be called after init_gdt().
+ */
+void init_tss(void);
+
+/**
+ * Update the kernel stack pointer in the TSS.
+ * Called during context switches to set the stack for the next process.
+ *
+ * @param esp0 The new kernel stack pointer.
+ */
+void tss_set_kernel_stack(uint32_t esp0);
+
+#endif /* TSS_H */