/** * @file syscall.c * @brief System call handler implementation. * * Dispatches INT 0x80 system calls to the appropriate kernel function. * System call number is in EAX, arguments in EBX, ECX, EDX, ESI, EDI. * Return value is placed in EAX. */ #include "syscall.h" #include "process.h" #include "env.h" #include "port_io.h" #include "vga.h" #include "vfs.h" #include "keyboard.h" #include "cpio.h" #include "paging.h" #include "pmm.h" #include #include /** Magic return value indicating the syscall blocked and switched processes. * syscall_handler must NOT overwrite regs->eax in this case. */ #define SYSCALL_SWITCHED 0x7FFFFFFF /* Debug print helpers defined in kernel.c */ extern void offset_print(const char *str); extern void print_hex(uint32_t val); /** IDT gate setup (from idt.c) */ extern void set_idt_gate_from_c(uint8_t num, uint32_t base, uint16_t sel, uint8_t flags); /** INT 0x80 assembly stub */ extern void isr128(void); /** * Handle SYS_EXIT: terminate the current process. */ static int32_t sys_exit(registers_t *regs) { process_exit((int32_t)regs->ebx); /* Never returns */ return 0; } /** * Handle SYS_WRITE: write bytes to a file descriptor. * Currently only supports fd=1 (stdout) -> debug port + VGA. */ static int32_t sys_write(registers_t *regs) { int fd = (int)regs->ebx; const char *buf = (const char *)regs->ecx; uint32_t len = regs->edx; if (fd == 1 || fd == 2) { /* stdout or stderr: write to debug port and VGA */ for (uint32_t i = 0; i < len; i++) { outb(0xE9, buf[i]); vga_putchar(buf[i]); } return (int32_t)len; } /* VFS file descriptors (fd >= 3) */ if (fd >= 3) { return vfs_write(fd, buf, len); } return -1; /* Invalid fd */ } /** * Handle SYS_READ: read bytes from a file descriptor. * fd=0 (stdin) reads from the keyboard buffer (non-blocking). * Returns 0 if no data available; caller should yield and retry. */ static int32_t sys_read(registers_t *regs) { int fd = (int)regs->ebx; char *buf = (char *)regs->ecx; uint32_t len = regs->edx; if (fd == 0) { /* stdin: non-blocking read from keyboard */ if (keyboard_has_data()) { uint32_t n = keyboard_read(buf, len); return (int32_t)n; } return 0; /* No data available */ } /* VFS file descriptors (fd >= 3) */ if (fd >= 3) { return vfs_read(fd, buf, len); } return -1; /* Invalid fd */ } /** * Handle SYS_FORK: fork the current process. */ static int32_t sys_fork(registers_t *regs) { return process_fork(regs); } /** * Handle SYS_GETPID: return the current process ID. */ static int32_t sys_getpid(registers_t *regs) { (void)regs; process_t *cur = process_current(); return cur ? (int32_t)cur->pid : -1; } /** * Handle SYS_YIELD: voluntarily yield the CPU. * Calls schedule_tick directly to potentially switch to another process. */ static int32_t sys_yield(registers_t *regs) { schedule_tick(regs); return SYSCALL_SWITCHED; } /** * Handle SYS_WAITPID: wait for a child to exit. * * If the child is already a zombie, reaps immediately and returns the code. * Otherwise, blocks the current process and switches to the next one. * When the child exits, process_exit() will unblock the waiting parent * and set its saved_regs.eax to the exit code. */ static int32_t sys_waitpid(registers_t *regs) { uint32_t pid = regs->ebx; process_t *child = process_get(pid); if (!child) { return -1; } /* If child already exited, reap immediately */ if (child->state == PROCESS_ZOMBIE) { int32_t code = child->exit_code; child->state = PROCESS_UNUSED; return code; } /* Block the current process until the child exits */ process_t *cur = process_current(); cur->state = PROCESS_BLOCKED; cur->waiting_for_pid = pid; /* Save the current syscall registers so we resume here when unblocked. * The return value (eax) will be set by process_exit when the child dies. */ cur->saved_regs = *regs; /* Schedule the next process. This modifies *regs to the next process's * saved state, so when the ISR stub does iret, it enters the next process. */ schedule_tick(regs); /* Tell syscall_handler not to overwrite regs->eax, since regs now * points to the next process's registers on the kernel stack. */ return SYSCALL_SWITCHED; } /** * Handle SYS_EXEC: replace the current process image with a new program. * EBX = path to binary (C string), e.g. "hello-world". * Loads the binary from the initrd and replaces the current process's * code and stack. Does not return on success. */ static int32_t sys_exec(registers_t *regs) { const char *path = (const char *)regs->ebx; if (!path) return -1; process_t *cur = process_current(); if (!cur) return -1; /* Look up the binary in the initrd */ cpio_entry_t entry; if (cpio_find(path, &entry) != 0) { return -1; /* Not found */ } uint32_t *pd = (uint32_t *)cur->page_directory; /* Unmap and free old user code pages (0x08048000 region). * We don't know exactly how many pages were mapped, so scan a * reasonable range. */ for (uint32_t vaddr = USER_CODE_START; vaddr < USER_CODE_START + 0x100000; /* up to 1 MiB of code */ vaddr += 4096) { uint32_t pd_idx = vaddr >> 22; uint32_t pt_idx = (vaddr >> 12) & 0x3FF; if (!(pd[pd_idx] & 0x001)) break; /* No page table */ uint32_t *pt = (uint32_t *)(pd[pd_idx] & 0xFFFFF000); if (!(pt[pt_idx] & 0x001)) break; /* No page */ phys_addr_t old_phys = pt[pt_idx] & 0xFFFFF000; pt[pt_idx] = 0; pmm_free_page(old_phys); } /* Map new code pages */ uint32_t code_pages = (entry.datasize + 4095) / 4096; for (uint32_t i = 0; i < code_pages; i++) { phys_addr_t phys = pmm_alloc_page(PMM_ZONE_NORMAL); if (phys == 0) return -1; uint32_t vaddr = USER_CODE_START + i * 4096; paging_map_page_in(pd, vaddr, phys, PAGE_PRESENT | PAGE_WRITE | PAGE_USER); uint32_t offset = i * 4096; uint32_t bytes = entry.datasize - offset; if (bytes > 4096) bytes = 4096; memcpy((void *)phys, (const uint8_t *)entry.data + offset, bytes); if (bytes < 4096) { memset((void *)(phys + bytes), 0, 4096 - bytes); } } /* Zero the user stack pages (reuse existing stack mappings) */ uint32_t stack_base = USER_STACK_TOP - USER_STACK_PAGES * 4096; for (uint32_t i = 0; i < USER_STACK_PAGES; i++) { uint32_t vaddr = stack_base + i * 4096; uint32_t pd_idx = vaddr >> 22; uint32_t pt_idx = (vaddr >> 12) & 0x3FF; if ((pd[pd_idx] & 0x001)) { uint32_t *pt = (uint32_t *)(pd[pd_idx] & 0xFFFFF000); if ((pt[pt_idx] & 0x001)) { phys_addr_t phys = pt[pt_idx] & 0xFFFFF000; memset((void *)phys, 0, 4096); } } } /* Flush TLB */ paging_switch_directory(cur->page_directory); /* Update process name */ uint32_t nlen = strlen(path); if (nlen > 31) nlen = 31; memcpy(cur->name, path, nlen); cur->name[nlen] = '\0'; /* Set up registers for the new program */ regs->eip = USER_CODE_START; regs->useresp = USER_STACK_TOP; regs->esp = USER_STACK_TOP; regs->eax = 0; regs->ebx = 0; regs->ecx = 0; regs->edx = 0; regs->esi = 0; regs->edi = 0; regs->ebp = 0; regs->cs = 0x1B; regs->ds = 0x23; regs->ss = 0x23; regs->eflags = 0x202; /* IF=1 */ /* Return SYSCALL_SWITCHED so syscall_handler doesn't overwrite regs */ return SYSCALL_SWITCHED; } /** * Handle SYS_GETENV: get an environment variable. * EBX = key pointer, ECX = value buffer pointer, EDX = buffer size. * Returns length of value, or -1 if not found. */ static int32_t sys_getenv(registers_t *regs) { const char *key = (const char *)regs->ebx; char *buf = (char *)regs->ecx; uint32_t bufsize = regs->edx; process_t *cur = process_current(); if (!cur) return -1; return env_get(&cur->env, key, buf, bufsize); } /** * Handle SYS_SETENV: set an environment variable. * EBX = key pointer, ECX = value pointer (NULL to unset). * Returns 0 on success, -1 on error. */ static int32_t sys_setenv(registers_t *regs) { const char *key = (const char *)regs->ebx; const char *value = (const char *)regs->ecx; process_t *cur = process_current(); if (!cur) return -1; return env_set(&cur->env, key, value); } /** * Handle SYS_OPEN: open a file by path. * EBX = path string, ECX = flags. * Returns file descriptor (>= 3) on success, -1 on failure. */ static int32_t sys_open(registers_t *regs) { const char *path = (const char *)regs->ebx; uint32_t flags = regs->ecx; return (int32_t)vfs_open(path, flags); } /** * Handle SYS_CLOSE: close a file descriptor. * EBX = fd. * Returns 0 on success, -1 on failure. */ static int32_t sys_close(registers_t *regs) { int fd = (int)regs->ebx; if (fd < 3) return -1; /* Don't close stdin/stdout/stderr */ vfs_close(fd); return 0; } /** * Handle SYS_READDIR: read a directory entry. * EBX = path, ECX = index, EDX = name buffer (128 bytes min). * Returns entry type (VFS_FILE=1, VFS_DIRECTORY=2, ...) on success, -1 at end. */ static int32_t sys_readdir(registers_t *regs) { const char *path = (const char *)regs->ebx; uint32_t idx = regs->ecx; char *name_buf = (char *)regs->edx; vfs_dirent_t entry; if (vfs_readdir(path, idx, &entry) != 0) { return -1; } /* Copy entry name to user buffer */ uint32_t len = strlen(entry.name); if (len >= 128) len = 127; memcpy(name_buf, entry.name, len); name_buf[len] = '\0'; return (int32_t)entry.type; } /** System call dispatch table. */ typedef int32_t (*syscall_fn)(registers_t *); static syscall_fn syscall_table[NUM_SYSCALLS] = { [SYS_EXIT] = sys_exit, [SYS_WRITE] = sys_write, [SYS_READ] = sys_read, [SYS_FORK] = sys_fork, [SYS_GETPID] = sys_getpid, [SYS_YIELD] = sys_yield, [SYS_WAITPID] = sys_waitpid, [SYS_EXEC] = sys_exec, [SYS_GETENV] = sys_getenv, [SYS_SETENV] = sys_setenv, [SYS_READDIR] = sys_readdir, [SYS_OPEN] = sys_open, [SYS_CLOSE] = sys_close, }; void syscall_handler(registers_t *regs) { uint32_t num = regs->eax; if (num >= NUM_SYSCALLS || !syscall_table[num]) { offset_print(" SYSCALL: invalid syscall "); print_hex(num); regs->eax = (uint32_t)-1; return; } int32_t ret = syscall_table[num](regs); if (ret != SYSCALL_SWITCHED) { regs->eax = (uint32_t)ret; } } void init_syscalls(void) { /* Install INT 0x80 as a user-callable interrupt gate. * Flags: 0xEE = Present(1) DPL(11) 0 Type(1110) = 32-bit Interrupt Gate, Ring 3 callable */ set_idt_gate_from_c(0x80, (uint32_t)isr128, 0x08, 0xEE); offset_print(" SYSCALL: INT 0x80 installed\n"); }