Files
claude-os/src/syscall.c
AI d1bf69ce0d Add sysfs VFS driver, SYS_OPEN/CLOSE syscalls, cat app
Sysfs:
- New VFS driver mounted at /sys that lets kernel drivers expose
  virtual text files via namespace registration
- Drivers call sysfs_register(name, ops, ctx) with list/read/write
  callbacks for their namespace
- IDE driver registers 'ide' namespace exposing per-device attributes:
  model, type, channel, drive, sectors, sector_size
- Tested: ls /sys -> ide, ls /sys/ide -> hdd1 cd1,
  cat /sys/ide/hdd1/model -> QEMU HARDDISK

Syscalls:
- Added SYS_OPEN (11) and SYS_CLOSE (12) for file I/O from userspace
- Extended SYS_READ/SYS_WRITE to handle VFS file descriptors (fd >= 3)
- Updated userspace syscalls.h with open()/close() wrappers

Apps:
- New 'cat' app: reads and displays file contents via open/read/close
- Updated 'ls' to accept path argument via ARG1 env var
- Updated shell to pass ARG1 env var to external commands
2026-02-23 14:26:52 +00:00

382 lines
11 KiB
C

/**
* @file syscall.c
* @brief System call handler implementation.
*
* Dispatches INT 0x80 system calls to the appropriate kernel function.
* System call number is in EAX, arguments in EBX, ECX, EDX, ESI, EDI.
* Return value is placed in EAX.
*/
#include "syscall.h"
#include "process.h"
#include "env.h"
#include "port_io.h"
#include "vga.h"
#include "vfs.h"
#include "keyboard.h"
#include "cpio.h"
#include "paging.h"
#include "pmm.h"
#include <stddef.h>
#include <string.h>
/**
 * Sentinel a handler returns when it has already arranged the contents of
 * the register frame itself and syscall_handler must NOT store the return
 * value into regs->eax.
 *
 * In this file it is returned after schedule_tick() was invoked (yield,
 * blocking waitpid) and after exec rewrote the frame for a new program.
 * Because the sentinel is in-band, a handler whose genuine result happens to
 * equal 0x7FFFFFFF would be treated the same way.
 */
#define SYSCALL_SWITCHED 0x7FFFFFFF
/* Debug print helpers defined in kernel.c */
extern void offset_print(const char *str);
extern void print_hex(uint32_t val);
/** IDT gate setup (from idt.c): installs handler `base` for vector `num`. */
extern void set_idt_gate_from_c(uint8_t num, uint32_t base, uint16_t sel, uint8_t flags);
/** INT 0x80 assembly stub; its address is installed by init_syscalls(). */
extern void isr128(void);
/**
 * SYS_EXIT handler: end the calling process.
 *
 * EBX carries the exit status, which is passed on to process_exit().
 * The trailing return exists only to satisfy the handler signature.
 */
static int32_t sys_exit(registers_t *regs) {
    int32_t status = (int32_t)regs->ebx;

    process_exit(status);

    /* process_exit() is not expected to come back here. */
    return 0;
}
/**
 * Handle SYS_WRITE: write bytes to a file descriptor.
 *
 * EBX = fd, ECX = source buffer, EDX = byte count.
 *
 *  - fd 1 (stdout) and fd 2 (stderr): each byte is sent to the 0xE9 debug
 *    port and to the VGA console.
 *  - fd >= 3: forwarded to vfs_write().
 *  - anything else (stdin, negative values): rejected.
 *
 * @return Number of bytes written for stdout/stderr, the vfs_write() result
 *         for VFS descriptors, or -1 for an invalid fd or a NULL buffer
 *         combined with a non-zero length.
 */
static int32_t sys_write(registers_t *regs) {
    int fd = (int)regs->ebx;
    const char *buf = (const char *)regs->ecx;
    uint32_t len = regs->edx;

    /* The pointer comes straight from user space. Refuse NULL so the kernel
     * never reads from address 0 on the caller's behalf.
     * NOTE(review): the range [buf, buf + len) is still not validated. */
    if (buf == NULL && len > 0) {
        return -1;
    }

    if (fd == 1 || fd == 2) {
        /* stdout or stderr: write to debug port and VGA */
        for (uint32_t i = 0; i < len; i++) {
            outb(0xE9, buf[i]);
            vga_putchar(buf[i]);
        }
        return (int32_t)len;
    }

    /* VFS file descriptors (fd >= 3) */
    if (fd >= 3) {
        return vfs_write(fd, buf, len);
    }

    return -1; /* Invalid fd */
}
/**
 * Handle SYS_READ: read bytes from a file descriptor.
 *
 * EBX = fd, ECX = destination buffer, EDX = buffer capacity in bytes.
 *
 *  - fd 0 (stdin): non-blocking read from the keyboard buffer. Returns 0
 *    when no data is available; the caller should yield and retry.
 *  - fd >= 3: forwarded to vfs_read().
 *  - anything else (stdout, stderr, negative values): rejected.
 *
 * @return Bytes read, 0 if stdin has nothing pending, the vfs_read() result
 *         for VFS descriptors, or -1 for an invalid fd or a NULL buffer
 *         combined with a non-zero length.
 */
static int32_t sys_read(registers_t *regs) {
    int fd = (int)regs->ebx;
    char *buf = (char *)regs->ecx;
    uint32_t len = regs->edx;

    /* The pointer comes straight from user space. Refuse NULL so the kernel
     * never writes to address 0 on the caller's behalf.
     * NOTE(review): the range [buf, buf + len) is still not validated. */
    if (buf == NULL && len > 0) {
        return -1;
    }

    if (fd == 0) {
        /* stdin: non-blocking read from keyboard */
        if (keyboard_has_data()) {
            uint32_t n = keyboard_read(buf, len);
            return (int32_t)n;
        }
        return 0; /* No data available */
    }

    /* VFS file descriptors (fd >= 3) */
    if (fd >= 3) {
        return vfs_read(fd, buf, len);
    }

    return -1; /* Invalid fd */
}
/**
 * SYS_FORK handler: duplicate the calling process.
 *
 * The caller's register frame is handed to process_fork() and whatever it
 * returns becomes the syscall result.
 */
static int32_t sys_fork(registers_t *regs) {
    int32_t result = process_fork(regs);

    return result;
}
/**
 * SYS_GETPID handler: report the caller's process ID.
 *
 * Takes no arguments. Returns the pid of the current process, or -1 when
 * process_current() yields no process.
 */
static int32_t sys_getpid(registers_t *regs) {
    (void)regs; /* no arguments are read from the frame */

    process_t *self = process_current();
    if (!self) {
        return -1;
    }

    return (int32_t)self->pid;
}
/**
 * SYS_YIELD handler: give up the CPU voluntarily.
 *
 * Invokes schedule_tick() on the caller's frame so another process may be
 * selected. Always reports SYSCALL_SWITCHED, which tells syscall_handler to
 * leave regs->eax exactly as schedule_tick() left it.
 */
static int32_t sys_yield(registers_t *regs) {
    const int32_t outcome = SYSCALL_SWITCHED;

    schedule_tick(regs);

    return outcome;
}
/**
 * Handle SYS_WAITPID: wait for a child to exit.
 *
 * EBX = pid to wait for.
 *
 * If the target is already a zombie it is reaped immediately and its exit
 * code is returned. Otherwise the caller is marked blocked, its frame is
 * saved, and the scheduler is invoked to run something else. When the child
 * exits, process_exit() is expected to unblock the waiting parent and store
 * the exit code in its saved_regs.eax.
 *
 * @return The child's exit code when reaped immediately; -1 if there is no
 *         current process, the pid does not exist, or the caller names
 *         itself; SYSCALL_SWITCHED after blocking.
 */
static int32_t sys_waitpid(registers_t *regs) {
    uint32_t pid = regs->ebx;

    /* Same guard as the other handlers: never dereference a missing
     * current process. */
    process_t *cur = process_current();
    if (!cur) {
        return -1;
    }

    /* A process waiting on itself would block with nobody left to wake it. */
    if ((uint32_t)cur->pid == pid) {
        return -1;
    }

    process_t *child = process_get(pid);
    if (!child) {
        return -1;
    }

    /* If child already exited, reap immediately */
    if (child->state == PROCESS_ZOMBIE) {
        int32_t code = child->exit_code;
        child->state = PROCESS_UNUSED;
        return code;
    }

    /* Block the current process until the child exits */
    cur->state = PROCESS_BLOCKED;
    cur->waiting_for_pid = pid;

    /* Save the current syscall registers so we resume here when unblocked.
     * The return value (eax) will be set by process_exit when the child dies. */
    cur->saved_regs = *regs;

    /* Schedule the next process. This modifies *regs to the next process's
     * saved state, so when the ISR stub does iret, it enters the next process. */
    schedule_tick(regs);

    /* Tell syscall_handler not to overwrite regs->eax, since regs now
     * holds the next process's registers on the kernel stack. */
    return SYSCALL_SWITCHED;
}
/**
 * Handle SYS_EXEC: replace the current process image with a new program.
 *
 * EBX = path to binary (C string), e.g. "hello-world".
 *
 * Looks the binary up in the initrd, releases the old user code pages, maps
 * and fills fresh code pages, zeroes the existing user stack pages, renames
 * the process and rewrites the register frame so the iret enters the new
 * program at USER_CODE_START with a fresh stack pointer.
 *
 * @return -1 if the path is NULL, there is no current process, the binary is
 *         not found or is empty, or a page allocation fails; otherwise
 *         SYSCALL_SWITCHED (the frame has been rewritten, so the call does
 *         not return to the old program).
 */
static int32_t sys_exec(registers_t *regs) {
    const char *path = (const char *)regs->ebx;
    if (!path) {
        return -1;
    }

    process_t *cur = process_current();
    if (!cur) {
        return -1;
    }

    /* Look up the binary in the initrd */
    cpio_entry_t entry;
    if (cpio_find(path, &entry) != 0) {
        return -1; /* Not found */
    }

    /* An empty image would map no code at all, leaving the entry point
     * unmapped. Refuse it while the old image is still intact. */
    if (entry.datasize == 0) {
        return -1;
    }

    /* Snapshot the program name NOW. `path` is a user-space pointer; if it
     * refers to the old code image or the user stack, its contents are
     * destroyed by the teardown below, so it must not be read afterwards. */
    char new_name[32];
    uint32_t nlen = strlen(path);
    if (nlen > 31) {
        nlen = 31;
    }
    memcpy(new_name, path, nlen);
    new_name[nlen] = '\0';

    uint32_t *pd = (uint32_t *)cur->page_directory;

    /* Unmap and free old user code pages (USER_CODE_START region).
     * We don't know exactly how many pages were mapped, so scan a
     * reasonable range and stop at the first hole. */
    for (uint32_t vaddr = USER_CODE_START;
         vaddr < USER_CODE_START + 0x100000; /* up to 1 MiB of code */
         vaddr += 4096) {
        uint32_t pd_idx = vaddr >> 22;
        uint32_t pt_idx = (vaddr >> 12) & 0x3FF;

        if (!(pd[pd_idx] & 0x001)) {
            break; /* No page table */
        }
        uint32_t *pt = (uint32_t *)(pd[pd_idx] & 0xFFFFF000);
        if (!(pt[pt_idx] & 0x001)) {
            break; /* No page */
        }

        phys_addr_t old_phys = pt[pt_idx] & 0xFFFFF000;
        pt[pt_idx] = 0;
        pmm_free_page(old_phys);
    }

    /* Map new code pages.
     * NOTE(review): the old image is already gone at this point, so an
     * allocation failure returns -1 to a process with no code left. */
    uint32_t code_pages = (entry.datasize + 4095) / 4096;
    for (uint32_t i = 0; i < code_pages; i++) {
        phys_addr_t phys = pmm_alloc_page(PMM_ZONE_NORMAL);
        if (phys == 0) {
            return -1;
        }

        uint32_t vaddr = USER_CODE_START + i * 4096;
        paging_map_page_in(pd, vaddr, phys,
                           PAGE_PRESENT | PAGE_WRITE | PAGE_USER);

        /* Copy this page's slice of the image and zero-fill the tail. */
        uint32_t offset = i * 4096;
        uint32_t bytes = entry.datasize - offset;
        if (bytes > 4096) {
            bytes = 4096;
        }
        memcpy((void *)phys, (const uint8_t *)entry.data + offset, bytes);
        if (bytes < 4096) {
            memset((void *)(phys + bytes), 0, 4096 - bytes);
        }
    }

    /* Zero the user stack pages (reuse existing stack mappings) */
    uint32_t stack_base = USER_STACK_TOP - USER_STACK_PAGES * 4096;
    for (uint32_t i = 0; i < USER_STACK_PAGES; i++) {
        uint32_t vaddr = stack_base + i * 4096;
        uint32_t pd_idx = vaddr >> 22;
        uint32_t pt_idx = (vaddr >> 12) & 0x3FF;

        if ((pd[pd_idx] & 0x001)) {
            uint32_t *pt = (uint32_t *)(pd[pd_idx] & 0xFFFFF000);
            if ((pt[pt_idx] & 0x001)) {
                phys_addr_t phys = pt[pt_idx] & 0xFFFFF000;
                memset((void *)phys, 0, 4096);
            }
        }
    }

    /* Flush TLB */
    paging_switch_directory(cur->page_directory);

    /* Update process name from the snapshot taken before the teardown. */
    memcpy(cur->name, new_name, nlen + 1);

    /* Set up registers for the new program */
    regs->eip = USER_CODE_START;
    regs->useresp = USER_STACK_TOP;
    regs->esp = USER_STACK_TOP;
    regs->eax = 0;
    regs->ebx = 0;
    regs->ecx = 0;
    regs->edx = 0;
    regs->esi = 0;
    regs->edi = 0;
    regs->ebp = 0;
    regs->cs = 0x1B;
    regs->ds = 0x23;
    regs->ss = 0x23;
    regs->eflags = 0x202; /* IF=1 */

    /* Return SYSCALL_SWITCHED so syscall_handler doesn't overwrite regs */
    return SYSCALL_SWITCHED;
}
/**
 * SYS_GETENV handler: look up an environment variable of the caller.
 *
 * EBX = key pointer, ECX = value buffer pointer, EDX = buffer size.
 *
 * Returns -1 when there is no current process; otherwise the result of
 * env_get() on the process environment (per the existing contract: the
 * length of the value, or -1 if the key is not found).
 */
static int32_t sys_getenv(registers_t *regs) {
    process_t *proc = process_current();
    if (!proc) {
        return -1;
    }

    return env_get(&proc->env,
                   (const char *)regs->ebx,
                   (char *)regs->ecx,
                   regs->edx);
}
/**
 * SYS_SETENV handler: set or clear an environment variable of the caller.
 *
 * EBX = key pointer, ECX = value pointer (NULL to unset).
 *
 * Returns -1 when there is no current process; otherwise the result of
 * env_set() on the process environment (per the existing contract: 0 on
 * success, -1 on error).
 */
static int32_t sys_setenv(registers_t *regs) {
    process_t *proc = process_current();
    if (!proc) {
        return -1;
    }

    return env_set(&proc->env,
                   (const char *)regs->ebx,
                   (const char *)regs->ecx);
}
/**
 * SYS_OPEN handler: open a file by path through the VFS.
 *
 * EBX = path string, ECX = flags.
 *
 * The vfs_open() result is returned unchanged (per the existing contract: a
 * file descriptor >= 3 on success, -1 on failure).
 */
static int32_t sys_open(registers_t *regs) {
    int32_t fd = (int32_t)vfs_open((const char *)regs->ebx, regs->ecx);

    return fd;
}
/**
 * SYS_CLOSE handler: close a VFS file descriptor.
 *
 * EBX = fd.
 *
 * Descriptors below 3 (stdin/stdout/stderr) are refused with -1. Any other
 * descriptor is passed to vfs_close() and 0 is returned; the outcome of
 * vfs_close() itself is not inspected here.
 */
static int32_t sys_close(registers_t *regs) {
    int fd = (int)regs->ebx;

    if (fd >= 3) {
        vfs_close(fd);
        return 0;
    }

    /* Don't close stdin/stdout/stderr */
    return -1;
}
/**
 * Handle SYS_READDIR: read a directory entry.
 *
 * EBX = path, ECX = index, EDX = name buffer (128 bytes min).
 *
 * On success the entry name is copied into the user buffer, truncated to
 * 127 characters if necessary, and always NUL-terminated.
 *
 * @return Entry type (VFS_FILE=1, VFS_DIRECTORY=2, ...) on success; -1 when
 *         the name buffer is NULL or vfs_readdir() reports no entry at that
 *         index (end of directory or error).
 */
static int32_t sys_readdir(registers_t *regs) {
    /* Size the syscall ABI promises for the caller's name buffer. */
    enum { NAME_BUF_SIZE = 128 };

    const char *path = (const char *)regs->ebx;
    uint32_t idx = regs->ecx;
    char *name_buf = (char *)regs->edx;

    /* The destination comes from user space. Refuse NULL so the kernel
     * never writes the entry name to address 0. */
    if (name_buf == NULL) {
        return -1;
    }

    vfs_dirent_t entry;
    if (vfs_readdir(path, idx, &entry) != 0) {
        return -1;
    }

    /* Copy entry name to user buffer, leaving room for the terminator */
    uint32_t len = strlen(entry.name);
    if (len >= NAME_BUF_SIZE) {
        len = NAME_BUF_SIZE - 1;
    }
    memcpy(name_buf, entry.name, len);
    name_buf[len] = '\0';

    return (int32_t)entry.type;
}
/**
 * System call dispatch table.
 *
 * Every handler receives the caller's saved register frame and returns the
 * value to place in EAX, or SYSCALL_SWITCHED when it has already arranged
 * the frame itself.
 *
 * The table is indexed by syscall number (the SYS_* constants). Slots that
 * are not listed here are left zero-initialised, and syscall_handler treats
 * such an empty slot as an invalid syscall.
 */
typedef int32_t (*syscall_fn)(registers_t *);
static syscall_fn syscall_table[NUM_SYSCALLS] = {
[SYS_EXIT] = sys_exit,
[SYS_WRITE] = sys_write,
[SYS_READ] = sys_read,
[SYS_FORK] = sys_fork,
[SYS_GETPID] = sys_getpid,
[SYS_YIELD] = sys_yield,
[SYS_WAITPID] = sys_waitpid,
[SYS_EXEC] = sys_exec,
[SYS_GETENV] = sys_getenv,
[SYS_SETENV] = sys_setenv,
[SYS_READDIR] = sys_readdir,
[SYS_OPEN] = sys_open,
[SYS_CLOSE] = sys_close,
};
/**
 * Entry point for INT 0x80: dispatch a system call.
 *
 * The syscall number is taken from regs->eax and used to index
 * syscall_table. An out-of-range number or an empty slot is logged and
 * answered with -1 in EAX. Otherwise the handler's return value is stored in
 * regs->eax, unless it is SYSCALL_SWITCHED, in which case the frame is left
 * exactly as the handler arranged it.
 */
void syscall_handler(registers_t *regs) {
    uint32_t nr = regs->eax;
    int known = (nr < NUM_SYSCALLS) && (syscall_table[nr] != 0);

    if (known) {
        int32_t result = syscall_table[nr](regs);

        if (result == SYSCALL_SWITCHED) {
            /* Handler already set up the frame; do not touch EAX. */
            return;
        }
        regs->eax = (uint32_t)result;
        return;
    }

    offset_print(" SYSCALL: invalid syscall ");
    print_hex(nr);
    regs->eax = (uint32_t)-1;
}
/**
 * Install the system call gate.
 *
 * Registers isr128 as the handler for interrupt vector 0x80 and logs that
 * the gate is in place.
 */
void init_syscalls(void) {
    const uint8_t vector = 0x80;
    const uint16_t selector = 0x08;
    /* 0xEE = Present(1) DPL(11) 0 Type(1110):
     * a 32-bit interrupt gate that ring 3 code is allowed to invoke. */
    const uint8_t gate_flags = 0xEE;

    set_idt_gate_from_c(vector, (uint32_t)isr128, selector, gate_flags);
    offset_print(" SYSCALL: INT 0x80 installed\n");
}