Sysfs:
- New VFS driver mounted at /sys that lets kernel drivers expose virtual text files via namespace registration
- Drivers call sysfs_register(name, ops, ctx) with list/read/write callbacks for their namespace
- IDE driver registers 'ide' namespace exposing per-device attributes: model, type, channel, drive, sectors, sector_size
- Tested: ls /sys -> ide, ls /sys/ide -> hdd1 cd1, cat /sys/ide/hdd1/model -> QEMU HARDDISK

Syscalls:
- Added SYS_OPEN (11) and SYS_CLOSE (12) for file I/O from userspace
- Extended SYS_READ/SYS_WRITE to handle VFS file descriptors (fd >= 3)
- Updated userspace syscalls.h with open()/close() wrappers

Apps:
- New 'cat' app: reads and displays file contents via open/read/close
- Updated 'ls' to accept path argument via ARG1 env var
- Updated shell to pass ARG1 env var to external commands
382 lines
11 KiB
C
382 lines
11 KiB
C
/**
 * @file syscall.c
 * @brief System call handler implementation.
 *
 * Dispatches INT 0x80 system calls to the appropriate kernel function.
 * The system call number is in EAX; arguments are in EBX, ECX, EDX, ESI, EDI.
 * The return value is placed in EAX.
 */
|
|
|
|
#include "syscall.h"
|
|
#include "process.h"
|
|
#include "env.h"
|
|
#include "port_io.h"
|
|
#include "vga.h"
|
|
#include "vfs.h"
|
|
#include "keyboard.h"
|
|
#include "cpio.h"
|
|
#include "paging.h"
|
|
#include "pmm.h"
|
|
#include <stddef.h>
|
|
#include <string.h>
|
|
|
|
/**
 * Sentinel a syscall implementation returns when the register frame no longer
 * belongs to the caller's plain "return a value" path (it blocked, switched
 * processes, or rewrote the frame itself). syscall_handler must leave
 * regs->eax untouched when it sees this value.
 */
#define SYSCALL_SWITCHED 0x7FFFFFFF
|
|
|
|
/* Assembly entry stub for INT 0x80; its address is installed in the IDT. */
extern void isr128(void);

/* IDT gate installer (from idt.c). */
extern void set_idt_gate_from_c(uint8_t num, uint32_t base, uint16_t sel, uint8_t flags);

/* Debug output helpers (defined in kernel.c). */
extern void print_hex(uint32_t val);
extern void offset_print(const char *str);
|
|
|
|
/**
|
|
* Handle SYS_EXIT: terminate the current process.
|
|
*/
|
|
static int32_t sys_exit(registers_t *regs) {
|
|
process_exit((int32_t)regs->ebx);
|
|
/* Never returns */
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Handle SYS_WRITE: write bytes to a file descriptor.
|
|
* Currently only supports fd=1 (stdout) -> debug port + VGA.
|
|
*/
|
|
static int32_t sys_write(registers_t *regs) {
|
|
int fd = (int)regs->ebx;
|
|
const char *buf = (const char *)regs->ecx;
|
|
uint32_t len = regs->edx;
|
|
|
|
if (fd == 1 || fd == 2) {
|
|
/* stdout or stderr: write to debug port and VGA */
|
|
for (uint32_t i = 0; i < len; i++) {
|
|
outb(0xE9, buf[i]);
|
|
vga_putchar(buf[i]);
|
|
}
|
|
return (int32_t)len;
|
|
}
|
|
|
|
/* VFS file descriptors (fd >= 3) */
|
|
if (fd >= 3) {
|
|
return vfs_write(fd, buf, len);
|
|
}
|
|
|
|
return -1; /* Invalid fd */
|
|
}
|
|
|
|
/**
|
|
* Handle SYS_READ: read bytes from a file descriptor.
|
|
* fd=0 (stdin) reads from the keyboard buffer (non-blocking).
|
|
* Returns 0 if no data available; caller should yield and retry.
|
|
*/
|
|
static int32_t sys_read(registers_t *regs) {
|
|
int fd = (int)regs->ebx;
|
|
char *buf = (char *)regs->ecx;
|
|
uint32_t len = regs->edx;
|
|
|
|
if (fd == 0) {
|
|
/* stdin: non-blocking read from keyboard */
|
|
if (keyboard_has_data()) {
|
|
uint32_t n = keyboard_read(buf, len);
|
|
return (int32_t)n;
|
|
}
|
|
return 0; /* No data available */
|
|
}
|
|
|
|
/* VFS file descriptors (fd >= 3) */
|
|
if (fd >= 3) {
|
|
return vfs_read(fd, buf, len);
|
|
}
|
|
|
|
return -1; /* Invalid fd */
|
|
}
|
|
|
|
/**
|
|
* Handle SYS_FORK: fork the current process.
|
|
*/
|
|
static int32_t sys_fork(registers_t *regs) {
|
|
return process_fork(regs);
|
|
}
|
|
|
|
/**
|
|
* Handle SYS_GETPID: return the current process ID.
|
|
*/
|
|
static int32_t sys_getpid(registers_t *regs) {
|
|
(void)regs;
|
|
process_t *cur = process_current();
|
|
return cur ? (int32_t)cur->pid : -1;
|
|
}
|
|
|
|
/**
|
|
* Handle SYS_YIELD: voluntarily yield the CPU.
|
|
* Calls schedule_tick directly to potentially switch to another process.
|
|
*/
|
|
static int32_t sys_yield(registers_t *regs) {
|
|
schedule_tick(regs);
|
|
return SYSCALL_SWITCHED;
|
|
}
|
|
|
|
/**
|
|
* Handle SYS_WAITPID: wait for a child to exit.
|
|
*
|
|
* If the child is already a zombie, reaps immediately and returns the code.
|
|
* Otherwise, blocks the current process and switches to the next one.
|
|
* When the child exits, process_exit() will unblock the waiting parent
|
|
* and set its saved_regs.eax to the exit code.
|
|
*/
|
|
static int32_t sys_waitpid(registers_t *regs) {
|
|
uint32_t pid = regs->ebx;
|
|
process_t *child = process_get(pid);
|
|
if (!child) {
|
|
return -1;
|
|
}
|
|
|
|
/* If child already exited, reap immediately */
|
|
if (child->state == PROCESS_ZOMBIE) {
|
|
int32_t code = child->exit_code;
|
|
child->state = PROCESS_UNUSED;
|
|
return code;
|
|
}
|
|
|
|
/* Block the current process until the child exits */
|
|
process_t *cur = process_current();
|
|
cur->state = PROCESS_BLOCKED;
|
|
cur->waiting_for_pid = pid;
|
|
|
|
/* Save the current syscall registers so we resume here when unblocked.
|
|
* The return value (eax) will be set by process_exit when the child dies. */
|
|
cur->saved_regs = *regs;
|
|
|
|
/* Schedule the next process. This modifies *regs to the next process's
|
|
* saved state, so when the ISR stub does iret, it enters the next process. */
|
|
schedule_tick(regs);
|
|
|
|
/* Tell syscall_handler not to overwrite regs->eax, since regs now
|
|
* points to the next process's registers on the kernel stack. */
|
|
return SYSCALL_SWITCHED;
|
|
}
|
|
|
|
/**
|
|
* Handle SYS_EXEC: replace the current process image with a new program.
|
|
* EBX = path to binary (C string), e.g. "hello-world".
|
|
* Loads the binary from the initrd and replaces the current process's
|
|
* code and stack. Does not return on success.
|
|
*/
|
|
static int32_t sys_exec(registers_t *regs) {
|
|
const char *path = (const char *)regs->ebx;
|
|
if (!path) return -1;
|
|
|
|
process_t *cur = process_current();
|
|
if (!cur) return -1;
|
|
|
|
/* Look up the binary in the initrd */
|
|
cpio_entry_t entry;
|
|
if (cpio_find(path, &entry) != 0) {
|
|
return -1; /* Not found */
|
|
}
|
|
|
|
uint32_t *pd = (uint32_t *)cur->page_directory;
|
|
|
|
/* Unmap and free old user code pages (0x08048000 region).
|
|
* We don't know exactly how many pages were mapped, so scan a
|
|
* reasonable range. */
|
|
for (uint32_t vaddr = USER_CODE_START;
|
|
vaddr < USER_CODE_START + 0x100000; /* up to 1 MiB of code */
|
|
vaddr += 4096) {
|
|
uint32_t pd_idx = vaddr >> 22;
|
|
uint32_t pt_idx = (vaddr >> 12) & 0x3FF;
|
|
if (!(pd[pd_idx] & 0x001)) break; /* No page table */
|
|
uint32_t *pt = (uint32_t *)(pd[pd_idx] & 0xFFFFF000);
|
|
if (!(pt[pt_idx] & 0x001)) break; /* No page */
|
|
phys_addr_t old_phys = pt[pt_idx] & 0xFFFFF000;
|
|
pt[pt_idx] = 0;
|
|
pmm_free_page(old_phys);
|
|
}
|
|
|
|
/* Map new code pages */
|
|
uint32_t code_pages = (entry.datasize + 4095) / 4096;
|
|
for (uint32_t i = 0; i < code_pages; i++) {
|
|
phys_addr_t phys = pmm_alloc_page(PMM_ZONE_NORMAL);
|
|
if (phys == 0) return -1;
|
|
|
|
uint32_t vaddr = USER_CODE_START + i * 4096;
|
|
paging_map_page_in(pd, vaddr, phys,
|
|
PAGE_PRESENT | PAGE_WRITE | PAGE_USER);
|
|
|
|
uint32_t offset = i * 4096;
|
|
uint32_t bytes = entry.datasize - offset;
|
|
if (bytes > 4096) bytes = 4096;
|
|
memcpy((void *)phys, (const uint8_t *)entry.data + offset, bytes);
|
|
if (bytes < 4096) {
|
|
memset((void *)(phys + bytes), 0, 4096 - bytes);
|
|
}
|
|
}
|
|
|
|
/* Zero the user stack pages (reuse existing stack mappings) */
|
|
uint32_t stack_base = USER_STACK_TOP - USER_STACK_PAGES * 4096;
|
|
for (uint32_t i = 0; i < USER_STACK_PAGES; i++) {
|
|
uint32_t vaddr = stack_base + i * 4096;
|
|
uint32_t pd_idx = vaddr >> 22;
|
|
uint32_t pt_idx = (vaddr >> 12) & 0x3FF;
|
|
if ((pd[pd_idx] & 0x001)) {
|
|
uint32_t *pt = (uint32_t *)(pd[pd_idx] & 0xFFFFF000);
|
|
if ((pt[pt_idx] & 0x001)) {
|
|
phys_addr_t phys = pt[pt_idx] & 0xFFFFF000;
|
|
memset((void *)phys, 0, 4096);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Flush TLB */
|
|
paging_switch_directory(cur->page_directory);
|
|
|
|
/* Update process name */
|
|
uint32_t nlen = strlen(path);
|
|
if (nlen > 31) nlen = 31;
|
|
memcpy(cur->name, path, nlen);
|
|
cur->name[nlen] = '\0';
|
|
|
|
/* Set up registers for the new program */
|
|
regs->eip = USER_CODE_START;
|
|
regs->useresp = USER_STACK_TOP;
|
|
regs->esp = USER_STACK_TOP;
|
|
regs->eax = 0;
|
|
regs->ebx = 0;
|
|
regs->ecx = 0;
|
|
regs->edx = 0;
|
|
regs->esi = 0;
|
|
regs->edi = 0;
|
|
regs->ebp = 0;
|
|
regs->cs = 0x1B;
|
|
regs->ds = 0x23;
|
|
regs->ss = 0x23;
|
|
regs->eflags = 0x202; /* IF=1 */
|
|
|
|
/* Return SYSCALL_SWITCHED so syscall_handler doesn't overwrite regs */
|
|
return SYSCALL_SWITCHED;
|
|
}
|
|
|
|
/**
|
|
* Handle SYS_GETENV: get an environment variable.
|
|
* EBX = key pointer, ECX = value buffer pointer, EDX = buffer size.
|
|
* Returns length of value, or -1 if not found.
|
|
*/
|
|
static int32_t sys_getenv(registers_t *regs) {
|
|
const char *key = (const char *)regs->ebx;
|
|
char *buf = (char *)regs->ecx;
|
|
uint32_t bufsize = regs->edx;
|
|
|
|
process_t *cur = process_current();
|
|
if (!cur) return -1;
|
|
|
|
return env_get(&cur->env, key, buf, bufsize);
|
|
}
|
|
|
|
/**
|
|
* Handle SYS_SETENV: set an environment variable.
|
|
* EBX = key pointer, ECX = value pointer (NULL to unset).
|
|
* Returns 0 on success, -1 on error.
|
|
*/
|
|
static int32_t sys_setenv(registers_t *regs) {
|
|
const char *key = (const char *)regs->ebx;
|
|
const char *value = (const char *)regs->ecx;
|
|
|
|
process_t *cur = process_current();
|
|
if (!cur) return -1;
|
|
|
|
return env_set(&cur->env, key, value);
|
|
}
|
|
|
|
/**
|
|
* Handle SYS_OPEN: open a file by path.
|
|
* EBX = path string, ECX = flags.
|
|
* Returns file descriptor (>= 3) on success, -1 on failure.
|
|
*/
|
|
static int32_t sys_open(registers_t *regs) {
|
|
const char *path = (const char *)regs->ebx;
|
|
uint32_t flags = regs->ecx;
|
|
return (int32_t)vfs_open(path, flags);
|
|
}
|
|
|
|
/**
|
|
* Handle SYS_CLOSE: close a file descriptor.
|
|
* EBX = fd.
|
|
* Returns 0 on success, -1 on failure.
|
|
*/
|
|
static int32_t sys_close(registers_t *regs) {
|
|
int fd = (int)regs->ebx;
|
|
if (fd < 3) return -1; /* Don't close stdin/stdout/stderr */
|
|
vfs_close(fd);
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Handle SYS_READDIR: read a directory entry.
|
|
* EBX = path, ECX = index, EDX = name buffer (128 bytes min).
|
|
* Returns entry type (VFS_FILE=1, VFS_DIRECTORY=2, ...) on success, -1 at end.
|
|
*/
|
|
static int32_t sys_readdir(registers_t *regs) {
|
|
const char *path = (const char *)regs->ebx;
|
|
uint32_t idx = regs->ecx;
|
|
char *name_buf = (char *)regs->edx;
|
|
|
|
vfs_dirent_t entry;
|
|
if (vfs_readdir(path, idx, &entry) != 0) {
|
|
return -1;
|
|
}
|
|
|
|
/* Copy entry name to user buffer */
|
|
uint32_t len = strlen(entry.name);
|
|
if (len >= 128) len = 127;
|
|
memcpy(name_buf, entry.name, len);
|
|
name_buf[len] = '\0';
|
|
|
|
return (int32_t)entry.type;
|
|
}
|
|
|
|
/** System call dispatch table. */
|
|
typedef int32_t (*syscall_fn)(registers_t *);
|
|
static syscall_fn syscall_table[NUM_SYSCALLS] = {
|
|
[SYS_EXIT] = sys_exit,
|
|
[SYS_WRITE] = sys_write,
|
|
[SYS_READ] = sys_read,
|
|
[SYS_FORK] = sys_fork,
|
|
[SYS_GETPID] = sys_getpid,
|
|
[SYS_YIELD] = sys_yield,
|
|
[SYS_WAITPID] = sys_waitpid,
|
|
[SYS_EXEC] = sys_exec,
|
|
[SYS_GETENV] = sys_getenv,
|
|
[SYS_SETENV] = sys_setenv,
|
|
[SYS_READDIR] = sys_readdir,
|
|
[SYS_OPEN] = sys_open,
|
|
[SYS_CLOSE] = sys_close,
|
|
};
|
|
|
|
/**
 * Entry point for INT 0x80, called with the saved register frame.
 *
 * EAX selects the system call. Numbers outside the table, or with an empty
 * slot, are logged and answered with -1 in EAX. Otherwise the handler's
 * result is stored in EAX, unless it returned SYSCALL_SWITCHED, in which
 * case the frame is left exactly as the handler set it up.
 *
 * @param regs Saved registers of the interrupted user context.
 */
void syscall_handler(registers_t *regs) {
    uint32_t nr = regs->eax;
    syscall_fn handler = (nr < NUM_SYSCALLS) ? syscall_table[nr] : NULL;

    if (!handler) {
        offset_print(" SYSCALL: invalid syscall ");
        print_hex(nr);
        regs->eax = (uint32_t)-1;
        return;
    }

    int32_t result = handler(regs);
    if (result == SYSCALL_SWITCHED) {
        /* Frame now belongs to another context or was rewritten; hands off. */
        return;
    }

    regs->eax = (uint32_t)result;
}
|
|
|
|
void init_syscalls(void) {
|
|
/* Install INT 0x80 as a user-callable interrupt gate.
|
|
* Flags: 0xEE = Present(1) DPL(11) 0 Type(1110) = 32-bit Interrupt Gate, Ring 3 callable */
|
|
set_idt_gate_from_c(0x80, (uint32_t)isr128, 0x08, 0xEE);
|
|
offset_print(" SYSCALL: INT 0x80 installed\n");
|
|
}
|