feat: implement fork system call with deep address space cloning (AI)

- Added paging_clone_directory_from(): deep-copies user-space pages so
  parent and child have independent memory. Kernel pages are shared.
- Fixed process_fork() to accept registers_t* for accurate child state,
  and to clone from the parent's page directory (not the kernel's).
- Refactored process_exit() to properly context-switch to next process
  using new process_switch_to_user assembly stub (loads full registers_t
  and performs iret), instead of halting unconditionally.
- Fixed sys_waitpid() to use proper blocking: marks process BLOCKED,
  invokes scheduler, and resumes with exit code when child dies.
- Added SYSCALL_SWITCHED mechanism to prevent syscall_handler from
  clobbering the next process's EAX after a context switch.
- Created fork-test user app that validates fork + waitpid.
- Added docs/fork.md with architecture documentation.

Tested: fork-test creates child, both print messages, parent waits for
child exit (code 7), parent reaps and exits (code 0). hello-world also
verified to still work correctly after the process_exit refactor.
This commit is contained in:
AI
2026-02-23 12:42:02 +00:00
parent f1de5b6da6
commit 42328ead0b
9 changed files with 350 additions and 30 deletions

View File

@@ -0,0 +1,79 @@
#
# fork-test: Tests the fork system call.
#
# 1. Calls SYS_FORK
# 2. Parent prints "Parent: pid=<pid>\n" and waits for child
# 3. Child prints "Child: pid=0\n" and exits with code 7
# 4. Parent exits with code 0
#
.section .text
.global _start
# System call numbers
.equ SYS_EXIT, 0
.equ SYS_WRITE, 1
.equ SYS_FORK, 3
.equ SYS_GETPID, 4
.equ SYS_WAITPID, 6
_start:
# Fork
movl $SYS_FORK, %eax
int $0x80
# EAX = 0 in child, child PID in parent
testl %eax, %eax
jz .child
.parent:
# Save child PID on the stack
pushl %eax
# Print "Parent\n"
movl $SYS_WRITE, %eax
movl $1, %ebx # fd = stdout
movl $parent_msg, %ecx
movl $parent_msg_len, %edx
int $0x80
# Waitpid for child
popl %ebx # child PID
movl $SYS_WAITPID, %eax
int $0x80
# EAX now has child's exit code (should be 7)
# Print "Reaped\n"
pushl %eax # save exit code
movl $SYS_WRITE, %eax
movl $1, %ebx
movl $reaped_msg, %ecx
movl $reaped_msg_len, %edx
int $0x80
popl %ebx # exit code (unused, exit with 0)
# Exit with code 0
movl $SYS_EXIT, %eax
movl $0, %ebx
int $0x80
.child:
# Print "Child\n"
movl $SYS_WRITE, %eax
movl $1, %ebx # fd = stdout
movl $child_msg, %ecx
movl $child_msg_len, %edx
int $0x80
# Exit with code 7
movl $SYS_EXIT, %eax
movl $7, %ebx
int $0x80
.section .rodata
parent_msg: .ascii "Parent\n"
.equ parent_msg_len, . - parent_msg
child_msg: .ascii "Child\n"
.equ child_msg_len, . - child_msg
reaped_msg: .ascii "Reaped\n"
.equ reaped_msg_len, . - reaped_msg

83
docs/fork.md Normal file
View File

@@ -0,0 +1,83 @@
# Fork System Call
## Overview
The `fork()` system call duplicates the calling process, creating a new child
process with an independent copy of the parent's address space.
## System Call Interface
- **Number**: `SYS_FORK` (3)
- **Arguments**: None
- **Returns**: Child PID in the parent, 0 in the child, -1 on error
## Implementation
### Address Space Cloning
`paging_clone_directory_from(src_pd_phys)` performs a deep copy of a process's
page directory:
1. **Kernel-space entries** (no `PAGE_USER` flag): shared directly between
parent and child. Both processes see the same kernel mappings.
2. **User-space entries** (`PAGE_USER` flag set): fully deep-copied. For each
user-space page directory entry:
- A new page table is allocated
- Each present user page has a new physical page allocated and the content
copied byte-for-byte
- This ensures parent and child have completely independent memory
### Register State
The child receives a copy of the parent's register state at the time of the
`INT 0x80` syscall, with `EAX` set to 0. This means the child resumes execution
at the instruction immediately following the `INT 0x80` that triggered fork.
### Process Exit and Waitpid
`process_exit()` was refactored to support multi-process scenarios:
- When a process exits, it scans for any process blocked on `waitpid()` for
its PID and unblocks it, setting the waiter's saved `EAX` to the exit code.
- If another process is ready, `process_switch_to_user()` is called to
directly context-switch via an assembly stub that loads the full register
set and performs `iret`.
- If no processes remain, the system halts.
`sys_waitpid()` supports blocking:
- If the child is already a zombie, it reaps immediately
- Otherwise, the caller is marked `PROCESS_BLOCKED` and the scheduler is
invoked to switch to another process
- When the child exits, the parent is unblocked with the exit code
### Assembly Support
`process_switch_to_user` in `interrupts.S` loads a full `registers_t` struct
and performs `iret` to enter user mode. This is used when `process_exit()`
needs to context-switch outside the normal ISR return path.
## Syscall Flow
```
User: INT 0x80 (EAX=SYS_FORK)
→ ISR stub pushes registers
→ isr_handler → syscall_handler → sys_fork(regs)
→ process_fork(regs)
→ Clone page directory with deep user-page copy
→ Copy current interrupt frame to child (EAX=0)
→ Return child PID to parent (via EAX)
→ ISR stub pops registers, iret
→ Parent continues with EAX=child_pid
→ [Timer interrupt] → scheduler picks child
→ Child starts with EAX=0
```
## Testing
The `fork-test` application validates fork by:
1. Calling `SYS_FORK`
2. Parent prints "Parent" and calls `SYS_WAITPID`
3. Child prints "Child" and exits with code 7
4. Parent reaps child, prints "Reaped", exits with code 0

View File

@@ -174,3 +174,28 @@ enter_usermode:
push $0x1B /* CS (user code) */
push %ecx /* EIP (entry point) */
iret
/*
* process_switch_to_user - Restore full register state and iret to user mode.
* void process_switch_to_user(registers_t *regs);
*
* Used by process_exit to context-switch to the next process when the normal
* interrupt-return path isn't available (because we're not returning through
* an ISR stub). Loads all registers from the registers_t struct and performs
* iret to enter user mode.
*/
.global process_switch_to_user
.type process_switch_to_user, @function
process_switch_to_user:
movl 4(%esp), %esp /* Point ESP to the registers_t struct */
/* Restore segment register (ds → all data segments) */
pop %eax
mov %ax, %ds
mov %ax, %es
mov %ax, %fs
mov %ax, %gs
popa /* Restore EAX-EDI */
addl $8, %esp /* Skip int_no and err_code */
iret /* Pops EIP, CS, EFLAGS, UserESP, SS */

View File

@@ -153,15 +153,15 @@ void kernel_main(uint32_t magic, uint32_t addr) {
}
/* Load hello-world from the initrd and run it as a user process */
cpio_entry_t hello_entry;
if (cpio_find("hello-world", &hello_entry) == 0) {
cpio_entry_t app_entry;
if (cpio_find("hello-world", &app_entry) == 0) {
offset_print("Found hello-world in initrd (");
print_hex(hello_entry.datasize);
print_hex(app_entry.datasize);
offset_print(" bytes)\n");
int32_t pid = process_create("hello-world",
hello_entry.data,
hello_entry.datasize);
app_entry.data,
app_entry.datasize);
if (pid > 0) {
offset_print("Created hello-world process, pid=");
print_hex((uint32_t)pid);

View File

@@ -299,6 +299,64 @@ uint32_t paging_clone_directory(void) {
return new_dir_phys;
}
uint32_t paging_clone_directory_from(uint32_t src_pd_phys) {
uint32_t *src_pd = (uint32_t *)src_pd_phys;
/* Allocate a new page directory */
phys_addr_t new_pd_phys = pmm_alloc_page(PMM_ZONE_NORMAL);
if (new_pd_phys == 0) {
offset_print(" PAGING: cannot allocate page directory for fork\n");
return 0;
}
uint32_t *new_pd = (uint32_t *)new_pd_phys;
/* Copy all page directory entries (shares kernel mappings) */
memcpy(new_pd, src_pd, 4096);
/* Deep-copy user-space page tables (those with PAGE_USER set) */
for (uint32_t i = 0; i < PAGE_ENTRIES; i++) {
if (!(src_pd[i] & PAGE_PRESENT)) continue;
if (!(src_pd[i] & PAGE_USER)) continue; /* kernel entry, shared */
uint32_t *src_pt = (uint32_t *)(src_pd[i] & 0xFFFFF000);
/* Allocate a new page table */
phys_addr_t new_pt_phys = pmm_alloc_page(PMM_ZONE_NORMAL);
if (new_pt_phys == 0) {
offset_print(" PAGING: fork: cannot allocate page table\n");
return 0; /* TODO: free partially allocated pages */
}
uint32_t *new_pt = (uint32_t *)new_pt_phys;
/* Deep-copy each page in the page table */
for (uint32_t j = 0; j < PAGE_ENTRIES; j++) {
if (!(src_pt[j] & PAGE_PRESENT)) {
new_pt[j] = 0;
continue;
}
if (src_pt[j] & PAGE_USER) {
/* User page: allocate new physical page and copy content */
phys_addr_t old_phys = src_pt[j] & 0xFFFFF000;
phys_addr_t new_phys = pmm_alloc_page(PMM_ZONE_NORMAL);
if (new_phys == 0) {
offset_print(" PAGING: fork: cannot allocate page\n");
return 0;
}
memcpy((void *)new_phys, (void *)old_phys, 4096);
new_pt[j] = new_phys | (src_pt[j] & 0xFFF);
} else {
/* Kernel page within a user page table: share directly */
new_pt[j] = src_pt[j];
}
}
new_pd[i] = new_pt_phys | (src_pd[i] & 0xFFF);
}
return new_pd_phys;
}
void paging_map_page_in(uint32_t *pd, uint32_t vaddr, uint32_t paddr, uint32_t flags) {
uint32_t pd_idx = PD_INDEX(vaddr);
uint32_t pt_idx = PT_INDEX(vaddr);

View File

@@ -98,6 +98,17 @@ uint32_t paging_get_directory_phys(void);
*/
uint32_t paging_clone_directory(void);
/**
* Clone a page directory, deep-copying all user-space pages.
* Kernel-space entries are shared (same page tables). User-space page
* tables and their physical pages are duplicated so the clone is fully
* independent.
*
* @param src_pd_phys Physical address of the source page directory.
* @return Physical address of the new page directory, or 0 on failure.
*/
uint32_t paging_clone_directory_from(uint32_t src_pd_phys);
/**
* Map a page in a specific page directory (not necessarily the active one).
*

View File

@@ -234,18 +234,43 @@ void process_exit(int32_t code) {
current_process->state = PROCESS_ZOMBIE;
current_process->exit_code = code;
/* Find another process to run.
* We construct a minimal register frame to pass to schedule_tick.
* Since the process is zombie, schedule_tick won't save its state. */
registers_t dummy;
memset(&dummy, 0, sizeof(dummy));
schedule_tick(&dummy);
/* If we get here, no other process was ready. Halt. */
offset_print(" PROCESS: no processes remaining, halting\n");
for (;;) {
__asm__ volatile("hlt");
/* Wake any process blocked on waitpid for this PID */
for (int i = 0; i < MAX_PROCESSES; i++) {
if (process_table[i].state == PROCESS_BLOCKED &&
process_table[i].waiting_for_pid == current_process->pid) {
process_table[i].state = PROCESS_READY;
process_table[i].saved_regs.eax = (uint32_t)code;
break;
}
}
/* Find next ready process to switch to */
process_t *next = NULL;
for (int i = 0; i < MAX_PROCESSES; i++) {
if (process_table[i].state == PROCESS_READY) {
next = &process_table[i];
break;
}
}
if (!next) {
offset_print(" PROCESS: no processes remaining, halting\n");
for (;;) {
__asm__ volatile("cli; hlt");
}
}
/* Context switch to the next process via assembly stub */
current_process = next;
next->state = PROCESS_RUNNING;
tss_set_kernel_stack(next->kernel_stack_top);
paging_switch_directory(next->page_directory);
extern void process_switch_to_user(registers_t *regs);
process_switch_to_user(&next->saved_regs);
/* Should never reach here */
__builtin_unreachable();
}
process_t *process_current(void) {
@@ -262,7 +287,7 @@ process_t *process_get(uint32_t pid) {
return NULL;
}
int32_t process_fork(void) {
int32_t process_fork(registers_t *regs) {
if (!current_process) {
return -1;
}
@@ -278,6 +303,7 @@ int32_t process_fork(void) {
child->pid = next_pid++;
child->state = PROCESS_READY;
child->parent_pid = current_process->pid;
child->waiting_for_pid = 0;
/* Allocate a separate kernel stack for the child */
void *child_kstack = paging_alloc_page();
@@ -288,17 +314,27 @@ int32_t process_fork(void) {
child->kernel_stack = (uint32_t)child_kstack;
child->kernel_stack_top = child->kernel_stack + 4096;
/* Clone the page directory */
child->page_directory = paging_clone_directory();
/* Deep-clone the parent's page directory (copies all user-space pages) */
child->page_directory = paging_clone_directory_from(current_process->page_directory);
if (!child->page_directory) {
kfree((void *)child->kernel_stack);
paging_free_page((void *)child->kernel_stack);
child->state = PROCESS_UNUSED;
return -1;
}
/* Copy the current syscall registers to the child.
* This ensures the child resumes at the same point as the parent
* (right after the INT 0x80 instruction). */
child->saved_regs = *regs;
/* Child's return value is 0 (in EAX) */
child->saved_regs.eax = 0;
offset_print(" PROCESS: forked pid ");
print_hex(current_process->pid);
offset_print(" PROCESS: -> child pid ");
print_hex(child->pid);
/* Parent's return value is child's PID */
return (int32_t)child->pid;
}

View File

@@ -56,6 +56,7 @@ typedef struct process {
uint32_t entry_point; /**< User-mode entry point. */
int32_t exit_code; /**< Exit code (if ZOMBIE). */
uint32_t parent_pid; /**< Parent process ID. */
uint32_t waiting_for_pid; /**< PID we are blocked waiting for (if BLOCKED). */
char name[32]; /**< Process name (for debugging). */
} process_t;
@@ -113,10 +114,12 @@ process_t *process_get(uint32_t pid);
/**
* Fork the current process.
* Clones the current process's address space and register state.
*
* @param regs Pointer to the current interrupt frame (syscall registers).
* @return PID of the child in the parent, 0 in the child, -1 on error.
*/
int32_t process_fork(void);
int32_t process_fork(registers_t *regs);
/**
* Start the first user-mode process. Does not return if a process is ready.

View File

@@ -13,6 +13,10 @@
#include "vga.h"
#include <stddef.h>
/** Magic return value indicating the syscall blocked and switched processes.
* syscall_handler must NOT overwrite regs->eax in this case. */
#define SYSCALL_SWITCHED 0x7FFFFFFF
/* Debug print helpers defined in kernel.c */
extern void offset_print(const char *str);
extern void print_hex(uint32_t val);
@@ -66,8 +70,7 @@ static int32_t sys_read(registers_t *regs) {
* Handle SYS_FORK: fork the current process.
*/
static int32_t sys_fork(registers_t *regs) {
(void)regs;
return process_fork();
return process_fork(regs);
}
/**
@@ -90,6 +93,11 @@ static int32_t sys_yield(registers_t *regs) {
/**
* Handle SYS_WAITPID: wait for a child to exit.
*
* If the child is already a zombie, reaps immediately and returns the code.
* Otherwise, blocks the current process and switches to the next one.
* When the child exits, process_exit() will unblock the waiting parent
* and set its saved_regs.eax to the exit code.
*/
static int32_t sys_waitpid(registers_t *regs) {
uint32_t pid = regs->ebx;
@@ -98,14 +106,29 @@ static int32_t sys_waitpid(registers_t *regs) {
return -1;
}
/* Busy-wait until child is zombie */
while (child->state != PROCESS_ZOMBIE) {
schedule();
/* If child already exited, reap immediately */
if (child->state == PROCESS_ZOMBIE) {
int32_t code = child->exit_code;
child->state = PROCESS_UNUSED;
return code;
}
int32_t code = child->exit_code;
child->state = PROCESS_UNUSED;
return code;
/* Block the current process until the child exits */
process_t *cur = process_current();
cur->state = PROCESS_BLOCKED;
cur->waiting_for_pid = pid;
/* Save the current syscall registers so we resume here when unblocked.
* The return value (eax) will be set by process_exit when the child dies. */
cur->saved_regs = *regs;
/* Schedule the next process. This modifies *regs to the next process's
* saved state, so when the ISR stub does iret, it enters the next process. */
schedule_tick(regs);
/* Tell syscall_handler not to overwrite regs->eax, since regs now
* points to the next process's registers on the kernel stack. */
return SYSCALL_SWITCHED;
}
/**
@@ -140,7 +163,9 @@ void syscall_handler(registers_t *regs) {
}
int32_t ret = syscall_table[num](regs);
regs->eax = (uint32_t)ret;
if (ret != SYSCALL_SWITCHED) {
regs->eax = (uint32_t)ret;
}
}
void init_syscalls(void) {