feat: implement fork system call with deep address space cloning (AI)
- Added paging_clone_directory_from(): deep-copies user-space pages so parent and child have independent memory. Kernel pages are shared.
- Fixed process_fork() to accept registers_t* for accurate child state, and to clone from the parent's page directory (not the kernel's).
- Refactored process_exit() to properly context-switch to next process using new process_switch_to_user assembly stub (loads full registers_t and performs iret), instead of halting unconditionally.
- Fixed sys_waitpid() to use proper blocking: marks process BLOCKED, invokes scheduler, and resumes with exit code when child dies.
- Added SYSCALL_SWITCHED mechanism to prevent syscall_handler from clobbering the next process's EAX after a context switch.
- Created fork-test user app that validates fork + waitpid.
- Added docs/fork.md with architecture documentation.

Tested: fork-test creates child, both print messages, parent waits for child exit (code 7), parent reaps and exits (code 0). hello-world also verified to still work correctly after the process_exit refactor.
This commit is contained in:
79
apps/fork-test/fork-test.S
Normal file
79
apps/fork-test/fork-test.S
Normal file
@@ -0,0 +1,79 @@
|
||||
#
# fork-test: Tests the fork system call.
#
#   1. Calls SYS_FORK
#   2. Parent prints "Parent\n" and waits for the child with SYS_WAITPID
#   3. Child prints "Child\n" and exits with code 7
#   4. Parent prints "Reaped\n", then exits with code 0 if the child's exit
#      code was 7, or with code 1 otherwise
#
# If SYS_FORK itself reports an error (negative EAX), the program exits with
# code 1 without printing anything.
#
# Syscall convention used below: number in EAX, arguments in EBX, ECX, EDX,
# invoked with INT 0x80, result returned in EAX.
#

.section .text
.global _start

# System call numbers
.equ SYS_EXIT,    0
.equ SYS_WRITE,   1
.equ SYS_FORK,    3
.equ SYS_GETPID,  4
.equ SYS_WAITPID, 6

# Exit code the child reports and the parent expects from waitpid
.equ CHILD_EXIT_CODE, 7

_start:
    # Fork
    movl $SYS_FORK, %eax
    int $0x80

    # EAX = 0 in child, child PID in parent, negative on error.
    # testl sets both ZF and SF; jz leaves the flags intact for js.
    testl %eax, %eax
    jz .child
    js .fail

.parent:
    # Save child PID on the stack (EAX/EBX are reused by the write below)
    pushl %eax

    # Print "Parent\n"
    movl $SYS_WRITE, %eax
    movl $1, %ebx                   # fd = stdout
    movl $parent_msg, %ecx
    movl $parent_msg_len, %edx
    int $0x80

    # Waitpid for child
    popl %ebx                       # child PID
    movl $SYS_WAITPID, %eax
    int $0x80
    # EAX now has child's exit code (should be CHILD_EXIT_CODE)

    # Print "Reaped\n"
    pushl %eax                      # save exit code across the write
    movl $SYS_WRITE, %eax
    movl $1, %ebx                   # fd = stdout
    movl $reaped_msg, %ecx
    movl $reaped_msg_len, %edx
    int $0x80
    popl %eax                       # child's exit code

    # Verify that the code delivered by waitpid is the one the child used
    cmpl $CHILD_EXIT_CODE, %eax
    jne .fail

    # Exit with code 0 (success)
    movl $SYS_EXIT, %eax
    movl $0, %ebx
    int $0x80

.fail:
    # Fork failed or the reaped exit code was wrong: exit with code 1
    movl $SYS_EXIT, %eax
    movl $1, %ebx
    int $0x80

.child:
    # Print "Child\n"
    movl $SYS_WRITE, %eax
    movl $1, %ebx                   # fd = stdout
    movl $child_msg, %ecx
    movl $child_msg_len, %edx
    int $0x80

    # Exit with code CHILD_EXIT_CODE
    movl $SYS_EXIT, %eax
    movl $CHILD_EXIT_CODE, %ebx
    int $0x80

.section .rodata
parent_msg: .ascii "Parent\n"
.equ parent_msg_len, . - parent_msg
child_msg: .ascii "Child\n"
.equ child_msg_len, . - child_msg
reaped_msg: .ascii "Reaped\n"
.equ reaped_msg_len, . - reaped_msg
|
||||
83
docs/fork.md
Normal file
83
docs/fork.md
Normal file
@@ -0,0 +1,83 @@
|
||||
# Fork System Call
|
||||
|
||||
## Overview
|
||||
|
||||
The `fork()` system call duplicates the calling process, creating a new child
|
||||
process with an independent copy of the parent's address space.
|
||||
|
||||
## System Call Interface
|
||||
|
||||
- **Number**: `SYS_FORK` (3)
|
||||
- **Arguments**: None
|
||||
- **Returns**: Child PID in the parent, 0 in the child, -1 on error
|
||||
|
||||
## Implementation
|
||||
|
||||
### Address Space Cloning
|
||||
|
||||
`paging_clone_directory_from(src_pd_phys)` performs a deep copy of a process's
|
||||
page directory:
|
||||
|
||||
1. **Kernel-space entries** (no `PAGE_USER` flag): shared directly between
|
||||
parent and child. Both processes see the same kernel mappings.
|
||||
|
||||
2. **User-space entries** (`PAGE_USER` flag set): fully deep-copied. For each
|
||||
user-space page directory entry:
|
||||
- A new page table is allocated
|
||||
- Each present user page has a new physical page allocated and the content
|
||||
copied byte-for-byte
|
||||
- This ensures parent and child have completely independent memory
|
||||
|
||||
### Register State
|
||||
|
||||
The child receives a copy of the parent's register state at the time of the
|
||||
`INT 0x80` syscall, with `EAX` set to 0. This means the child resumes execution
|
||||
at the instruction immediately following the `INT 0x80` that triggered fork.
|
||||
|
||||
### Process Exit and Waitpid
|
||||
|
||||
`process_exit()` was refactored to support multi-process scenarios:
|
||||
|
||||
- When a process exits, it scans for any process blocked on `waitpid()` for
|
||||
its PID and unblocks it, setting the waiter's saved `EAX` to the exit code.
|
||||
- If another process is ready, `process_switch_to_user()` is called to
|
||||
directly context-switch via an assembly stub that loads the full register
|
||||
set and performs `iret`.
|
||||
- If no processes remain, the system halts.
|
||||
|
||||
`sys_waitpid()` supports blocking:
|
||||
|
||||
- If the child is already a zombie, it reaps immediately
|
||||
- Otherwise, the caller is marked `PROCESS_BLOCKED` and the scheduler is
|
||||
invoked to switch to another process
|
||||
- When the child exits, the parent is unblocked with the exit code
|
||||
|
||||
### Assembly Support
|
||||
|
||||
`process_switch_to_user` in `interrupts.S` loads a full `registers_t` struct
|
||||
and performs `iret` to enter user mode. This is used when `process_exit()`
|
||||
needs to context-switch outside the normal ISR return path.
|
||||
|
||||
## Syscall Flow
|
||||
|
||||
```
|
||||
User: INT 0x80 (EAX=SYS_FORK)
|
||||
→ ISR stub pushes registers
|
||||
→ isr_handler → syscall_handler → sys_fork(regs)
|
||||
→ process_fork(regs)
|
||||
→ Clone page directory with deep user-page copy
|
||||
→ Copy current interrupt frame to child (EAX=0)
|
||||
→ Return child PID to parent (via EAX)
|
||||
→ ISR stub pops registers, iret
|
||||
→ Parent continues with EAX=child_pid
|
||||
→ [Timer interrupt] → scheduler picks child
|
||||
→ Child starts with EAX=0
|
||||
```
|
||||
|
||||
## Testing
|
||||
|
||||
The `fork-test` application validates fork by:
|
||||
1. Calling `SYS_FORK`
|
||||
2. Parent prints "Parent" and calls `SYS_WAITPID`
|
||||
3. Child prints "Child" and exits with code 7
|
||||
4. Parent reaps child, prints "Reaped", exits with code 0
|
||||
@@ -174,3 +174,28 @@ enter_usermode:
|
||||
push $0x1B /* CS (user code) */
|
||||
push %ecx /* EIP (entry point) */
|
||||
iret
|
||||
|
||||
/*
 * process_switch_to_user - Restore full register state and iret to user mode.
 * void process_switch_to_user(registers_t *regs);
 *
 * Used by process_exit to context-switch to the next process when the normal
 * interrupt-return path isn't available (because we're not returning through
 * an ISR stub). Loads all registers from the registers_t struct and performs
 * iret to enter user mode. Does not return to the caller.
 *
 * The pops below consume *regs from its lowest address upward, in this order:
 *   data segment selector, the eight general registers in popa order,
 *   int_no, err_code, then the iret frame (EIP, CS, EFLAGS, UserESP, SS).
 * NOTE(review): this must match the registers_t layout the ISR stubs push;
 * confirm against the struct definition.
 *
 * Interrupts are masked first: ESP is about to point into *regs rather than
 * at a real stack, and an interrupt taken in ring 0 pushes its frame at the
 * current ESP, which would overwrite memory in and below the struct. The
 * final iret reloads EFLAGS from the frame, so the target's own interrupt
 * flag takes effect again.
 */
.global process_switch_to_user
.type process_switch_to_user, @function
process_switch_to_user:
    cli                         /* No interrupts while ESP is not a stack */
    movl 4(%esp), %esp          /* Point ESP to the registers_t struct */

    /* Restore segment register (ds -> all data segments) */
    pop %eax
    mov %ax, %ds
    mov %ax, %es
    mov %ax, %fs
    mov %ax, %gs

    popa                        /* Restore EDI..EAX (saved ESP slot is skipped) */
    addl $8, %esp               /* Skip int_no and err_code */
    iret                        /* Pops EIP, CS, EFLAGS, UserESP, SS */
.size process_switch_to_user, . - process_switch_to_user
|
||||
|
||||
10
src/kernel.c
10
src/kernel.c
@@ -153,15 +153,15 @@ void kernel_main(uint32_t magic, uint32_t addr) {
|
||||
}
|
||||
|
||||
/* Load hello-world from the initrd and run it as a user process */
|
||||
cpio_entry_t hello_entry;
|
||||
if (cpio_find("hello-world", &hello_entry) == 0) {
|
||||
cpio_entry_t app_entry;
|
||||
if (cpio_find("hello-world", &app_entry) == 0) {
|
||||
offset_print("Found hello-world in initrd (");
|
||||
print_hex(hello_entry.datasize);
|
||||
print_hex(app_entry.datasize);
|
||||
offset_print(" bytes)\n");
|
||||
|
||||
int32_t pid = process_create("hello-world",
|
||||
hello_entry.data,
|
||||
hello_entry.datasize);
|
||||
app_entry.data,
|
||||
app_entry.datasize);
|
||||
if (pid > 0) {
|
||||
offset_print("Created hello-world process, pid=");
|
||||
print_hex((uint32_t)pid);
|
||||
|
||||
58
src/paging.c
58
src/paging.c
@@ -299,6 +299,64 @@ uint32_t paging_clone_directory(void) {
|
||||
return new_dir_phys;
|
||||
}
|
||||
|
||||
uint32_t paging_clone_directory_from(uint32_t src_pd_phys) {
|
||||
uint32_t *src_pd = (uint32_t *)src_pd_phys;
|
||||
|
||||
/* Allocate a new page directory */
|
||||
phys_addr_t new_pd_phys = pmm_alloc_page(PMM_ZONE_NORMAL);
|
||||
if (new_pd_phys == 0) {
|
||||
offset_print(" PAGING: cannot allocate page directory for fork\n");
|
||||
return 0;
|
||||
}
|
||||
uint32_t *new_pd = (uint32_t *)new_pd_phys;
|
||||
|
||||
/* Copy all page directory entries (shares kernel mappings) */
|
||||
memcpy(new_pd, src_pd, 4096);
|
||||
|
||||
/* Deep-copy user-space page tables (those with PAGE_USER set) */
|
||||
for (uint32_t i = 0; i < PAGE_ENTRIES; i++) {
|
||||
if (!(src_pd[i] & PAGE_PRESENT)) continue;
|
||||
if (!(src_pd[i] & PAGE_USER)) continue; /* kernel entry, shared */
|
||||
|
||||
uint32_t *src_pt = (uint32_t *)(src_pd[i] & 0xFFFFF000);
|
||||
|
||||
/* Allocate a new page table */
|
||||
phys_addr_t new_pt_phys = pmm_alloc_page(PMM_ZONE_NORMAL);
|
||||
if (new_pt_phys == 0) {
|
||||
offset_print(" PAGING: fork: cannot allocate page table\n");
|
||||
return 0; /* TODO: free partially allocated pages */
|
||||
}
|
||||
uint32_t *new_pt = (uint32_t *)new_pt_phys;
|
||||
|
||||
/* Deep-copy each page in the page table */
|
||||
for (uint32_t j = 0; j < PAGE_ENTRIES; j++) {
|
||||
if (!(src_pt[j] & PAGE_PRESENT)) {
|
||||
new_pt[j] = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (src_pt[j] & PAGE_USER) {
|
||||
/* User page: allocate new physical page and copy content */
|
||||
phys_addr_t old_phys = src_pt[j] & 0xFFFFF000;
|
||||
phys_addr_t new_phys = pmm_alloc_page(PMM_ZONE_NORMAL);
|
||||
if (new_phys == 0) {
|
||||
offset_print(" PAGING: fork: cannot allocate page\n");
|
||||
return 0;
|
||||
}
|
||||
memcpy((void *)new_phys, (void *)old_phys, 4096);
|
||||
new_pt[j] = new_phys | (src_pt[j] & 0xFFF);
|
||||
} else {
|
||||
/* Kernel page within a user page table: share directly */
|
||||
new_pt[j] = src_pt[j];
|
||||
}
|
||||
}
|
||||
|
||||
new_pd[i] = new_pt_phys | (src_pd[i] & 0xFFF);
|
||||
}
|
||||
|
||||
return new_pd_phys;
|
||||
}
|
||||
|
||||
void paging_map_page_in(uint32_t *pd, uint32_t vaddr, uint32_t paddr, uint32_t flags) {
|
||||
uint32_t pd_idx = PD_INDEX(vaddr);
|
||||
uint32_t pt_idx = PT_INDEX(vaddr);
|
||||
|
||||
11
src/paging.h
11
src/paging.h
@@ -98,6 +98,17 @@ uint32_t paging_get_directory_phys(void);
|
||||
*/
|
||||
uint32_t paging_clone_directory(void);
|
||||
|
||||
/**
|
||||
* Clone a page directory, deep-copying all user-space pages.
|
||||
* Kernel-space entries are shared (same page tables). User-space page
|
||||
* tables and their physical pages are duplicated so the clone is fully
|
||||
* independent.
|
||||
*
|
||||
* @param src_pd_phys Physical address of the source page directory.
|
||||
* @return Physical address of the new page directory, or 0 on failure.
|
||||
*/
|
||||
uint32_t paging_clone_directory_from(uint32_t src_pd_phys);
|
||||
|
||||
/**
|
||||
* Map a page in a specific page directory (not necessarily the active one).
|
||||
*
|
||||
|
||||
@@ -234,18 +234,43 @@ void process_exit(int32_t code) {
|
||||
current_process->state = PROCESS_ZOMBIE;
|
||||
current_process->exit_code = code;
|
||||
|
||||
/* Find another process to run.
|
||||
* We construct a minimal register frame to pass to schedule_tick.
|
||||
* Since the process is zombie, schedule_tick won't save its state. */
|
||||
registers_t dummy;
|
||||
memset(&dummy, 0, sizeof(dummy));
|
||||
schedule_tick(&dummy);
|
||||
|
||||
/* If we get here, no other process was ready. Halt. */
|
||||
offset_print(" PROCESS: no processes remaining, halting\n");
|
||||
for (;;) {
|
||||
__asm__ volatile("hlt");
|
||||
/* Wake any process blocked on waitpid for this PID */
|
||||
for (int i = 0; i < MAX_PROCESSES; i++) {
|
||||
if (process_table[i].state == PROCESS_BLOCKED &&
|
||||
process_table[i].waiting_for_pid == current_process->pid) {
|
||||
process_table[i].state = PROCESS_READY;
|
||||
process_table[i].saved_regs.eax = (uint32_t)code;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Find next ready process to switch to */
|
||||
process_t *next = NULL;
|
||||
for (int i = 0; i < MAX_PROCESSES; i++) {
|
||||
if (process_table[i].state == PROCESS_READY) {
|
||||
next = &process_table[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!next) {
|
||||
offset_print(" PROCESS: no processes remaining, halting\n");
|
||||
for (;;) {
|
||||
__asm__ volatile("cli; hlt");
|
||||
}
|
||||
}
|
||||
|
||||
/* Context switch to the next process via assembly stub */
|
||||
current_process = next;
|
||||
next->state = PROCESS_RUNNING;
|
||||
tss_set_kernel_stack(next->kernel_stack_top);
|
||||
paging_switch_directory(next->page_directory);
|
||||
|
||||
extern void process_switch_to_user(registers_t *regs);
|
||||
process_switch_to_user(&next->saved_regs);
|
||||
|
||||
/* Should never reach here */
|
||||
__builtin_unreachable();
|
||||
}
|
||||
|
||||
process_t *process_current(void) {
|
||||
@@ -262,7 +287,7 @@ process_t *process_get(uint32_t pid) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int32_t process_fork(void) {
|
||||
int32_t process_fork(registers_t *regs) {
|
||||
if (!current_process) {
|
||||
return -1;
|
||||
}
|
||||
@@ -278,6 +303,7 @@ int32_t process_fork(void) {
|
||||
child->pid = next_pid++;
|
||||
child->state = PROCESS_READY;
|
||||
child->parent_pid = current_process->pid;
|
||||
child->waiting_for_pid = 0;
|
||||
|
||||
/* Allocate a separate kernel stack for the child */
|
||||
void *child_kstack = paging_alloc_page();
|
||||
@@ -288,17 +314,27 @@ int32_t process_fork(void) {
|
||||
child->kernel_stack = (uint32_t)child_kstack;
|
||||
child->kernel_stack_top = child->kernel_stack + 4096;
|
||||
|
||||
/* Clone the page directory */
|
||||
child->page_directory = paging_clone_directory();
|
||||
/* Deep-clone the parent's page directory (copies all user-space pages) */
|
||||
child->page_directory = paging_clone_directory_from(current_process->page_directory);
|
||||
if (!child->page_directory) {
|
||||
kfree((void *)child->kernel_stack);
|
||||
paging_free_page((void *)child->kernel_stack);
|
||||
child->state = PROCESS_UNUSED;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Copy the current syscall registers to the child.
|
||||
* This ensures the child resumes at the same point as the parent
|
||||
* (right after the INT 0x80 instruction). */
|
||||
child->saved_regs = *regs;
|
||||
|
||||
/* Child's return value is 0 (in EAX) */
|
||||
child->saved_regs.eax = 0;
|
||||
|
||||
offset_print(" PROCESS: forked pid ");
|
||||
print_hex(current_process->pid);
|
||||
offset_print(" PROCESS: -> child pid ");
|
||||
print_hex(child->pid);
|
||||
|
||||
/* Parent's return value is child's PID */
|
||||
return (int32_t)child->pid;
|
||||
}
|
||||
|
||||
@@ -56,6 +56,7 @@ typedef struct process {
|
||||
uint32_t entry_point; /**< User-mode entry point. */
|
||||
int32_t exit_code; /**< Exit code (if ZOMBIE). */
|
||||
uint32_t parent_pid; /**< Parent process ID. */
|
||||
uint32_t waiting_for_pid; /**< PID we are blocked waiting for (if BLOCKED). */
|
||||
char name[32]; /**< Process name (for debugging). */
|
||||
} process_t;
|
||||
|
||||
@@ -113,10 +114,12 @@ process_t *process_get(uint32_t pid);
|
||||
|
||||
/**
|
||||
* Fork the current process.
|
||||
* Clones the current process's address space and register state.
|
||||
*
|
||||
* @param regs Pointer to the current interrupt frame (syscall registers).
|
||||
* @return PID of the child in the parent, 0 in the child, -1 on error.
|
||||
*/
|
||||
int32_t process_fork(void);
|
||||
int32_t process_fork(registers_t *regs);
|
||||
|
||||
/**
|
||||
* Start the first user-mode process. Does not return if a process is ready.
|
||||
|
||||
@@ -13,6 +13,10 @@
|
||||
#include "vga.h"
|
||||
#include <stddef.h>
|
||||
|
||||
/** Magic return value indicating the syscall blocked and switched processes.
|
||||
* syscall_handler must NOT overwrite regs->eax in this case. */
|
||||
#define SYSCALL_SWITCHED 0x7FFFFFFF
|
||||
|
||||
/* Debug print helpers defined in kernel.c */
|
||||
extern void offset_print(const char *str);
|
||||
extern void print_hex(uint32_t val);
|
||||
@@ -66,8 +70,7 @@ static int32_t sys_read(registers_t *regs) {
|
||||
* Handle SYS_FORK: fork the current process.
|
||||
*/
|
||||
static int32_t sys_fork(registers_t *regs) {
|
||||
(void)regs;
|
||||
return process_fork();
|
||||
return process_fork(regs);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -90,6 +93,11 @@ static int32_t sys_yield(registers_t *regs) {
|
||||
|
||||
/**
|
||||
* Handle SYS_WAITPID: wait for a child to exit.
|
||||
*
|
||||
* If the child is already a zombie, reaps immediately and returns the code.
|
||||
* Otherwise, blocks the current process and switches to the next one.
|
||||
* When the child exits, process_exit() will unblock the waiting parent
|
||||
* and set its saved_regs.eax to the exit code.
|
||||
*/
|
||||
static int32_t sys_waitpid(registers_t *regs) {
|
||||
uint32_t pid = regs->ebx;
|
||||
@@ -98,14 +106,29 @@ static int32_t sys_waitpid(registers_t *regs) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Busy-wait until child is zombie */
|
||||
while (child->state != PROCESS_ZOMBIE) {
|
||||
schedule();
|
||||
/* If child already exited, reap immediately */
|
||||
if (child->state == PROCESS_ZOMBIE) {
|
||||
int32_t code = child->exit_code;
|
||||
child->state = PROCESS_UNUSED;
|
||||
return code;
|
||||
}
|
||||
|
||||
int32_t code = child->exit_code;
|
||||
child->state = PROCESS_UNUSED;
|
||||
return code;
|
||||
/* Block the current process until the child exits */
|
||||
process_t *cur = process_current();
|
||||
cur->state = PROCESS_BLOCKED;
|
||||
cur->waiting_for_pid = pid;
|
||||
|
||||
/* Save the current syscall registers so we resume here when unblocked.
|
||||
* The return value (eax) will be set by process_exit when the child dies. */
|
||||
cur->saved_regs = *regs;
|
||||
|
||||
/* Schedule the next process. This modifies *regs to the next process's
|
||||
* saved state, so when the ISR stub does iret, it enters the next process. */
|
||||
schedule_tick(regs);
|
||||
|
||||
/* Tell syscall_handler not to overwrite regs->eax, since regs now
|
||||
* points to the next process's registers on the kernel stack. */
|
||||
return SYSCALL_SWITCHED;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -140,7 +163,9 @@ void syscall_handler(registers_t *regs) {
|
||||
}
|
||||
|
||||
int32_t ret = syscall_table[num](regs);
|
||||
regs->eax = (uint32_t)ret;
|
||||
if (ret != SYSCALL_SWITCHED) {
|
||||
regs->eax = (uint32_t)ret;
|
||||
}
|
||||
}
|
||||
|
||||
void init_syscalls(void) {
|
||||
|
||||
Reference in New Issue
Block a user