Files
claude-os/src/paging.c
AI 42328ead0b feat: implement fork system call with deep address space cloning (AI)
- Added paging_clone_directory_from(): deep-copies user-space pages so
  parent and child have independent memory. Kernel pages are shared.
- Fixed process_fork() to accept registers_t* for accurate child state,
  and to clone from the parent's page directory (not the kernel's).
- Refactored process_exit() to properly context-switch to next process
  using new process_switch_to_user assembly stub (loads full registers_t
  and performs iret), instead of halting unconditionally.
- Fixed sys_waitpid() to use proper blocking: marks process BLOCKED,
  invokes scheduler, and resumes with exit code when child dies.
- Added SYSCALL_SWITCHED mechanism to prevent syscall_handler from
  clobbering the next process's EAX after a context switch.
- Created fork-test user app that validates fork + waitpid.
- Added docs/fork.md with architecture documentation.

Tested: fork-test creates child, both print messages, parent waits for
child exit (code 7), parent reaps and exits (code 0). hello-world also
verified to still work correctly after the process_exit refactor.
2026-02-23 12:42:02 +00:00

385 lines
12 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* @file paging.c
* @brief Virtual memory paging subsystem implementation.
*
* Implements two-level x86 paging (page directory + page tables) with 4 KiB
* pages. At initialization, all detected physical memory is identity-mapped
* so that physical addresses equal virtual addresses. Drivers and the kernel
* can then allocate additional virtual pages as needed.
*
* The kernel heap region starts at KERNEL_HEAP_START (0xD0000000) and grows
* upward as pages are requested through paging_alloc_page().
*/
#include "paging.h"
#include "pmm.h"
#include "port_io.h"
#include <stddef.h>
#include <string.h>
/* Debug print helpers defined in kernel.c */
extern void offset_print(const char *str);
extern void print_hex(uint32_t val);
/** Kernel heap starts at 0xD0000000 (above the 0xC0000000 higher-half region). */
#define KERNEL_HEAP_START 0xD0000000
/** Kernel heap ends at 0xF0000000 (768 MiB of virtual space for kernel heap). */
#define KERNEL_HEAP_END 0xF0000000
/**
* The page directory. Must be page-aligned (4 KiB).
* Each entry either points to a page table or is zero (not present).
*/
static uint32_t page_directory[PAGE_ENTRIES] __attribute__((aligned(4096)));
/**
* Storage for page tables. We pre-allocate enough for identity mapping.
* For a system with up to 4 GiB, we'd need 1024 page tables, but we
* only use these for the first 16 MiB during early boot. Additional page
* tables are allocated from the PMM as needed.
*
* The first 16 MiB must be statically allocated because the PMM bitmap
* itself lives in BSS within this region.
*/
#define STATIC_PT_COUNT 4
static uint32_t static_page_tables[STATIC_PT_COUNT][PAGE_ENTRIES] __attribute__((aligned(4096)));
/**
* Dynamically allocated page tables for memory above 16 MiB.
* Before paging is enabled, we allocate these from the PMM and store
* their physical addresses here so we can access them after paging.
*/
#define MAX_DYNAMIC_PT 256
static uint32_t *dynamic_page_tables[MAX_DYNAMIC_PT];
static uint32_t dynamic_pt_count = 0;
/** Next virtual address to hand out from the kernel heap. */
static uint32_t heap_next = KERNEL_HEAP_START;
/**
 * Flush a single TLB entry for the given virtual address.
 *
 * Uses the INVLPG instruction, which invalidates only the translation
 * for the page containing @p vaddr — much cheaper than a full CR3
 * reload when only one mapping changed.
 *
 * @param vaddr The virtual address whose TLB entry to invalidate.
 */
static inline void tlb_flush_single(uint32_t vaddr) {
    __asm__ volatile("invlpg (%0)" : : "r"(vaddr) : "memory");
}
/**
 * Reload CR3 to flush the entire TLB.
 *
 * Writing CR3 (even with its current value) discards all non-global
 * TLB entries on x86. Used when many mappings change at once.
 */
static inline void tlb_flush_all(void) {
    uint32_t cr3;
    __asm__ volatile("mov %%cr3, %0" : "=r"(cr3));
    __asm__ volatile("mov %0, %%cr3" : : "r"(cr3) : "memory");
}
/**
 * Look up (and optionally create) the page table for a directory slot.
 *
 * When the directory entry is absent and @p create is non-zero, a fresh
 * page table is pulled from the PMM, zeroed, and installed with
 * PRESENT|WRITE flags. The returned pointer is the table's physical
 * address, which is directly dereferenceable because physical memory is
 * identity-mapped.
 *
 * @param pd_idx Page directory index (0-1023).
 * @param create Non-zero to allocate the table on a miss.
 * @return Pointer to the page table; NULL when the entry is absent and
 *         !create, or when the PMM has no free page.
 */
static uint32_t *get_page_table(uint32_t pd_idx, int create) {
    uint32_t entry = page_directory[pd_idx];
    if (entry & PAGE_PRESENT) {
        return (uint32_t *)(entry & 0xFFFFF000);
    }
    if (!create) {
        return NULL;
    }
    /* Miss: carve a fresh table out of the PMM and hook it up. */
    phys_addr_t table = pmm_alloc_page(PMM_ZONE_NORMAL);
    if (table == 0) {
        offset_print(" PAGING: FATAL - could not allocate page table\n");
        return NULL;
    }
    memset((void *)table, 0, 4096);
    page_directory[pd_idx] = table | PAGE_PRESENT | PAGE_WRITE;
    return (uint32_t *)table;
}
void paging_map_page(uint32_t vaddr, uint32_t paddr, uint32_t flags) {
uint32_t pd_idx = PD_INDEX(vaddr);
uint32_t pt_idx = PT_INDEX(vaddr);
uint32_t *pt = get_page_table(pd_idx, 1);
if (!pt) {
return;
}
pt[pt_idx] = (paddr & 0xFFFFF000) | (flags & 0xFFF);
tlb_flush_single(vaddr);
}
void paging_unmap_page(uint32_t vaddr) {
uint32_t pd_idx = PD_INDEX(vaddr);
uint32_t pt_idx = PT_INDEX(vaddr);
uint32_t *pt = get_page_table(pd_idx, 0);
if (!pt) {
return;
}
pt[pt_idx] = 0;
tlb_flush_single(vaddr);
}
uint32_t paging_get_physical(uint32_t vaddr) {
uint32_t pd_idx = PD_INDEX(vaddr);
uint32_t pt_idx = PT_INDEX(vaddr);
uint32_t *pt = get_page_table(pd_idx, 0);
if (!pt) {
return 0;
}
if (!(pt[pt_idx] & PAGE_PRESENT)) {
return 0;
}
return (pt[pt_idx] & 0xFFFFF000) | (vaddr & 0xFFF);
}
/**
 * Allocate one 4 KiB page of kernel heap memory.
 *
 * Grabs a physical frame from the PMM and maps it at the next free
 * kernel-heap virtual address (heap grows upward from
 * KERNEL_HEAP_START toward KERNEL_HEAP_END).
 *
 * @return Virtual address of the new page, or NULL when the heap range
 *         is exhausted, physical memory is exhausted, or the mapping
 *         could not be established.
 */
void *paging_alloc_page(void) {
    if (heap_next >= KERNEL_HEAP_END) {
        offset_print(" PAGING: kernel heap exhausted\n");
        return NULL;
    }
    /* Allocate a physical page */
    phys_addr_t paddr = pmm_alloc_page(PMM_ZONE_NORMAL);
    if (paddr == 0) {
        offset_print(" PAGING: out of physical memory\n");
        return NULL;
    }
    /* Map it into the kernel heap */
    uint32_t vaddr = heap_next;
    paging_map_page(vaddr, paddr, PAGE_PRESENT | PAGE_WRITE);
    /* paging_map_page fails silently when a page table cannot be
     * allocated. Verify the mapping took effect before committing the
     * heap pointer; otherwise the physical frame would leak and a
     * dangling virtual address would be handed to the caller. */
    if (paging_get_physical(vaddr) == 0) {
        pmm_free_page(paddr);
        return NULL;
    }
    heap_next += 4096;
    return (void *)vaddr;
}
/**
 * Free a page previously returned by paging_alloc_page(): unmap the
 * virtual page and hand the physical frame back to the PMM.
 *
 * @param vaddr Virtual address of the page to release. Silently ignored
 *              when the address is not currently mapped.
 */
void paging_free_page(void *vaddr) {
    uint32_t va = (uint32_t)vaddr;
    /* Resolve the backing frame first — unmapping destroys the PTE. */
    uint32_t pa = paging_get_physical(va);
    if (pa != 0) {
        paging_unmap_page(va);
        pmm_free_page(pa & 0xFFFFF000);
    }
}
/**
 * Initialize two-level x86 paging and enable it.
 *
 * Identity-maps all detected physical memory (so vaddr == paddr), loads
 * the page directory into CR3, and sets CR0.PG. The steps are
 * order-sensitive: all page tables must be populated through their
 * physical addresses BEFORE paging is switched on, since afterward only
 * mapped memory is reachable.
 */
void init_paging(void) {
    /* 1. Zero the page directory */
    memset(page_directory, 0, sizeof(page_directory));
    /* 2. Identity map the first 16 MiB using static page tables.
     * This covers the kernel (loaded at 1 MiB), the PMM bitmap (in BSS),
     * the stack, and typical BIOS/device regions.
     * Each page table maps 4 MiB (1024 entries × 4 KiB).
     */
    for (uint32_t i = 0; i < STATIC_PT_COUNT; i++) {
        memset(static_page_tables[i], 0, sizeof(static_page_tables[i]));
        for (uint32_t j = 0; j < PAGE_ENTRIES; j++) {
            /* Linear frame number (i*1024 + j) scaled to a byte address. */
            uint32_t paddr = (i * PAGE_ENTRIES + j) * 4096;
            static_page_tables[i][j] = paddr | PAGE_PRESENT | PAGE_WRITE;
        }
        /* Static tables live in identity-mapped BSS, so their virtual
         * address is also their physical address — safe to store in the
         * directory directly. */
        page_directory[i] = (uint32_t)static_page_tables[i] | PAGE_PRESENT | PAGE_WRITE;
    }
    offset_print(" PAGING: identity mapped first 16 MiB\n");
    /* 3. Identity map memory above 16 MiB using dynamically allocated page
     * tables. We do this BEFORE enabling paging, so physical addresses
     * are still directly accessible.
     *
     * mem_upper is in KiB and starts at 1 MiB, so total memory is
     * approximately (mem_upper + 1024) KiB.
     */
    uint32_t mem_kb = pmm_get_memory_size() + 1024; /* total memory in KiB */
    uint32_t total_bytes = mem_kb * 1024;
    /* Round up to whole 4 MiB directory entries. */
    uint32_t pd_entries_needed = (total_bytes + (4 * 1024 * 1024 - 1)) / (4 * 1024 * 1024);
    if (pd_entries_needed > PAGE_ENTRIES) {
        pd_entries_needed = PAGE_ENTRIES;
    }
    dynamic_pt_count = 0;
    for (uint32_t i = STATIC_PT_COUNT; i < pd_entries_needed; i++) {
        /* NOTE(review): if MAX_DYNAMIC_PT is hit, memory above
         * (16 + 4*MAX_DYNAMIC_PT) MiB is silently left unmapped. */
        if (dynamic_pt_count >= MAX_DYNAMIC_PT) {
            break;
        }
        /* Allocate a page for this page table from the DMA zone,
         * since we need it to be accessible before paging is enabled
         * (i.e., within the first 16 MiB identity map won't help for
         * the page table itself, but we haven't enabled paging yet so
         * ALL physical memory is accessible). */
        phys_addr_t pt_phys = pmm_alloc_page(PMM_ZONE_DMA);
        if (pt_phys == 0) {
            pt_phys = pmm_alloc_page(PMM_ZONE_NORMAL);
        }
        if (pt_phys == 0) {
            offset_print(" PAGING: WARNING - could not alloc page table\n");
            break;
        }
        uint32_t *pt = (uint32_t *)pt_phys;
        /* Remember the table so it can be reached after paging is on. */
        dynamic_page_tables[dynamic_pt_count++] = pt;
        /* Fill the page table with identity mappings */
        for (uint32_t j = 0; j < PAGE_ENTRIES; j++) {
            uint32_t paddr = (i * PAGE_ENTRIES + j) * 4096;
            pt[j] = paddr | PAGE_PRESENT | PAGE_WRITE;
        }
        page_directory[i] = pt_phys | PAGE_PRESENT | PAGE_WRITE;
    }
    if (dynamic_pt_count > 0) {
        /* NOTE(review): these three calls each re-emit the " PAGING: "
         * prefix, producing an oddly stitched log line — confirm whether
         * offset_print is line-oriented before "fixing" the strings. */
        offset_print(" PAGING: identity mapped ");
        print_hex(pd_entries_needed * 4);
        offset_print(" PAGING: MiB total using ");
        print_hex(dynamic_pt_count);
        offset_print(" PAGING: additional page tables\n");
    }
    /* 4. Load the page directory into CR3 */
    __asm__ volatile("mov %0, %%cr3" : : "r"(page_directory) : "memory");
    /* 5. Enable paging by setting bit 31 (PG) of CR0 */
    uint32_t cr0;
    __asm__ volatile("mov %%cr0, %0" : "=r"(cr0));
    cr0 |= 0x80000000;
    __asm__ volatile("mov %0, %%cr0" : : "r"(cr0) : "memory");
    offset_print(" PAGING: enabled\n");
}
/**
 * Return the physical address of the kernel page directory.
 *
 * The directory lives in identity-mapped kernel BSS, so its virtual
 * address doubles as its physical address.
 */
uint32_t paging_get_directory_phys(void) {
    return (uint32_t)&page_directory[0];
}
/**
 * Create a new page directory that shares all kernel mappings.
 *
 * The fresh directory is a verbatim copy of the kernel directory, so the
 * identity map and kernel heap are visible in the new address space.
 * User-space mappings are expected to be installed separately.
 *
 * @return Physical address of the new directory, or 0 on allocation
 *         failure.
 */
uint32_t paging_clone_directory(void) {
    phys_addr_t dir = pmm_alloc_page(PMM_ZONE_NORMAL);
    if (dir == 0) {
        offset_print(" PAGING: cannot allocate page directory\n");
        return 0;
    }
    memcpy((uint32_t *)dir, page_directory, sizeof(page_directory));
    return dir;
}
/**
 * Release the deep-copied user pages and page tables installed in
 * @p new_pd for directory indices [0, upto). Used to unwind a partially
 * completed clone when an allocation fails mid-way. Kernel entries and
 * shared kernel pages inside user tables (no PAGE_USER bit) are left
 * alone.
 */
static void free_cloned_user_entries(uint32_t *new_pd, uint32_t upto) {
    for (uint32_t i = 0; i < upto; i++) {
        if (!(new_pd[i] & PAGE_PRESENT) || !(new_pd[i] & PAGE_USER)) {
            continue;
        }
        uint32_t *pt = (uint32_t *)(new_pd[i] & 0xFFFFF000);
        for (uint32_t j = 0; j < PAGE_ENTRIES; j++) {
            if ((pt[j] & PAGE_PRESENT) && (pt[j] & PAGE_USER)) {
                pmm_free_page(pt[j] & 0xFFFFF000);
            }
        }
        pmm_free_page((uint32_t)pt);
    }
}
/**
 * Deep-clone a page directory for fork().
 *
 * Every user-space page table (PAGE_USER set in the directory entry) and
 * every user page within it is copied into freshly allocated physical
 * memory, giving the child an independent copy of the parent's address
 * space. Kernel entries are shared by copying the directory entry
 * verbatim. All tables are accessed through their physical addresses,
 * relying on the identity map.
 *
 * @param src_pd_phys Physical address of the source page directory.
 * @return Physical address of the new directory, or 0 on allocation
 *         failure — in which case every partially allocated page and
 *         page table is returned to the PMM (no leak).
 */
uint32_t paging_clone_directory_from(uint32_t src_pd_phys) {
    uint32_t *src_pd = (uint32_t *)src_pd_phys;
    /* Allocate a new page directory */
    phys_addr_t new_pd_phys = pmm_alloc_page(PMM_ZONE_NORMAL);
    if (new_pd_phys == 0) {
        offset_print(" PAGING: cannot allocate page directory for fork\n");
        return 0;
    }
    uint32_t *new_pd = (uint32_t *)new_pd_phys;
    /* Copy all page directory entries (shares kernel mappings) */
    memcpy(new_pd, src_pd, 4096);
    /* Deep-copy user-space page tables (those with PAGE_USER set) */
    for (uint32_t i = 0; i < PAGE_ENTRIES; i++) {
        if (!(src_pd[i] & PAGE_PRESENT)) continue;
        if (!(src_pd[i] & PAGE_USER)) continue; /* kernel entry, shared */
        uint32_t *src_pt = (uint32_t *)(src_pd[i] & 0xFFFFF000);
        /* Allocate a new page table */
        phys_addr_t new_pt_phys = pmm_alloc_page(PMM_ZONE_NORMAL);
        if (new_pt_phys == 0) {
            offset_print(" PAGING: fork: cannot allocate page table\n");
            /* Unwind everything cloned so far, then the directory. */
            free_cloned_user_entries(new_pd, i);
            pmm_free_page(new_pd_phys);
            return 0;
        }
        uint32_t *new_pt = (uint32_t *)new_pt_phys;
        /* Deep-copy each page in the page table */
        for (uint32_t j = 0; j < PAGE_ENTRIES; j++) {
            if (!(src_pt[j] & PAGE_PRESENT)) {
                new_pt[j] = 0;
                continue;
            }
            if (src_pt[j] & PAGE_USER) {
                /* User page: allocate new physical page and copy content */
                phys_addr_t old_phys = src_pt[j] & 0xFFFFF000;
                phys_addr_t new_phys = pmm_alloc_page(PMM_ZONE_NORMAL);
                if (new_phys == 0) {
                    offset_print(" PAGING: fork: cannot allocate page\n");
                    /* Unwind: pages copied into this table so far,
                     * the table itself, all earlier cloned entries,
                     * and finally the directory. */
                    for (uint32_t k = 0; k < j; k++) {
                        if ((new_pt[k] & PAGE_PRESENT) && (new_pt[k] & PAGE_USER)) {
                            pmm_free_page(new_pt[k] & 0xFFFFF000);
                        }
                    }
                    pmm_free_page(new_pt_phys);
                    free_cloned_user_entries(new_pd, i);
                    pmm_free_page(new_pd_phys);
                    return 0;
                }
                memcpy((void *)new_phys, (void *)old_phys, 4096);
                new_pt[j] = new_phys | (src_pt[j] & 0xFFF);
            } else {
                /* Kernel page within a user page table: share directly */
                new_pt[j] = src_pt[j];
            }
        }
        /* Install the cloned table with the source entry's flags. */
        new_pd[i] = new_pt_phys | (src_pd[i] & 0xFFF);
    }
    return new_pd_phys;
}
/**
 * Map one 4 KiB page inside an arbitrary (typically per-process) page
 * directory, creating a user-accessible page table on demand.
 *
 * No TLB flush is performed — the target directory is usually not the
 * active one; callers switch CR3 afterwards, which flushes the TLB.
 *
 * @param pd    Pointer to the page directory (via its physical address).
 * @param vaddr Virtual address to map.
 * @param paddr Physical frame address.
 * @param flags Page flags in the low 12 bits.
 */
void paging_map_page_in(uint32_t *pd, uint32_t vaddr, uint32_t paddr, uint32_t flags) {
    uint32_t dir_slot = PD_INDEX(vaddr);
    if (!(pd[dir_slot] & PAGE_PRESENT)) {
        /* No table covers this 4 MiB region yet — create one. */
        phys_addr_t tbl = pmm_alloc_page(PMM_ZONE_NORMAL);
        if (tbl == 0) {
            offset_print(" PAGING: cannot allocate page table for process\n");
            return;
        }
        memset((void *)tbl, 0, 4096);
        pd[dir_slot] = tbl | PAGE_PRESENT | PAGE_WRITE | PAGE_USER;
    }
    uint32_t *table = (uint32_t *)(pd[dir_slot] & 0xFFFFF000);
    table[PT_INDEX(vaddr)] = (paddr & 0xFFFFF000) | (flags & 0xFFF);
}
/**
 * Switch the active address space by loading a new page directory's
 * physical address into CR3. The CR3 write also flushes all non-global
 * TLB entries as a side effect.
 *
 * @param phys_addr Physical address of the page directory to activate.
 */
void paging_switch_directory(uint32_t phys_addr) {
    __asm__ volatile("mov %0, %%cr3" : : "r"(phys_addr) : "memory");
}