Loading learning content...
Real-world applications rarely spawn just one child process. Build systems compile thousands of source files in parallel. Web servers fork handlers for concurrent requests. Batch processing systems manage pools of workers executing queued tasks.
When managing multiple children, simple wait() calls no longer suffice. Questions arise:
This page provides the patterns and techniques needed to manage multiple child processes reliably, from basic tracking to sophisticated worker pools.
By the end of this page, you will understand PID tracking strategies, know how to wait for specific children or any child, master worker pool patterns, understand race conditions and how to prevent them, and be able to build robust multi-child process managers.
When a parent spawns multiple children, it must track them to:
Simple Approach: Array of PIDs
The most basic approach stores PIDs in an array:
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
#include <stdio.h>#include <stdlib.h>#include <unistd.h>#include <sys/wait.h> #define MAX_CHILDREN 100 /** * Simple PID array tracking * * Limitations: * - Fixed size * - No task association * - O(n) lookup by PID */int main() { pid_t children[MAX_CHILDREN]; int num_children = 0; // Spawn multiple children for (int i = 0; i < 5; i++) { pid_t pid = fork(); if (pid < 0) { perror("fork"); exit(1); } if (pid == 0) { // Child process sleep(i + 1); // Different durations printf("Child %d (task %d) exiting", getpid(), i); exit(i); } // Parent: track the PID children[num_children++] = pid; printf("Spawned child %d for task %d", pid, i); } // Wait for all children printf("Waiting for %d children...", num_children); for (int i = 0; i < num_children; i++) { int status; pid_t terminated = wait(&status); // Find which task this PID corresponded to int task_id = -1; for (int j = 0; j < num_children; j++) { if (children[j] == terminated) { task_id = j; break; } } printf("Child %d (task %d) finished with status %d", terminated, task_id, WEXITSTATUS(status)); } printf("All children complete"); return 0;}Limitations of Simple Arrays:
Better Approach: Structured Tracking
For serious applications, use a structured approach:
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187
#include <stdio.h>#include <stdlib.h>#include <string.h>#include <unistd.h>#include <sys/wait.h>#include <time.h> /** * Structured child process tracking */typedef enum { CHILD_PENDING, // Not yet started CHILD_RUNNING, // Fork succeeded, child executing CHILD_COMPLETE, // Child reaped, status available CHILD_FAILED // Fork failed or child crashed} ChildState; typedef struct { pid_t pid; // Process ID (0 if not running) ChildState state; // Current state int exit_status; // Exit status (when complete) int signal_num; // Signal if killed time_t start_time; // When child started time_t end_time; // When child finished char task_name[64]; // Description of task int task_id; // Numeric task identifier void *task_data; // Arbitrary task data} ChildInfo; typedef struct { ChildInfo *children; int count; int capacity; int running; // Count of currently running int completed; // Count of completed/failed} ChildTracker; /** * Initialize the tracker */void tracker_init(ChildTracker *t, int capacity) { t->children = calloc(capacity, sizeof(ChildInfo)); t->count = 0; t->capacity = capacity; t->running = 0; t->completed = 0;} /** * Start a child for a task */int tracker_start(ChildTracker *t, const char *task_name, int task_id, void (*child_func)(int, void*), void *data) { if (t->count >= t->capacity) { fprintf(stderr, "Tracker full"); return -1; } pid_t pid = fork(); if (pid < 0) { perror("fork"); return -1; } if (pid == 0) { // Child child_func(task_id, data); _exit(0); } // Parent: record child info ChildInfo *info = &t->children[t->count++]; info->pid = pid; info->state = CHILD_RUNNING; info->start_time = time(NULL); info->task_id = task_id; info->task_data = data; strncpy(info->task_name, task_name, sizeof(info->task_name) - 1); t->running++; printf("[Tracker] Started '%s' (task %d) as PID %d", task_name, task_id, pid); return t->count - 1; // Return index} /** * Find child info by PID */ChildInfo* tracker_find_by_pid(ChildTracker *t, pid_t pid) { for (int i = 0; i < 
t->count; i++) { if (t->children[i].pid == pid) { return &t->children[i]; } } return NULL;} /** * Reap one child (blocking) */ChildInfo* tracker_wait_one(ChildTracker *t) { if (t->running == 0) return NULL; int status; pid_t pid = wait(&status); if (pid < 0) return NULL; ChildInfo *info = tracker_find_by_pid(t, pid); if (info == NULL) { fprintf(stderr, "[Tracker] Unknown PID %d", pid); return NULL; } info->end_time = time(NULL); if (WIFEXITED(status)) { info->exit_status = WEXITSTATUS(status); info->state = (info->exit_status == 0) ? CHILD_COMPLETE : CHILD_FAILED; } else if (WIFSIGNALED(status)) { info->signal_num = WTERMSIG(status); info->state = CHILD_FAILED; } t->running--; t->completed++; printf("[Tracker] Reaped '%s' (PID %d): %s after %lds", info->task_name, pid, info->state == CHILD_COMPLETE ? "success" : "failed", info->end_time - info->start_time); return info;} /** * Wait for all children */void tracker_wait_all(ChildTracker *t) { while (t->running > 0) { tracker_wait_one(t); }} /** * Example child work function */void example_task(int task_id, void *data) { int duration = (int)(long)data; printf("Task %d: Working for %d seconds...", task_id, duration); sleep(duration); printf("Task %d: Complete", task_id);} int main() { ChildTracker tracker; tracker_init(&tracker, 10); tracker_start(&tracker, "quick-task", 0, example_task, (void*)1); tracker_start(&tracker, "medium-task", 1, example_task, (void*)3); tracker_start(&tracker, "long-task", 2, example_task, (void*)5); printf("Started %d tasks, waiting... ", tracker.count); tracker_wait_all(&tracker); printf("All tasks complete!"); printf("Completed: %d, Running: %d", tracker.completed, tracker.running); free(tracker.children); return 0;}When you need to wait for a particular child—not just any—use waitpid() with a specific PID:
pid_t waitpid(pid_t pid, int *status, int options);
With pid > 0, waitpid() waits only for that specific child. Other children that terminate are left as zombies until you wait for them.
Use Case: Sequential Dependencies
Some tasks must complete before others start:
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
#include <stdio.h>#include <stdlib.h>#include <unistd.h>#include <sys/wait.h> /** * Waiting for specific children in dependency order * * Scenario: * - Task A produces data * - Task B processes data (depends on A) * - Task C summarizes results (depends on B) * - Task D runs independently */int main() { pid_t task_a, task_b, task_c, task_d; int status; // Start Task A (data production) task_a = fork(); if (task_a == 0) { printf("Task A: Producing data..."); sleep(2); printf("Task A: Data ready"); exit(0); } printf("Started Task A: PID %d", task_a); // Start Task D (independent, can run in parallel) task_d = fork(); if (task_d == 0) { printf("Task D: Running independently..."); sleep(4); printf("Task D: Complete"); exit(0); } printf("Started Task D: PID %d", task_d); // Wait specifically for Task A before starting Task B printf("Waiting for Task A to complete..."); waitpid(task_a, &status, 0); if (WEXITSTATUS(status) != 0) { fprintf(stderr, "Task A failed, aborting pipeline"); kill(task_d, SIGTERM); wait(NULL); // Reap D exit(1); } printf("Task A complete, starting Task B"); // Now start Task B task_b = fork(); if (task_b == 0) { printf("Task B: Processing data from A..."); sleep(2); printf("Task B: Processing complete"); exit(0); } printf("Started Task B: PID %d", task_b); // Wait for Task B before Task C waitpid(task_b, &status, 0); printf("Task B complete, starting Task C"); task_c = fork(); if (task_c == 0) { printf("Task C: Summarizing..."); sleep(1); printf("Task C: Summary complete"); exit(0); } printf("Started Task C: PID %d", task_c); // Wait for remaining children waitpid(task_c, &status, 0); waitpid(task_d, &status, 0); // May already be done printf("All tasks complete!"); return 0;}When waiting for a specific PID, other children that terminate become zombies until you wait for them too. If Task D finishes while you're waiting for Task A, it becomes a zombie. Always ensure you eventually wait for ALL children.
Use Case: Ordered Completion
Sometimes results must be processed in a specific order, even if children complete out of order:
1234567891011121314151617181920212223242526272829303132333435
/** * Process children in order, regardless of completion order * * Useful when output must be assembled in sequence * (like parallel page rendering for a document) */#define NUM_PAGES 5 typedef struct { pid_t pid; int complete; int status;} PageWorker; void wait_in_order(PageWorker *workers, int count) { // For each slot in order... for (int i = 0; i < count; i++) { if (!workers[i].complete) { // Wait specifically for this child int status; waitpid(workers[i].pid, &status, 0); workers[i].status = status; workers[i].complete = 1; } // Now process result for page i // Even if page 5 finished first, we process page 1 first printf("Processing page %d result (status %d)", i, WEXITSTATUS(workers[i].status)); }} // Meanwhile, a SIGCHLD handler could mark workers as complete// so the ordered processing encounters no blocking for already-done pagesWhen children are independent workers and order doesn't matter, wait for whichever finishes first:
// Wait for any child
waitpid(-1, &status, 0);
// or equivalently:
wait(&status);
Use Case: Work Queue
Process tasks as workers become available:
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109
#include <stdio.h>#include <stdlib.h>#include <unistd.h>#include <sys/wait.h> #define MAX_WORKERS 4#define TOTAL_TASKS 10 /** * Work queue with worker pool * * Maintains MAX_WORKERS running at all times, * assigning new tasks as workers complete. */ typedef struct { pid_t pid; int task_id;} Worker; Worker workers[MAX_WORKERS] = {0};int active_workers = 0; int find_free_slot() { for (int i = 0; i < MAX_WORKERS; i++) { if (workers[i].pid == 0) return i; } return -1;} int find_worker_by_pid(pid_t pid) { for (int i = 0; i < MAX_WORKERS; i++) { if (workers[i].pid == pid) return i; } return -1;} void start_task(int task_id) { int slot = find_free_slot(); if (slot < 0) { fprintf(stderr, "No free slot!"); return; } pid_t pid = fork(); if (pid == 0) { // Child: simulate variable-length work int duration = 1 + (task_id % 3); // 1-3 seconds printf("Task %d: Starting (%d seconds)", task_id, duration); sleep(duration); printf("Task %d: Complete", task_id); exit(0); } workers[slot].pid = pid; workers[slot].task_id = task_id; active_workers++; printf("[Pool] Assigned task %d to worker slot %d (PID %d)", task_id, slot, pid);} void wait_for_one() { int status; pid_t pid = wait(&status); // Wait for ANY child int slot = find_worker_by_pid(pid); if (slot >= 0) { printf("[Pool] Task %d completed (slot %d freed)", workers[slot].task_id, slot); workers[slot].pid = 0; workers[slot].task_id = -1; active_workers--; }} int main() { int next_task = 0; printf("=== Worker Pool: %d workers, %d tasks === ", MAX_WORKERS, TOTAL_TASKS); // Initial fill while (active_workers < MAX_WORKERS && next_task < TOTAL_TASKS) { start_task(next_task++); } // Process until all tasks done while (next_task < TOTAL_TASKS || active_workers > 0) { // Wait for any worker to finish wait_for_one(); // Start new task if available if (next_task < TOTAL_TASKS) { start_task(next_task++); } } printf("=== All %d tasks complete ===", TOTAL_TASKS); return 0;}The Pattern:
This pattern maximizes parallelism: there are always MAX_WORKERS processes running (until tasks run out).
Hybrid Approach: Check Specific, Fall Back to Any
Sometimes you prefer a specific child but will take any:
123456789101112131415161718192021222324252627
/** * Hybrid wait: prefer high-priority child, but reap any */pid_t wait_prefer(pid_t preferred, int *status, int timeout_ms) { int elapsed = 0; while (elapsed < timeout_ms) { // Try preferred child first (non-blocking) pid_t result = waitpid(preferred, status, WNOHANG); if (result > 0) { return result; // Got preferred } // Try any child (non-blocking) result = waitpid(-1, status, WNOHANG); if (result > 0) { return result; // Got some child } // Nothing ready, wait a bit and try again usleep(10000); // 10ms elapsed += 10; } // Timeout: do a blocking wait return waitpid(-1, status, 0);}waitpid() supports waiting for process groups—sets of related processes. This is powerful for job control and managing pipelines.
Waiting by Process Group:
| pid value | Meaning |
|---|---|
| 0 | Wait for any child in the same process group as the caller |
| < -1 | Wait for any child in the process group whose ID is the absolute value of `pid` |
What Are Process Groups?
Every process belongs to exactly one process group. By default, children inherit the parent's group. Shells create new groups for pipeline jobs:
$ ls -la | grep foo | wc -l
This pipeline runs in its own process group, allowing the shell to:
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
#include <stdio.h>#include <stdlib.h>#include <unistd.h>#include <sys/wait.h>#include <signal.h> /** * Demonstrates process group creation and waiting */int main() { // Create first child as new process group leader pid_t leader = fork(); if (leader == 0) { // Become own process group leader setpgid(0, 0); // Set PGID to own PID printf("Leader (PID %d, PGID %d): Starting...", getpid(), getpgrp()); // Spawn children in same group for (int i = 0; i < 3; i++) { pid_t worker = fork(); if (worker == 0) { printf(" Worker %d (PID %d, PGID %d)", i, getpid(), getpgrp()); sleep(i + 1); printf(" Worker %d: Done", i); _exit(i); } } // Leader waits for its children for (int i = 0; i < 3; i++) { wait(NULL); } printf("Leader: All workers done"); _exit(0); } // Parent needs to also put child in new group // (Race: either parent or child can set the PGID first) setpgid(leader, leader); printf("Parent (PID %d): Created job group (PGID %d)", getpid(), leader); // Wait for entire process group int status; pid_t pg = -leader; // Negative PGID means "any in this group" printf("Parent: Waiting for process group %d...", leader); // Could also wait for just the leader waitpid(leader, &status, 0); printf("Job group leader terminated"); return 0;}Shells create a new process group for each foreground command/pipeline. When you press Ctrl+C (SIGINT) or Ctrl+Z (SIGTSTP), the signal goes to the entire foreground process group. This is how the shell stops or interrupts all processes in a pipeline at once.
Common Process Group Operations:
// Get current process's group ID
pid_t pgid = getpgrp(); // POSIX
pid_t pgid = getpgid(0); // Equivalent, 0 = current process
// Set process group
setpgid(pid, pgid); // Put 'pid' in group 'pgid'
setpgid(0, 0); // Make current process a group leader
// Send signal to entire group
kill(-pgid, SIGTERM); // Negative = group, not process
// Wait for any in group
waitpid(-pgid, &status, 0); // Negative = group
waitpid(0, &status, 0); // 0 = same group as caller
Managing multiple children introduces subtle race conditions. Let's examine the most common and how to prevent them.
Race 1: Child Terminates Before waitpid() Called
This is NOT a race problem! The kernel handles it correctly:
pid_t pid = fork();
if (pid == 0) {
_exit(0); // Child exits immediately
}
// Even if child exits here, before next line...
waitpid(pid, &status, 0); // Still works! Child is a zombie.
The zombie mechanism exists precisely to handle this. The child's status is preserved until reaped.
Race 2: SIGCHLD Handler vs. Explicit wait()
This IS a problem:
12345678910111213141516171819202122232425262728293031323334
/**
 * RACE CONDITION: SIGCHLD handler conflicts with explicit wait
 *
 * BUG: Both handler and main code try to reap the same child.
 * This listing is intentionally broken; it exists to illustrate the race.
 */

/* SIGCHLD handler that reaps one child with a blocking wait() and logs it.
 * NOTE(review): printf() is not on the POSIX async-signal-safe list, so
 * calling it from a handler is a second problem beyond the race shown here. */
void sigchld_handler(int sig) {
    int status;
    pid_t pid = wait(&status);  // BUG: May steal child from main code
    printf("Handler reaped %d", pid);
}

/* Installs the handler, forks a child that exits immediately with status 42,
 * then tries to reap that same child explicitly with waitpid(). */
void buggy_code() {
    signal(SIGCHLD, sigchld_handler);

    pid_t child = fork();
    if (child == 0) {
        _exit(42);
    }

    // BUG: Signal handler might run here, reaping the child
    sleep(1);

    // BUG: wait() might block forever or get wrong child
    int status;
    pid_t reaped = waitpid(child, &status, 0);
    if (reaped < 0) {
        // ECHILD: handler already reaped it
        perror("waitpid failed");
    }
}

Solution: Consistent Reaping Strategy
Pick ONE approach and stick to it:
Option A: Handler does all reaping
- The handler uses a `WNOHANG` loop to reap all terminated children
- Main code never calls `wait()` directly

Option B: Main code does all reaping
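- Block `SIGCHLD` or leave it at its default (ignored) disposition; do not install a handler that reaps
- Main code reaps with `waitpid(-1, ..., WNOHANG)`

Option C: Handler just notifies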
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
/** * CORRECT: Handler reaps and records, main code reads results */ #include <stdio.h>#include <stdlib.h>#include <signal.h>#include <sys/wait.h>#include <unistd.h>#include <string.h>#include <errno.h> #define MAX_CHILDREN 100 typedef struct { pid_t pid; int status; int completed;} CompletionRecord; volatile CompletionRecord completions[MAX_CHILDREN];volatile int completion_count = 0; void sigchld_handler(int sig) { int saved_errno = errno; int status; pid_t pid; while ((pid = waitpid(-1, &status, WNOHANG)) > 0) { if (completion_count < MAX_CHILDREN) { completions[completion_count].pid = pid; completions[completion_count].status = status; completions[completion_count].completed = 1; completion_count++; // Not atomic, but good enough for demo } } errno = saved_errno;} int main() { struct sigaction sa = {0}; sa.sa_handler = sigchld_handler; sa.sa_flags = SA_RESTART | SA_NOCLDSTOP; sigaction(SIGCHLD, &sa, NULL); // Fork some children for (int i = 0; i < 5; i++) { pid_t pid = fork(); if (pid == 0) { sleep(i); _exit(i * 10); } printf("Started child %d", pid); } // Main loop processes completions int processed = 0; while (processed < 5) { // Check for new completions while (processed < completion_count) { printf("Main: Processing completion %d - PID %d, status %d", processed, completions[processed].pid, WEXITSTATUS(completions[processed].status)); processed++; } usleep(100000); // Brief sleep } printf("All children processed"); return 0;}When a signal handler modifies shared data, you must consider atomicity. The handler can interrupt the main code at any point. Use volatile for simple flags, sig_atomic_t for portable atomic operations, or block signals during critical sections of the main code.
Let's build a robust worker pool that combines the techniques we've learned: structured tracking, signal-driven completion, and proper race condition handling.
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173
#include <stdio.h>#include <stdlib.h>#include <string.h>#include <unistd.h>#include <signal.h>#include <sys/wait.h>#include <errno.h> #define MAX_WORKERS 4#define MAX_TASKS 100 // Task definitiontypedef struct { int id; int duration; // Simulated work duration int priority;} Task; // Worker statetypedef enum { WORKER_IDLE, WORKER_BUSY, WORKER_DONE} WorkerState; typedef struct { pid_t pid; WorkerState state; Task *current_task; int exit_status;} Worker; // Pool statetypedef struct { Worker workers[MAX_WORKERS]; Task tasks[MAX_TASKS]; int task_count; int next_task; int completed_tasks; volatile sig_atomic_t child_exited; // Signal notification} WorkerPool; WorkerPool pool = {0}; // Signal handler - just sets flagvoid sigchld_handler(int sig) { pool.child_exited = 1;} // Find idle workerint find_idle_worker() { for (int i = 0; i < MAX_WORKERS; i++) { if (pool.workers[i].state == WORKER_IDLE) { return i; } } return -1;} // Reap all finished workersvoid reap_workers() { int status; pid_t pid; while ((pid = waitpid(-1, &status, WNOHANG)) > 0) { for (int i = 0; i < MAX_WORKERS; i++) { if (pool.workers[i].pid == pid) { Worker *w = &pool.workers[i]; printf("[Pool] Worker %d finished task %d", i, w->current_task->id); if (WIFEXITED(status)) { w->exit_status = WEXITSTATUS(status); } else if (WIFSIGNALED(status)) { printf("[Pool] WARNING: Worker %d killed by signal %d", i, WTERMSIG(status)); w->exit_status = -1; } w->state = WORKER_IDLE; w->current_task = NULL; w->pid = 0; pool.completed_tasks++; break; } } } pool.child_exited = 0;} // Start a task on a workerint start_task(int worker_idx, Task *task) { Worker *w = &pool.workers[worker_idx]; pid_t pid = fork(); if (pid < 0) { perror("fork"); return -1; } if (pid == 0) { // Child: do the work printf("[Worker %d] Starting task %d (duration %d)", worker_idx, task->id, task->duration); sleep(task->duration); printf("[Worker %d] Completed task %d", worker_idx, task->id); _exit(0); } // Parent: record assignment w->pid = pid; 
w->state = WORKER_BUSY; w->current_task = task; return 0;} // Main pool loopvoid run_pool() { // Install signal handler struct sigaction sa = {0}; sa.sa_handler = sigchld_handler; sa.sa_flags = SA_RESTART | SA_NOCLDSTOP; sigaction(SIGCHLD, &sa, NULL); printf("[Pool] Starting with %d workers, %d tasks ", MAX_WORKERS, pool.task_count); while (pool.completed_tasks < pool.task_count) { // Check for finished workers if (pool.child_exited) { reap_workers(); } // Start new tasks on idle workers int worker; while ((worker = find_idle_worker()) >= 0 && pool.next_task < pool.task_count) { start_task(worker, &pool.tasks[pool.next_task++]); } // Wait a bit before checking again usleep(100000); } printf("[Pool] All %d tasks completed", pool.task_count);} int main() { // Initialize tasks pool.task_count = 10; for (int i = 0; i < pool.task_count; i++) { pool.tasks[i].id = i; pool.tasks[i].duration = 1 + (i % 3); // 1-3 seconds pool.tasks[i].priority = pool.task_count - i; } run_pool(); return 0;}This page has provided comprehensive coverage of managing multiple child processes—a common requirement in servers, build systems, and batch processors.
What's Next:
We've now covered all aspects of collecting and interpreting child termination status. The final page explores status macros in depth—the complete set of POSIX macros for decoding every possible termination scenario, including stopped and continued processes for job control.
You now have the tools to manage multiple child processes reliably. From simple PID tracking to sophisticated worker pools, you understand the patterns and pitfalls of multi-child process management. Next, we'll master the complete set of status macros.