From 3800d4934391b144fd261a7957aea72ced7d47ea Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 22 Feb 2009 18:10:01 +0800 Subject: [JOBS] Fix dowait signal race This test program by Alexey Gladkov can cause dash to enter an infinite loop in waitcmd. #!/bin/dash trap "echo TRAP" USR1 stub() { echo ">>> STUB $1" >&2 sleep $1 echo "<<< STUB $1" >&2 kill -USR1 $$ } stub 3 & stub 2 & until { echo "###"; wait; } do echo "*** $?" done The problem is that if we get a signal after the wait3 system call has returned but before we get to INTON in dowait, then we can jump back up to the top and lose the exit status. So if we then wait for the job that has just exited, then it'll stay there forever. I made the original change that caused this bug to fix pretty much the same bug but in the opposite direction. That is, if we get a signal after we enter wait3 but before we hit the kernel then it too can cause the wait to go on forever (assuming the child doesn't exit). In fact this is pretty much exactly the scenario that you'll find in glibc's documentation on pause(). The solution is given there too, in the form of sigsuspend, which is the only way to do the check and wait atomically. So this patch fixes Alexey's race without reintroducing the old bug by converting the blocking wait3 to a sigsuspend. In order to do this we need to set a signal handler for SIGCHLD, so the code has been modified to always do that. Signed-off-by: Herbert Xu --- src/jobs.c | 64 +++++++++++++++++++++++++++++++++----------------------------- 1 file changed, 34 insertions(+), 30 deletions(-) (limited to 'src/jobs.c') diff --git a/src/jobs.c b/src/jobs.c index 2b6a752..69a84f7 100644 --- a/src/jobs.c +++ b/src/jobs.c @@ -75,6 +75,7 @@ /* mode flags for dowait */ #define DOWAIT_NORMAL 0 #define DOWAIT_BLOCK 1 +#define DOWAIT_WAITCMD 2 /* array of jobs */ static struct job *jobtab; @@ -589,8 +590,6 @@ waitcmd(int argc, char **argv) int retval; struct job *jp; - EXSIGON(); - nextopt(nullstr); retval = 0; @@ -609,7 +608,8 @@ waitcmd(int argc, char **argv) jp->waited = 1; jp = jp->prev_job; } - dowait(DOWAIT_BLOCK, 0); + if (dowait(DOWAIT_WAITCMD, 0) <= 0) + goto sigout; } } @@ -631,7 +631,8 @@ start: job = getjob(*argv, 0); /* loop until process terminated or stopped */ while (job->state == JOBRUNNING) - dowait(DOWAIT_BLOCK, 0); + if (dowait(DOWAIT_WAITCMD, 0) <= 0) + goto sigout; job->waited = 1; retval = getstatus(job); repeat: @@ -640,6 +641,10 @@ repeat: out: return retval; + +sigout: + retval = 128 + pendingsigs; + goto out; } @@ -998,16 +1003,16 @@ dowait(int block, struct job *job) int pid; int status; struct job *jp; - struct job *thisjob; + struct job *thisjob = NULL; int state; + INTOFF; TRACE(("dowait(%d) called\n", block)); pid = waitproc(block, &status); TRACE(("wait returns pid %d, status=%d\n", pid, status)); if (pid <= 0) - return pid; - INTOFF; - thisjob = NULL; + goto out; + for (jp = curjob; jp; jp = jp->prev_job) { struct procstat *sp; struct procstat *spend; @@ -1115,34 +1120,33 @@ STATIC int onsigchild() { STATIC int waitproc(int block, int *status) { -#ifdef BSD - int flags = 0; + sigset_t mask, oldmask; + int flags = block == DOWAIT_BLOCK ? 0 : WNOHANG; + int err; + int sig; #if JOBS if (jobctl) flags |= WUNTRACED; #endif - if (block == 0) - flags |= WNOHANG; - return wait3(status, flags, (struct rusage *)NULL); -#else -#ifdef SYSV - int (*save)(); - if (block == 0) { - gotsigchild = 0; - save = signal(SIGCLD, onsigchild); - signal(SIGCLD, save); - if (gotsigchild == 0) - return 0; - } - return wait(status); -#else - if (block == 0) - return 0; - return wait(status); -#endif -#endif + do { + err = wait3(status, flags, NULL); + if (err || !block) + break; + + block = 0; + + sigfillset(&mask); + sigprocmask(SIG_SETMASK, &mask, &oldmask); + + while (!(sig = pendingsigs)) + sigsuspend(&oldmask); + + sigclearmask(); + } while (sig == SIGCHLD); + + return err; } /* -- cgit 1.4.1