libfcgi/os_unix.c

   1 /*
   2  * os_unix.c --
   3  *
   4  *      Description of file.
   5  *
   6  *
   7  *  Copyright (c) 1995 Open Market, Inc.
   8  *  All rights reserved.
   9  *
  10  *  This file contains proprietary and confidential information and
  11  *  remains the unpublished property of Open Market, Inc. Use,
  12  *  disclosure, or reproduction is prohibited except as permitted by
  13  *  express written license agreement with Open Market, Inc.
  14  *
  15  *  Bill Snapper
  16  *  snapper@openmarket.com
  17  */
  18
  19 #ifndef lint
  20 static const char rcsid[] = "$Id: os_unix.c,v 1.38 2003/06/22 00:16:43 robs Exp $";
  21 #endif /* not lint */
  22
  23 #include "fcgi_config.h"
  24
  25 #include <sys/types.h>
  26
  27 #ifdef HAVE_NETINET_IN_H
  28 #include <netinet/in.h>
  29 #endif
  30
  31 #include <arpa/inet.h>
  32 #include <assert.h>
  33 #include <errno.h>
  34 #include <fcntl.h>      /* for fcntl */
  35 #include <math.h>
  36 #include <memory.h>     /* for memchr() */
  37 #include <netinet/tcp.h>
  38 #include <stdarg.h>
  39 #include <stdio.h>
  40 #include <stdlib.h>
  41 #include <string.h>
  42 #include <sys/time.h>
  43 #include <sys/un.h>
  44 #include <signal.h>
  45
  46 #ifdef HAVE_NETDB_H
  47 #include <netdb.h>
  48 #endif
  49
  50 #ifdef HAVE_SYS_SOCKET_H
  51 #include <sys/socket.h> /* for getpeername */
  52 #endif
  53
  54 #ifdef HAVE_UNISTD_H
  55 #include <unistd.h>
  56 #endif
  57
  58 #include "fastcgi.h"
  59 #include "fcgimisc.h"
  60 #include "fcgios.h"
  61
  62 #ifndef INADDR_NONE
  63 #define INADDR_NONE ((unsigned long) -1)
  64 #endif
  65
  66 /*
  67  * This structure holds an entry for each outstanding async I/O operation.
  68  */
  69 typedef struct {
  70     OS_AsyncProc procPtr;           /* callout completion procedure */
  71     ClientData clientData;          /* caller private data */
  72     int fd;
  73     int len;
  74     int offset;
  75     void *buf;
  76     int inUse;
  77 } AioInfo;
  78
  79 /*
  80  * Entries in the async I/O table are allocated 2 per file descriptor.
  81  *
  82  * Read Entry Index  = fd * 2
  83  * Write Entry Index = (fd * 2) + 1
  84  */
  85 #define AIO_RD_IX(fd) (fd * 2)
  86 #define AIO_WR_IX(fd) ((fd * 2) + 1)
  87
  88 static int asyncIoInUse = FALSE;
  89 static int asyncIoTableSize = 16;
  90 static AioInfo *asyncIoTable = NULL;
  91
  92 static int libInitialized = FALSE;
  93
  94 static fd_set readFdSet;
  95 static fd_set writeFdSet;
  96
  97 static fd_set readFdSetPost;
  98 static int numRdPosted = 0;
  99 static fd_set writeFdSetPost;
 100 static int numWrPosted = 0;
 101 static int volatile maxFd = -1;
 102
 103 static int shutdownPending = FALSE;
 104 static int shutdownNow = FALSE;
 105
 106 void OS_ShutdownPending()
 107 {
 108     shutdownPending = TRUE;
 109 }
 110
 111 static void OS_Sigusr1Handler(int signo)
 112 {
 113     OS_ShutdownPending();
 114 }
 115
 116 static void OS_SigpipeHandler(int signo)
 117 {
 118     ;
 119 }
 120
 121 static void installSignalHandler(int signo, const struct sigaction * act, int force)
 122 {
 123     struct sigaction sa;
 124
 125     sigaction(signo, NULL, &sa);
 126
 127     if (force || sa.sa_handler == SIG_DFL)
 128     {
 129         sigaction(signo, act, NULL);
 130     }
 131 }
 132
 133 static void OS_InstallSignalHandlers(int force)
 134 {
 135     struct sigaction sa;
 136
 137     sigemptyset(&sa.sa_mask);
 138     sa.sa_flags = 0;
 139
 140     sa.sa_handler = OS_SigpipeHandler;
 141     installSignalHandler(SIGPIPE, &sa, force);
 142
 143     sa.sa_handler = OS_Sigusr1Handler;
 144     installSignalHandler(SIGUSR1, &sa, force);
 145 }
 146
 147 /*
 148  *--------------------------------------------------------------
 149  *
 150  * OS_LibInit --
 151  *
 152  *      Set up the OS library for use.
 153  *
 154  *      NOTE: This function is really only needed for application
 155  *            asynchronous I/O.  It will most likely change in the
 156  *            future to setup the multi-threaded environment.
 157  *
 158  * Results:
 159  *      Returns 0 if success, -1 if not.
 160  *
 161  * Side effects:
 162  *      Async I/O table allocated and initialized.
 163  *
 164  *--------------------------------------------------------------
 165  */
 166 int OS_LibInit(int stdioFds[3])
 167 {
 168     if(libInitialized)
 169         return 0;
 170
 171     asyncIoTable = (AioInfo *)malloc(asyncIoTableSize * sizeof(AioInfo));
 172     if(asyncIoTable == NULL) {
 173         errno = ENOMEM;
 174         return -1;
 175     }
 176     memset((char *) asyncIoTable, 0,
 177            asyncIoTableSize * sizeof(AioInfo));
 178
 179     FD_ZERO(&readFdSet);
 180     FD_ZERO(&writeFdSet);
 181     FD_ZERO(&readFdSetPost);
 182     FD_ZERO(&writeFdSetPost);
 183
 184     OS_InstallSignalHandlers(FALSE);
 185
 186     libInitialized = TRUE;
 187
 188     return 0;
 189 }
 190
 191 /*
 192  *--------------------------------------------------------------
 193  *
 194  * OS_LibShutdown --
 195  *
 196  *      Shutdown the OS library.
 197  *
 198  * Results:
 199  *      None.
 200  *
 201  * Side effects:
 202  *      Memory freed, fds closed.
 203  *
 204  *--------------------------------------------------------------
 205  */
 206 void OS_LibShutdown()
 207 {
 208     if(!libInitialized)
 209         return;
 210
 211     free(asyncIoTable);
 212     asyncIoTable = NULL;
 213     libInitialized = FALSE;
 214     return;
 215 }
 216
 217 /*
 218  *----------------------------------------------------------------------
 219  *
 220  * OS_BuildSockAddrUn --
 221  *
 222  *      Using the pathname bindPath, fill in the sockaddr_un structure
 223  *      *servAddrPtr and the length of this structure *servAddrLen.
 224  *
 225  *      The format of the sockaddr_un structure changed incompatibly in
 226  *      4.3BSD Reno.  Digital UNIX supports both formats, other systems
 227  *      support one or the other.
 228  *
 229  * Results:
 230  *      0 for normal return, -1 for failure (bindPath too long).
 231  *
 232  *----------------------------------------------------------------------
 233  */
 234
 235 static int OS_BuildSockAddrUn(const char *bindPath,
 236                               struct sockaddr_un *servAddrPtr,
 237                               int *servAddrLen)
 238 {
 239     int bindPathLen = strlen(bindPath);
 240
 241 #ifdef HAVE_SOCKADDR_UN_SUN_LEN /* 4.3BSD Reno and later: BSDI, DEC */
 242     if(bindPathLen >= sizeof(servAddrPtr->sun_path)) {
 243         return -1;
 244     }
 245 #else                           /* 4.3 BSD Tahoe: Solaris, HPUX, DEC, ... */
 246     if(bindPathLen > sizeof(servAddrPtr->sun_path)) {
 247         return -1;
 248     }
 249 #endif
 250     memset((char *) servAddrPtr, 0, sizeof(*servAddrPtr));
 251     servAddrPtr->sun_family = AF_UNIX;
 252     memcpy(servAddrPtr->sun_path, bindPath, bindPathLen);
 253 #ifdef HAVE_SOCKADDR_UN_SUN_LEN /* 4.3BSD Reno and later: BSDI, DEC */
 254     *servAddrLen = sizeof(servAddrPtr->sun_len)
 255             + sizeof(servAddrPtr->sun_family)
 256             + bindPathLen + 1;
 257     servAddrPtr->sun_len = *servAddrLen;
 258 #else                           /* 4.3 BSD Tahoe: Solaris, HPUX, DEC, ... */
 259     *servAddrLen = sizeof(servAddrPtr->sun_family) + bindPathLen;
 260 #endif
 261     return 0;
 262 }
 263 union SockAddrUnion {
 264     struct  sockaddr_un unixVariant;
 265     struct  sockaddr_in inetVariant;
 266 };
 267
 268 /*
 269  * OS_CreateLocalIpcFd --
 270  *
 271  *   This procedure is responsible for creating the listener socket
 272  *   on Unix for local process communication.  It will create a
 273  *   domain socket or a TCP/IP socket bound to "localhost" and return
 274  *   a file descriptor to it to the caller.
 275  *
 276  * Results:
 277  *      Listener socket created.  This call returns either a valid
 278  *      file descriptor or -1 on error.
 279  *
 280  * Side effects:
 281  *      None.
 282  *
 283  *----------------------------------------------------------------------
 284  */
 285 int OS_CreateLocalIpcFd(const char *bindPath, int backlog)
 286 {
 287     int listenSock, servLen;
 288     union   SockAddrUnion sa;
 289     int     tcp = FALSE;
 290     unsigned long tcp_ia = 0;
 291     char    *tp;
 292     short   port = 0;
 293     char    host[MAXPATHLEN];
 294
 295     strcpy(host, bindPath);
 296     if((tp = strchr(host, ':')) != 0) {
 297         *tp++ = 0;
 298         if((port = atoi(tp)) == 0) {
 299             *--tp = ':';
 300          } else {
 301             tcp = TRUE;
 302          }
 303     }
 304     if(tcp) {
 305       if (!*host || !strcmp(host,"*")) {
 306         tcp_ia = htonl(INADDR_ANY);
 307       } else {
 308         tcp_ia = inet_addr(host);
 309         if (tcp_ia == INADDR_NONE) {
 310           struct hostent * hep;
 311           hep = gethostbyname(host);
 312           if ((!hep) || (hep->h_addrtype != AF_INET || !hep->h_addr_list[0])) {
 313             fprintf(stderr, "Cannot resolve host name %s -- exiting!\n", host);
 314             exit(1);
 315           }
 316           if (hep->h_addr_list[1]) {
 317             fprintf(stderr, "Host %s has multiple addresses ---\n", host);
 318             fprintf(stderr, "you must choose one explicitly!!!\n");
 319             exit(1);
 320           }
 321           tcp_ia = ((struct in_addr *) (hep->h_addr))->s_addr;
 322         }
 323       }
 324     }
 325
 326     if(tcp) {
 327         listenSock = socket(AF_INET, SOCK_STREAM, 0);
 328         if(listenSock >= 0) {
 329             int flag = 1;
 330             if(setsockopt(listenSock, SOL_SOCKET, SO_REUSEADDR,
 331                           (char *) &flag, sizeof(flag)) < 0) {
 332                 fprintf(stderr, "Can't set SO_REUSEADDR.\n");
 333                 exit(1001);
 334             }
 335         }
 336     } else {
 337         listenSock = socket(AF_UNIX, SOCK_STREAM, 0);
 338     }
 339     if(listenSock < 0) {
 340         return -1;
 341     }
 342
 343     /*
 344      * Bind the listening socket.
 345      */
 346     if(tcp) {
 347         memset((char *) &sa.inetVariant, 0, sizeof(sa.inetVariant));
 348         sa.inetVariant.sin_family = AF_INET;
 349         sa.inetVariant.sin_addr.s_addr = tcp_ia;
 350         sa.inetVariant.sin_port = htons(port);
 351         servLen = sizeof(sa.inetVariant);
 352     } else {
 353         unlink(bindPath);
 354         if(OS_BuildSockAddrUn(bindPath, &sa.unixVariant, &servLen)) {
 355             fprintf(stderr, "Listening socket's path name is too long.\n");
 356             exit(1000);
 357         }
 358     }
 359     if(bind(listenSock, (struct sockaddr *) &sa.unixVariant, servLen) < 0
 360        || listen(listenSock, backlog) < 0) {
 361         perror("bind/listen");
 362         exit(errno);
 363     }
 364
 365     return listenSock;
 366 }
 367
 368 /*
 369  *----------------------------------------------------------------------
 370  *
 371  * OS_FcgiConnect --
 372  *
 373  *      Create the socket and connect to the remote application if
 374  *      possible.
 375  *
 376  *      This was lifted from the cgi-fcgi application and was abstracted
 377  *      out because Windows NT does not have a domain socket and must
 378  *      use a named pipe which has a different API altogether.
 379  *
 380  * Results:
 381  *      -1 if fail or a valid file descriptor if connection succeeds.
 382  *
 383  * Side effects:
 384  *      Remote connection established.
 385  *
 386  *----------------------------------------------------------------------
 387  */
 388 int OS_FcgiConnect(char *bindPath)
 389 {
 390     union   SockAddrUnion sa;
 391     int servLen, resultSock;
 392     int connectStatus;
 393     char    *tp;
 394     char    host[MAXPATHLEN];
 395     short   port = 0;
 396     int     tcp = FALSE;
 397
 398     strcpy(host, bindPath);
 399     if((tp = strchr(host, ':')) != 0) {
 400         *tp++ = 0;
 401         if((port = atoi(tp)) == 0) {
 402             *--tp = ':';
 403          } else {
 404             tcp = TRUE;
 405          }
 406     }
 407     if(tcp == TRUE) {
 408         struct  hostent *hp;
 409         if((hp = gethostbyname((*host ? host : "localhost"))) == NULL) {
 410             fprintf(stderr, "Unknown host: %s\n", bindPath);
 411             exit(1000);
 412         }
 413         sa.inetVariant.sin_family = AF_INET;
 414         memcpy(&sa.inetVariant.sin_addr, hp->h_addr, hp->h_length);
 415         sa.inetVariant.sin_port = htons(port);
 416         servLen = sizeof(sa.inetVariant);
 417         resultSock = socket(AF_INET, SOCK_STREAM, 0);
 418     } else {
 419         if(OS_BuildSockAddrUn(bindPath, &sa.unixVariant, &servLen)) {
 420             fprintf(stderr, "Listening socket's path name is too long.\n");
 421             exit(1000);
 422         }
 423         resultSock = socket(AF_UNIX, SOCK_STREAM, 0);
 424     }
 425
 426     ASSERT(resultSock >= 0);
 427     connectStatus = connect(resultSock, (struct sockaddr *) &sa.unixVariant,
 428                              servLen);
 429     if(connectStatus >= 0) {
 430         return resultSock;
 431     } else {
 432         /*
 433          * Most likely (errno == ENOENT || errno == ECONNREFUSED)
 434          * and no FCGI application server is running.
 435          */
 436         close(resultSock);
 437         return -1;
 438     }
 439 }
 440
 441 /*
 442  *--------------------------------------------------------------
 443  *
 444  * OS_Read --
 445  *
 446  *      Pass through to the unix read function.
 447  *
 448  * Results:
 449  *      Returns number of bytes read, 0, or -1 failure: errno
 450  *      contains actual error.
 451  *
 452  * Side effects:
 453  *      None.
 454  *
 455  *--------------------------------------------------------------
 456  */
 457 int OS_Read(int fd, char * buf, size_t len)
 458 {
 459     if (shutdownNow) return -1;
 460     return(read(fd, buf, len));
 461 }
 462
 463 /*
 464  *--------------------------------------------------------------
 465  *
 466  * OS_Write --
 467  *
 468  *      Pass through to unix write function.
 469  *
 470  * Results:
 471  *      Returns number of bytes written, 0, or -1 failure: errno
 472  *      contains actual error.
 473  *
 474  * Side effects:
 475  *      none.
 476  *
 477  *--------------------------------------------------------------
 478  */
 479 int OS_Write(int fd, char * buf, size_t len)
 480 {
 481     if (shutdownNow) return -1;
 482     return(write(fd, buf, len));
 483 }
 484
 485 /*
 486  *----------------------------------------------------------------------
 487  *
 488  * OS_SpawnChild --
 489  *
 490  *      Spawns a new FastCGI listener process.
 491  *
 492  * Results:
 493  *      0 if success, -1 if error.
 494  *
 495  * Side effects:
 496  *      Child process spawned.
 497  *
 498  *----------------------------------------------------------------------
 499  */
 500 int OS_SpawnChild(char *appPath, int listenFd)
 501 {
 502     int forkResult;
 503
 504     forkResult = fork();
 505     if(forkResult < 0) {
 506         exit(errno);
 507     }
 508
 509     if(forkResult == 0) {
 510         /*
 511          * Close STDIN unconditionally.  It's used by the parent
 512          * process for CGI communication.  The FastCGI applciation
 513          * will be replacing this with the FastCGI listenFd IF
 514          * STDIN_FILENO is the same as FCGI_LISTENSOCK_FILENO
 515          * (which it is on Unix).  Regardless, STDIN, STDOUT, and
 516          * STDERR will be closed as the FastCGI process uses a
 517          * multiplexed socket in their place.
 518          */
 519         close(STDIN_FILENO);
 520
 521         /*
 522          * If the listenFd is already the value of FCGI_LISTENSOCK_FILENO
 523          * we're set.  If not, change it so the child knows where to
 524          * get the listen socket from.
 525          */
 526         if(listenFd != FCGI_LISTENSOCK_FILENO) {
 527             dup2(listenFd, FCGI_LISTENSOCK_FILENO);
 528             close(listenFd);
 529         }
 530
 531         close(STDOUT_FILENO);
 532         close(STDERR_FILENO);
 533
 534         /*
 535          * We're a child.  Exec the application.
 536          *
 537          * XXX: entire environment passes through
 538          */
 539         execl(appPath, appPath, NULL);
 540         /*
 541          * XXX: Can't do this as we've already closed STDERR!!!
 542          *
 543          * perror("exec");
 544          */
 545         exit(errno);
 546     }
 547     return 0;
 548 }
 549
 550 /*
 551  *--------------------------------------------------------------
 552  *
 553  * OS_AsyncReadStdin --
 554  *
 555  *      This initiates an asynchronous read on the standard
 556  *      input handle.
 557  *
 558  *      The abstraction is necessary because Windows NT does not
 559  *      have a clean way of "select"ing a file descriptor for
 560  *      I/O.
 561  *
 562  * Results:
 563  *      -1 if error, 0 otherwise.
 564  *
 565  * Side effects:
 566  *      Asynchronous bit is set in the readfd variable and
 567  *      request is enqueued.
 568  *
 569  *--------------------------------------------------------------
 570  */
 571 int OS_AsyncReadStdin(void *buf, int len, OS_AsyncProc procPtr,
 572                       ClientData clientData)
 573 {
 574     int index = AIO_RD_IX(STDIN_FILENO);
 575
 576     asyncIoInUse = TRUE;
 577     ASSERT(asyncIoTable[index].inUse == 0);
 578     asyncIoTable[index].procPtr = procPtr;
 579     asyncIoTable[index].clientData = clientData;
 580     asyncIoTable[index].fd = STDIN_FILENO;
 581     asyncIoTable[index].len = len;
 582     asyncIoTable[index].offset = 0;
 583     asyncIoTable[index].buf = buf;
 584     asyncIoTable[index].inUse = 1;
 585     FD_SET(STDIN_FILENO, &readFdSet);
 586     if(STDIN_FILENO > maxFd)
 587         maxFd = STDIN_FILENO;
 588     return 0;
 589 }
 590
 591 static void GrowAsyncTable(void)
 592 {
 593     int oldTableSize = asyncIoTableSize;
 594
 595     asyncIoTableSize = asyncIoTableSize * 2;
 596     asyncIoTable = (AioInfo *)realloc(asyncIoTable, asyncIoTableSize * sizeof(AioInfo));
 597     if(asyncIoTable == NULL) {
 598         errno = ENOMEM;
 599         exit(errno);
 600     }
 601     memset((char *) &asyncIoTable[oldTableSize], 0,
 602            oldTableSize * sizeof(AioInfo));
 603
 604 }
 605
 606 /*
 607  *--------------------------------------------------------------
 608  *
 609  * OS_AsyncRead --
 610  *
 611  *      This initiates an asynchronous read on the file
 612  *      handle which may be a socket or named pipe.
 613  *
 614  *      We also must save the ProcPtr and ClientData, so later
 615  *      when the io completes, we know who to call.
 616  *
 617  *      We don't look at any results here (the ReadFile may
 618  *      return data if it is cached) but do all completion
 619  *      processing in OS_Select when we get the io completion
 620  *      port done notifications.  Then we call the callback.
 621  *
 622  * Results:
 623  *      -1 if error, 0 otherwise.
 624  *
 625  * Side effects:
 626  *      Asynchronous I/O operation is queued for completion.
 627  *
 628  *--------------------------------------------------------------
 629  */
 630 int OS_AsyncRead(int fd, int offset, void *buf, int len,
 631                  OS_AsyncProc procPtr, ClientData clientData)
 632 {
 633     int index = AIO_RD_IX(fd);
 634
 635     ASSERT(asyncIoTable != NULL);
 636     asyncIoInUse = TRUE;
 637
 638     if(fd > maxFd)
 639         maxFd = fd;
 640
 641     while (index >= asyncIoTableSize) {
 642         GrowAsyncTable();
 643     }
 644
 645     ASSERT(asyncIoTable[index].inUse == 0);
 646     asyncIoTable[index].procPtr = procPtr;
 647     asyncIoTable[index].clientData = clientData;
 648     asyncIoTable[index].fd = fd;
 649     asyncIoTable[index].len = len;
 650     asyncIoTable[index].offset = offset;
 651     asyncIoTable[index].buf = buf;
 652     asyncIoTable[index].inUse = 1;
 653     FD_SET(fd, &readFdSet);
 654     return 0;
 655 }
 656
 657 /*
 658  *--------------------------------------------------------------
 659  *
 660  * OS_AsyncWrite --
 661  *
 662  *      This initiates an asynchronous write on the "fake" file
 663  *      descriptor (which may be a file, socket, or named pipe).
 664  *      We also must save the ProcPtr and ClientData, so later
 665  *      when the io completes, we know who to call.
 666  *
 667  *      We don't look at any results here (the WriteFile generally
 668  *      completes immediately) but do all completion processing
 669  *      in OS_DoIo when we get the io completion port done
 670  *      notifications.  Then we call the callback.
 671  *
 672  * Results:
 673  *      -1 if error, 0 otherwise.
 674  *
 675  * Side effects:
 676  *      Asynchronous I/O operation is queued for completion.
 677  *
 678  *--------------------------------------------------------------
 679  */
 680 int OS_AsyncWrite(int fd, int offset, void *buf, int len,
 681                   OS_AsyncProc procPtr, ClientData clientData)
 682 {
 683     int index = AIO_WR_IX(fd);
 684
 685     asyncIoInUse = TRUE;
 686
 687     if(fd > maxFd)
 688         maxFd = fd;
 689
 690     while (index >= asyncIoTableSize) {
 691         GrowAsyncTable();
 692     }
 693
 694     ASSERT(asyncIoTable[index].inUse == 0);
 695     asyncIoTable[index].procPtr = procPtr;
 696     asyncIoTable[index].clientData = clientData;
 697     asyncIoTable[index].fd = fd;
 698     asyncIoTable[index].len = len;
 699     asyncIoTable[index].offset = offset;
 700     asyncIoTable[index].buf = buf;
 701     asyncIoTable[index].inUse = 1;
 702     FD_SET(fd, &writeFdSet);
 703     return 0;
 704 }
 705
 706 /*
 707  *--------------------------------------------------------------
 708  *
 709  * OS_Close --
 710  *
 711  *      Closes the descriptor.  This is a pass through to the
 712  *      Unix close.
 713  *
 714  * Results:
 715  *      0 for success, -1 on failure
 716  *
 717  * Side effects:
 718  *      None.
 719  *
 720  *--------------------------------------------------------------
 721  */
 722 int OS_Close(int fd, int shutdown_ok)
 723 {
 724     if (fd == -1)
 725         return 0;
 726
 727     if (asyncIoInUse) {
 728         int index = AIO_RD_IX(fd);
 729
 730         FD_CLR(fd, &readFdSet);
 731         FD_CLR(fd, &readFdSetPost);
 732         if (asyncIoTable[index].inUse != 0) {
 733             asyncIoTable[index].inUse = 0;
 734         }
 735
 736         FD_CLR(fd, &writeFdSet);
 737         FD_CLR(fd, &writeFdSetPost);
 738         index = AIO_WR_IX(fd);
 739         if (asyncIoTable[index].inUse != 0) {
 740             asyncIoTable[index].inUse = 0;
 741         }
 742
 743         if (maxFd == fd) {
 744             maxFd--;
 745         }
 746     }
 747
 748     /*
 749      * shutdown() the send side and then read() from client until EOF
 750      * or a timeout expires.  This is done to minimize the potential
 751      * that a TCP RST will be sent by our TCP stack in response to
 752      * receipt of additional data from the client.  The RST would
 753      * cause the client to discard potentially useful response data.
 754      */
 755
 756     if (shutdown_ok)
 757     {
 758         if (shutdown(fd, 1) == 0)
 759         {
 760             struct timeval tv;
 761             fd_set rfds;
 762             int rv;
 763             char trash[1024];
 764
 765             FD_ZERO(&rfds);
 766
 767             do
 768             {
 769                 FD_SET(fd, &rfds);
 770                 tv.tv_sec = 2;
 771                 tv.tv_usec = 0;
 772                 rv = select(fd + 1, &rfds, NULL, NULL, &tv);
 773             }
 774             while (rv > 0 && read(fd, trash, sizeof(trash)) > 0);
 775         }
 776     }
 777
 778     return close(fd);
 779 }
 780
 781 /*
 782  *--------------------------------------------------------------
 783  *
 784  * OS_CloseRead --
 785  *
 786  *      Cancel outstanding asynchronous reads and prevent subsequent
 787  *      reads from completing.
 788  *
 789  * Results:
 790  *      Socket or file is shutdown. Return values mimic Unix shutdown:
 791  *              0 success, -1 failure
 792  *
 793  *--------------------------------------------------------------
 794  */
 795 int OS_CloseRead(int fd)
 796 {
 797     if(asyncIoTable[AIO_RD_IX(fd)].inUse != 0) {
 798         asyncIoTable[AIO_RD_IX(fd)].inUse = 0;
 799         FD_CLR(fd, &readFdSet);
 800     }
 801
 802     return shutdown(fd, 0);
 803 }
 804
 805 /*
 806  *--------------------------------------------------------------
 807  *
 808  * OS_DoIo --
 809  *
 810  *      This function was formerly OS_Select.  Its purpose is
 811  *      to pull I/O completion events off the queue and dispatch
 812  *      them to the appropriate place.
 813  *
 814  * Results:
 815  *      Returns 0.
 816  *
 817  * Side effects:
 818  *      Handlers are called.
 819  *
 820  *--------------------------------------------------------------
 821  */
 822 int OS_DoIo(struct timeval *tmo)
 823 {
 824     int fd, len, selectStatus;
 825     OS_AsyncProc procPtr;
 826     ClientData clientData;
 827     AioInfo *aioPtr;
 828     fd_set readFdSetCpy;
 829     fd_set writeFdSetCpy;
 830
 831     asyncIoInUse = TRUE;
 832     FD_ZERO(&readFdSetCpy);
 833     FD_ZERO(&writeFdSetCpy);
 834
 835     for(fd = 0; fd <= maxFd; fd++) {
 836         if(FD_ISSET(fd, &readFdSet)) {
 837             FD_SET(fd, &readFdSetCpy);
 838         }
 839         if(FD_ISSET(fd, &writeFdSet)) {
 840             FD_SET(fd, &writeFdSetCpy);
 841         }
 842     }
 843
 844     /*
 845      * If there were no completed events from a prior call, see if there's
 846      * any work to do.
 847      */
 848     if(numRdPosted == 0 && numWrPosted == 0) {
 849         selectStatus = select((maxFd+1), &readFdSetCpy, &writeFdSetCpy,
 850                               NULL, tmo);
 851         if(selectStatus < 0) {
 852             exit(errno);
 853         }
 854
 855         for(fd = 0; fd <= maxFd; fd++) {
 856             /*
 857              * Build up a list of completed events.  We'll work off of
 858              * this list as opposed to looping through the read and write
 859              * fd sets since they can be affected by a callbacl routine.
 860              */
 861             if(FD_ISSET(fd, &readFdSetCpy)) {
 862                 numRdPosted++;
 863                 FD_SET(fd, &readFdSetPost);
 864                 FD_CLR(fd, &readFdSet);
 865             }
 866
 867             if(FD_ISSET(fd, &writeFdSetCpy)) {
 868                 numWrPosted++;
 869                 FD_SET(fd, &writeFdSetPost);
 870                 FD_CLR(fd, &writeFdSet);
 871             }
 872         }
 873     }
 874
 875     if(numRdPosted == 0 && numWrPosted == 0)
 876         return 0;
 877
 878     for(fd = 0; fd <= maxFd; fd++) {
 879         /*
 880          * Do reads and dispatch callback.
 881          */
 882         if(FD_ISSET(fd, &readFdSetPost)
 883            && asyncIoTable[AIO_RD_IX(fd)].inUse) {
 884
 885             numRdPosted--;
 886             FD_CLR(fd, &readFdSetPost);
 887             aioPtr = &asyncIoTable[AIO_RD_IX(fd)];
 888
 889             len = read(aioPtr->fd, aioPtr->buf, aioPtr->len);
 890
 891             procPtr = aioPtr->procPtr;
 892             aioPtr->procPtr = NULL;
 893             clientData = aioPtr->clientData;
 894             aioPtr->inUse = 0;
 895
 896             (*procPtr)(clientData, len);
 897         }
 898
 899         /*
 900          * Do writes and dispatch callback.
 901          */
 902         if(FD_ISSET(fd, &writeFdSetPost) &&
 903            asyncIoTable[AIO_WR_IX(fd)].inUse) {
 904
 905             numWrPosted--;
 906             FD_CLR(fd, &writeFdSetPost);
 907             aioPtr = &asyncIoTable[AIO_WR_IX(fd)];
 908
 909             len = write(aioPtr->fd, aioPtr->buf, aioPtr->len);
 910
 911             procPtr = aioPtr->procPtr;
 912             aioPtr->procPtr = NULL;
 913             clientData = aioPtr->clientData;
 914             aioPtr->inUse = 0;
 915             (*procPtr)(clientData, len);
 916         }
 917     }
 918     return 0;
 919 }
 920
 921 /*
 922  * Not all systems have strdup().
 923  * @@@ autoconf should determine whether or not this is needed, but for now..
 924  */
 925 static char * str_dup(const char * str)
 926 {
 927     char * sdup = (char *) malloc(strlen(str) + 1);
 928
 929     if (sdup)
 930         strcpy(sdup, str);
 931
 932     return sdup;
 933 }
 934
 935 /*
 936  *----------------------------------------------------------------------
 937  *
 938  * ClientAddrOK --
 939  *
 940  *      Checks if a client address is in a list of allowed addresses
 941  *
 942  * Results:
 943  *      TRUE if address list is empty or client address is present
 944  *      in the list, FALSE otherwise.
 945  *
 946  *----------------------------------------------------------------------
 947  */
 948 static int ClientAddrOK(struct sockaddr_in *saPtr, const char *clientList)
 949 {
 950     int result = FALSE;
 951     char *clientListCopy, *cur, *next;
 952
 953     if (clientList == NULL || *clientList == '\0') {
 954         return TRUE;
 955     }
 956
 957     clientListCopy = str_dup(clientList);
 958
 959     for (cur = clientListCopy; cur != NULL; cur = next) {
 960         next = strchr(cur, ',');
 961         if (next != NULL) {
 962             *next++ = '\0';
 963         }
 964         if (inet_addr(cur) == saPtr->sin_addr.s_addr) {
 965             result = TRUE;
 966             break;
 967         }
 968     }
 969
 970     free(clientListCopy);
 971     return result;
 972 }
 973
 974 /*
 975  *----------------------------------------------------------------------
 976  *
 977  * AcquireLock --
 978  *
 979  *      On platforms that implement concurrent calls to accept
 980  *      on a shared listening ipcFd, returns 0.  On other platforms,
 981  *      acquires an exclusive lock across all processes sharing a
 982  *      listening ipcFd, blocking until the lock has been acquired.
 983  *
 984  * Results:
 985  *      0 for successful call, -1 in case of system error (fatal).
 986  *
 987  * Side effects:
 988  *      This process now has the exclusive lock.
 989  *
 990  *----------------------------------------------------------------------
 991  */
 992 static int AcquireLock(int sock, int fail_on_intr)
 993 {
 994 #ifdef USE_LOCKING
 995     do {
 996         struct flock lock;
 997         lock.l_type = F_WRLCK;
 998         lock.l_start = 0;
 999         lock.l_whence = SEEK_SET;
1000         lock.l_len = 0;
1001
1002         if (fcntl(sock, F_SETLKW, &lock) != -1)
1003             return 0;
1004     } while (errno == EINTR
1005              && ! fail_on_intr
1006              && ! shutdownPending);
1007
1008     return -1;
1009
1010 #else
1011     return 0;
1012 #endif
1013 }
1014
1015 /*
1016  *----------------------------------------------------------------------
1017  *
1018  * ReleaseLock --
1019  *
1020  *      On platforms that implement concurrent calls to accept
1021  *      on a shared listening ipcFd, does nothing.  On other platforms,
1022  *      releases an exclusive lock acquired by AcquireLock.
1023  *
1024  * Results:
1025  *      0 for successful call, -1 in case of system error (fatal).
1026  *
1027  * Side effects:
1028  *      This process no longer holds the lock.
1029  *
1030  *----------------------------------------------------------------------
1031  */
static int ReleaseLock(int sock)
{
#ifdef USE_LOCKING
    for (;;) {
        struct flock lock;

        /* Unlock the same whole-file range AcquireLock locks. */
        lock.l_type = F_UNLCK;
        lock.l_whence = SEEK_SET;
        lock.l_start = 0;
        lock.l_len = 0;

        if (fcntl(sock, F_SETLK, &lock) != -1) {
            return 0;
        }

        /* Anything other than an interrupted call is a real failure. */
        if (errno != EINTR) {
            return -1;
        }
    }
#else
    return 0;
#endif
}
1052
/**********************************************************************
 * Classify the errno left behind by a failed accept(): returns 1 when
 * the failure is one that warrants simply retrying the accept(), 0 when
 * it does not (the caller then gives up).  Based on Apache's http_main.c
 * accept() handling and Stevens' Unix Network Programming Vol 1, 2nd Ed,
 * para. 15.6.
 *
 * Each errno value is only tested when the platform defines it.
 */
static int is_reasonable_accept_errno (const int error)
{
    int retryable = 0;

#ifdef EPROTO
    /* EPROTO on certain older kernels really means ECONNABORTED, so
     * we need to ignore it for them.  See discussion in new-httpd
     * archives nh.9701 search for EPROTO.  Also see nh.9603, search
     * for EPROTO:  There is potentially a bug in Solaris 2.x x<6, and
     * other boxes that implement tcp sockets in userland (i.e. on top of
     * STREAMS).  On these systems, EPROTO can actually result in a fatal
     * loop.  See PR#981 for example.  It's hard to handle both uses of
     * EPROTO. */
    if (error == EPROTO) {
        retryable = 1;
    }
#endif
#ifdef ECONNABORTED
    if (error == ECONNABORTED) {
        retryable = 1;
    }
#endif

    /* Linux generates the rest of these, other tcp stacks (i.e.
     * bsd) tend to hide them behind getsockopt() interfaces.  They
     * occur when the net goes sour or the client disconnects after the
     * three-way handshake has been done in the kernel but before
     * userland has picked up the socket. */
#ifdef ECONNRESET
    if (error == ECONNRESET) {
        retryable = 1;
    }
#endif
#ifdef ETIMEDOUT
    if (error == ETIMEDOUT) {
        retryable = 1;
    }
#endif
#ifdef EHOSTUNREACH
    if (error == EHOSTUNREACH) {
        retryable = 1;
    }
#endif
#ifdef ENETUNREACH
    if (error == ENETUNREACH) {
        retryable = 1;
    }
#endif

    return retryable;
}
1098
1099 /**********************************************************************
1100  * This works around a problem on Linux 2.0.x and SCO Unixware (maybe
1101  * others?).  When a connect() is made to a Unix Domain socket, but its
1102  * not accept()ed before the web server gets impatient and close()s, an
1103  * accept() results in a valid file descriptor, but no data to read.
1104  * This causes a block on the first read() - which never returns!
1105  *
1106  * Another approach to this is to write() to the socket to provoke a
1107  * SIGPIPE, but this is a pain because of the FastCGI protocol, the fact
1108  * that whatever is written has to be universally ignored by all FastCGI
1109  * web servers, and a SIGPIPE handler has to be installed which returns
1110  * (or SIGPIPE is ignored).
1111  *
1112  * READABLE_UNIX_FD_DROP_DEAD_TIMEVAL = 2,0 by default.
1113  *
1114  * Making it shorter is probably safe, but I'll leave that to you.  Making
1115  * it 0,0 doesn't work reliably.  The shorter you can reliably make it,
1116  * the faster your application will be able to recover (waiting 2 seconds
1117  * may _cause_ the problem when there is a very high demand). At any rate,
1118  * this is better than perma-blocking.
1119  */
1120 static int is_af_unix_keeper(const int fd)
1121 {
1122     struct timeval tval = { READABLE_UNIX_FD_DROP_DEAD_TIMEVAL };
1123     fd_set read_fds;
1124
1125     FD_ZERO(&read_fds);
1126     FD_SET(fd, &read_fds);
1127
1128     return select(fd + 1, &read_fds, NULL, NULL, &tval) >= 0 && FD_ISSET(fd, &read_fds);
1129 }
1130
1131 /*
1132  *----------------------------------------------------------------------
1133  *
1134  * OS_Accept --
1135  *
1136  *      Accepts a new FastCGI connection.  This routine knows whether
1137  *      we're dealing with TCP based sockets or NT Named Pipes for IPC.
1138  *
1139  * Results:
1140  *      -1 if the operation fails, otherwise this is a valid IPC fd.
1141  *
1142  * Side effects:
1143  *      New IPC connection is accepted.
1144  *
1145  *----------------------------------------------------------------------
1146  */
1147 int OS_Accept(int listen_sock, int fail_on_intr, const char *webServerAddrs)
1148 {
1149     int socket = -1;
1150     union {
1151         struct sockaddr_un un;
1152         struct sockaddr_in in;
1153     } sa;
1154
1155     for (;;) {
1156         if (AcquireLock(listen_sock, fail_on_intr))
1157             return -1;
1158
1159         for (;;) {
1160             do {
1161 #ifdef HAVE_SOCKLEN
1162                 socklen_t len = sizeof(sa);
1163 #else
1164                 int len = sizeof(sa);
1165 #endif
1166                 if (shutdownPending) break;
1167                 /* There's a window here */
1168
1169                 socket = accept(listen_sock, (struct sockaddr *)&sa, &len);
1170             } while (socket < 0
1171                      && errno == EINTR
1172                      && ! fail_on_intr
1173                      && ! shutdownPending);
1174
1175             if (socket < 0) {
1176                 if (shutdownPending || ! is_reasonable_accept_errno(errno)) {
1177                     int errnoSave = errno;
1178
1179                     ReleaseLock(listen_sock);
1180
1181                     if (! shutdownPending) {
1182                         errno = errnoSave;
1183                     }
1184
1185                     return (-1);
1186                 }
1187                 errno = 0;
1188             }
1189             else {  /* socket >= 0 */
1190                 int set = 1;
1191
1192                 if (sa.in.sin_family != AF_INET)
1193                     break;
1194
1195 #ifdef TCP_NODELAY
1196                 /* No replies to outgoing data, so disable Nagle */
1197                 setsockopt(socket, IPPROTO_TCP, TCP_NODELAY, (char *)&set, sizeof(set));
1198 #endif
1199
1200                 /* Check that the client IP address is approved */
1201                 if (ClientAddrOK(&sa.in, webServerAddrs))
1202                     break;
1203
1204                 close(socket);
1205             }  /* socket >= 0 */
1206         }  /* for(;;) */
1207
1208         if (ReleaseLock(listen_sock))
1209             return (-1);
1210
1211         if (sa.in.sin_family != AF_UNIX || is_af_unix_keeper(socket))
1212             break;
1213
1214         close(socket);
1215     }  /* while(1) - lock */
1216
1217     return (socket);
1218 }
1219
/*
 *----------------------------------------------------------------------
 *
 * OS_IpcClose
 *
 *      OS IPC routine to close an IPC connection.  Simply forwards
 *      both arguments to OS_Close.
 *
 * Results:
 *      Whatever OS_Close returns for the same arguments.
 *
 * Side effects:
 *      IPC connection is closed.
 *
 *----------------------------------------------------------------------
 */
int OS_IpcClose(int ipcFd, int shutdown)
{
    const int closeResult = OS_Close(ipcFd, shutdown);

    return closeResult;
}
1239
1240 /*
1241  *----------------------------------------------------------------------
1242  *
1243  * OS_IsFcgi --
1244  *
1245  *      Determines whether this process is a FastCGI process or not.
1246  *
1247  * Results:
1248  *      Returns 1 if FastCGI, 0 if not.
1249  *
1250  * Side effects:
1251  *      None.
1252  *
1253  *----------------------------------------------------------------------
1254  */
1255 int OS_IsFcgi(int sock)
1256 {
1257         union {
1258         struct sockaddr_in in;
1259         struct sockaddr_un un;
1260     } sa;
1261 #ifdef HAVE_SOCKLEN
1262     socklen_t len = sizeof(sa);
1263 #else
1264     int len = sizeof(sa);
1265 #endif
1266
1267     errno = 0;
1268
1269     if (getpeername(sock, (struct sockaddr *)&sa, &len) != 0 && errno == ENOTCONN) {
1270         return TRUE;
1271     }
1272     else {
1273         return FALSE;
1274     }
1275 }
1276
/*
 *----------------------------------------------------------------------
 *
 * OS_SetFlags --
 *
 *      Sets selected flag bits in an open file descriptor.  The
 *      current file status flags are read with F_GETFL, OR-ed with
 *      flags, and written back with F_SETFL.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      If either fcntl() call fails, the process exits with errno as
 *      its exit status.
 *
 *----------------------------------------------------------------------
 */
void OS_SetFlags(int fd, int flags)
{
    int current = fcntl(fd, F_GETFL, 0);

    if (current < 0) {
        exit(errno);
    }

    if (fcntl(fd, F_SETFL, current | flags) < 0) {
        exit(errno);
    }
}