libfcgi/os_unix.c

   1 /*
   2  * os_unix.c --
   3  *
   4  *  Bill Snapper
   5  *  snapper@openmarket.com
   6  *
   7  * Copyright (c) 1996 Open Market, Inc.
   8  *
   9  * See the file "LICENSE" for information on usage and redistribution
  10  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  11  */
  12
  13 #ifndef lint
  14 static const char rcsid[] = "$Id: os_unix.c,v 1.38 2003/06/22 00:16:43 robs Exp $";
  15 #endif /* not lint */
  16
  17 #include "fcgi_config.h"
  18
  19 #include <sys/types.h>
  20
  21 #ifdef HAVE_NETINET_IN_H
  22 #include <netinet/in.h>
  23 #endif
  24
  25 #include <arpa/inet.h>
  26 #include <assert.h>
  27 #include <errno.h>
  28 #include <fcntl.h>      /* for fcntl */
  29 #include <math.h>
  30 #include <memory.h>     /* for memchr() */
  31 #include <netinet/tcp.h>
  32 #include <stdarg.h>
  33 #include <stdio.h>
  34 #include <stdlib.h>
  35 #include <string.h>
  36 #include <sys/time.h>
  37 #include <sys/un.h>
  38 #include <signal.h>
  39
  40 #ifdef HAVE_NETDB_H
  41 #include <netdb.h>
  42 #endif
  43
  44 #ifdef HAVE_SYS_SOCKET_H
  45 #include <sys/socket.h> /* for getpeername */
  46 #endif
  47
  48 #ifdef HAVE_UNISTD_H
  49 #include <unistd.h>
  50 #endif
  51
  52 #include "fastcgi.h"
  53 #include "fcgimisc.h"
  54 #include "fcgios.h"
  55
  56 #ifndef INADDR_NONE
  57 #define INADDR_NONE ((unsigned long) -1)
  58 #endif
  59
  60 /*
   61  * This structure holds an entry for each outstanding async I/O operation.
  62  */
  63 typedef struct {
  64     OS_AsyncProc procPtr;           /* callout completion procedure */
  65     ClientData clientData;          /* caller private data */
  66     int fd;
  67     int len;
  68     int offset;
  69     void *buf;
  70     int inUse;
  71 } AioInfo;
  72
  73 /*
  74  * Entries in the async I/O table are allocated 2 per file descriptor.
  75  *
  76  * Read Entry Index  = fd * 2
  77  * Write Entry Index = (fd * 2) + 1
  78  */
  79 #define AIO_RD_IX(fd) (fd * 2)
  80 #define AIO_WR_IX(fd) ((fd * 2) + 1)
  81
  82 static int asyncIoInUse = FALSE;
  83 static int asyncIoTableSize = 16;
  84 static AioInfo *asyncIoTable = NULL;
  85
  86 static int libInitialized = FALSE;
  87
  88 static fd_set readFdSet;
  89 static fd_set writeFdSet;
  90
  91 static fd_set readFdSetPost;
  92 static int numRdPosted = 0;
  93 static fd_set writeFdSetPost;
  94 static int numWrPosted = 0;
  95 static int volatile maxFd = -1;
  96
  97 static int shutdownPending = FALSE;
  98 static int shutdownNow = FALSE;
  99
 100 void OS_ShutdownPending()
 101 {
 102     shutdownPending = TRUE;
 103 }
 104
 105 static void OS_Sigusr1Handler(int signo)
 106 {
 107     OS_ShutdownPending();
 108 }
 109
 110 static void OS_SigpipeHandler(int signo)
 111 {
 112     ;
 113 }
 114
 115 static void installSignalHandler(int signo, const struct sigaction * act, int force)
 116 {
 117     struct sigaction sa;
 118
 119     sigaction(signo, NULL, &sa);
 120
 121     if (force || sa.sa_handler == SIG_DFL)
 122     {
 123         sigaction(signo, act, NULL);
 124     }
 125 }
 126
 127 static void OS_InstallSignalHandlers(int force)
 128 {
 129     struct sigaction sa;
 130
 131     sigemptyset(&sa.sa_mask);
 132     sa.sa_flags = 0;
 133
 134     sa.sa_handler = OS_SigpipeHandler;
 135     installSignalHandler(SIGPIPE, &sa, force);
 136
 137     sa.sa_handler = OS_Sigusr1Handler;
 138     installSignalHandler(SIGUSR1, &sa, force);
 139 }
 140
 141 /*
 142  *--------------------------------------------------------------
 143  *
 144  * OS_LibInit --
 145  *
 146  *      Set up the OS library for use.
 147  *
 148  *      NOTE: This function is really only needed for application
 149  *            asynchronous I/O.  It will most likely change in the
 150  *            future to setup the multi-threaded environment.
 151  *
 152  * Results:
 153  *      Returns 0 if success, -1 if not.
 154  *
 155  * Side effects:
 156  *      Async I/O table allocated and initialized.
 157  *
 158  *--------------------------------------------------------------
 159  */
 160 int OS_LibInit(int stdioFds[3])
 161 {
 162     if(libInitialized)
 163         return 0;
 164
 165     asyncIoTable = (AioInfo *)malloc(asyncIoTableSize * sizeof(AioInfo));
 166     if(asyncIoTable == NULL) {
 167         errno = ENOMEM;
 168         return -1;
 169     }
 170     memset((char *) asyncIoTable, 0,
 171            asyncIoTableSize * sizeof(AioInfo));
 172
 173     FD_ZERO(&readFdSet);
 174     FD_ZERO(&writeFdSet);
 175     FD_ZERO(&readFdSetPost);
 176     FD_ZERO(&writeFdSetPost);
 177
 178     OS_InstallSignalHandlers(TRUE);
 179
 180     libInitialized = TRUE;
 181
 182     return 0;
 183 }
 184
 185 /*
 186  *--------------------------------------------------------------
 187  *
 188  * OS_LibShutdown --
 189  *
 190  *      Shutdown the OS library.
 191  *
 192  * Results:
 193  *      None.
 194  *
 195  * Side effects:
 196  *      Memory freed, fds closed.
 197  *
 198  *--------------------------------------------------------------
 199  */
 200 void OS_LibShutdown()
 201 {
 202     if(!libInitialized)
 203         return;
 204
 205     free(asyncIoTable);
 206     asyncIoTable = NULL;
 207     libInitialized = FALSE;
 208     return;
 209 }
 210
 211 /*
 212  *----------------------------------------------------------------------
 213  *
 214  * OS_BuildSockAddrUn --
 215  *
 216  *      Using the pathname bindPath, fill in the sockaddr_un structure
 217  *      *servAddrPtr and the length of this structure *servAddrLen.
 218  *
 219  *      The format of the sockaddr_un structure changed incompatibly in
 220  *      4.3BSD Reno.  Digital UNIX supports both formats, other systems
 221  *      support one or the other.
 222  *
 223  * Results:
 224  *      0 for normal return, -1 for failure (bindPath too long).
 225  *
 226  *----------------------------------------------------------------------
 227  */
 228
 229 static int OS_BuildSockAddrUn(const char *bindPath,
 230                               struct sockaddr_un *servAddrPtr,
 231                               int *servAddrLen)
 232 {
 233     int bindPathLen = strlen(bindPath);
 234
 235 #ifdef HAVE_SOCKADDR_UN_SUN_LEN /* 4.3BSD Reno and later: BSDI, DEC */
 236     if(bindPathLen >= sizeof(servAddrPtr->sun_path)) {
 237         return -1;
 238     }
 239 #else                           /* 4.3 BSD Tahoe: Solaris, HPUX, DEC, ... */
 240     if(bindPathLen > sizeof(servAddrPtr->sun_path)) {
 241         return -1;
 242     }
 243 #endif
 244     memset((char *) servAddrPtr, 0, sizeof(*servAddrPtr));
 245     servAddrPtr->sun_family = AF_UNIX;
 246     memcpy(servAddrPtr->sun_path, bindPath, bindPathLen);
 247 #ifdef HAVE_SOCKADDR_UN_SUN_LEN /* 4.3BSD Reno and later: BSDI, DEC */
 248     *servAddrLen = sizeof(servAddrPtr->sun_len)
 249             + sizeof(servAddrPtr->sun_family)
 250             + bindPathLen + 1;
 251     servAddrPtr->sun_len = *servAddrLen;
 252 #else                           /* 4.3 BSD Tahoe: Solaris, HPUX, DEC, ... */
 253     *servAddrLen = sizeof(servAddrPtr->sun_family) + bindPathLen;
 254 #endif
 255     return 0;
 256 }
 257 union SockAddrUnion {
 258     struct  sockaddr_un unixVariant;
 259     struct  sockaddr_in inetVariant;
 260 };
 261
 262 /*
 263  * OS_CreateLocalIpcFd --
 264  *
 265  *   This procedure is responsible for creating the listener socket
 266  *   on Unix for local process communication.  It will create a
 267  *   domain socket or a TCP/IP socket bound to "localhost" and return
 268  *   a file descriptor to it to the caller.
 269  *
 270  * Results:
 271  *      Listener socket created.  This call returns either a valid
 272  *      file descriptor or -1 on error.
 273  *
 274  * Side effects:
 275  *      None.
 276  *
 277  *----------------------------------------------------------------------
 278  */
 279 int OS_CreateLocalIpcFd(const char *bindPath, int backlog)
 280 {
 281     int listenSock, servLen;
 282     union   SockAddrUnion sa;
 283     int     tcp = FALSE;
 284     unsigned long tcp_ia = 0;
 285     char    *tp;
 286     short   port = 0;
 287     char    host[MAXPATHLEN];
 288
 289     if (strlen(bindPath) >= MAXPATHLEN) {
 290             fprintf(stderr,
 291             "Listening socket path is longer than %d bytes -- exiting!\n",
 292             MAXPATHLEN);
 293             exit(1);
 294     }
 295     strcpy(host, bindPath);
 296     if((tp = strchr(host, ':')) != 0) {
 297         *tp++ = 0;
 298         if((port = atoi(tp)) == 0) {
 299             *--tp = ':';
 300          } else {
 301             tcp = TRUE;
 302          }
 303     }
 304     if(tcp) {
 305       if (!*host || !strcmp(host,"*")) {
 306         tcp_ia = htonl(INADDR_ANY);
 307       } else {
 308         tcp_ia = inet_addr(host);
 309         if (tcp_ia == INADDR_NONE) {
 310           struct hostent * hep;
 311           hep = gethostbyname(host);
 312           if ((!hep) || (hep->h_addrtype != AF_INET || !hep->h_addr_list[0])) {
 313             fprintf(stderr, "Cannot resolve host name %s -- exiting!\n", host);
 314             exit(1);
 315           }
 316           if (hep->h_addr_list[1]) {
 317             fprintf(stderr, "Host %s has multiple addresses ---\n", host);
 318             fprintf(stderr, "you must choose one explicitly!!!\n");
 319             exit(1);
 320           }
 321           tcp_ia = ((struct in_addr *) (hep->h_addr))->s_addr;
 322         }
 323       }
 324     }
 325
 326     if(tcp) {
 327         listenSock = socket(AF_INET, SOCK_STREAM, 0);
 328         if(listenSock >= 0) {
 329             int flag = 1;
 330             if(setsockopt(listenSock, SOL_SOCKET, SO_REUSEADDR,
 331                           (char *) &flag, sizeof(flag)) < 0) {
 332                 fprintf(stderr, "Can't set SO_REUSEADDR.\n");
 333                 exit(1001);
 334             }
 335         }
 336     } else {
 337         listenSock = socket(AF_UNIX, SOCK_STREAM, 0);
 338     }
 339     if(listenSock < 0) {
 340         return -1;
 341     }
 342
 343     /*
 344      * Bind the listening socket.
 345      */
 346     if(tcp) {
 347         memset((char *) &sa.inetVariant, 0, sizeof(sa.inetVariant));
 348         sa.inetVariant.sin_family = AF_INET;
 349         sa.inetVariant.sin_addr.s_addr = tcp_ia;
 350         sa.inetVariant.sin_port = htons(port);
 351         servLen = sizeof(sa.inetVariant);
 352     } else {
 353         unlink(bindPath);
 354         if(OS_BuildSockAddrUn(bindPath, &sa.unixVariant, &servLen)) {
 355             fprintf(stderr, "Listening socket's path name is too long.\n");
 356             exit(1000);
 357         }
 358     }
 359     if(bind(listenSock, (struct sockaddr *) &sa.unixVariant, servLen) < 0
 360        || listen(listenSock, backlog) < 0) {
 361         perror("bind/listen");
 362         exit(errno);
 363     }
 364
 365     return listenSock;
 366 }
 367
 368 /*
 369  *----------------------------------------------------------------------
 370  *
 371  * OS_FcgiConnect --
 372  *
 373  *      Create the socket and connect to the remote application if
 374  *      possible.
 375  *
 376  *      This was lifted from the cgi-fcgi application and was abstracted
 377  *      out because Windows NT does not have a domain socket and must
 378  *      use a named pipe which has a different API altogether.
 379  *
 380  * Results:
 381  *      -1 if fail or a valid file descriptor if connection succeeds.
 382  *
 383  * Side effects:
 384  *      Remote connection established.
 385  *
 386  *----------------------------------------------------------------------
 387  */
 388 int OS_FcgiConnect(char *bindPath)
 389 {
 390     union   SockAddrUnion sa;
 391     int servLen, resultSock;
 392     int connectStatus;
 393     char    *tp;
 394     char    host[MAXPATHLEN];
 395     short   port = 0;
 396     int     tcp = FALSE;
 397
 398     if (strlen(bindPath) >= MAXPATHLEN) {
 399             fprintf(stderr, "Listening socket path is too long\n");
 400             exit(1000);
 401     }
 402     strcpy(host, bindPath);
 403     if((tp = strchr(host, ':')) != 0) {
 404         *tp++ = 0;
 405         if((port = atoi(tp)) == 0) {
 406             *--tp = ':';
 407          } else {
 408             tcp = TRUE;
 409          }
 410     }
 411     if(tcp == TRUE) {
 412         struct  hostent *hp;
 413         if((hp = gethostbyname((*host ? host : "localhost"))) == NULL) {
 414             fprintf(stderr, "Unknown host: %s\n", bindPath);
 415             exit(1000);
 416         }
 417         sa.inetVariant.sin_family = AF_INET;
 418         memcpy(&sa.inetVariant.sin_addr, hp->h_addr, hp->h_length);
 419         sa.inetVariant.sin_port = htons(port);
 420         servLen = sizeof(sa.inetVariant);
 421         resultSock = socket(AF_INET, SOCK_STREAM, 0);
 422     } else {
 423         if(OS_BuildSockAddrUn(bindPath, &sa.unixVariant, &servLen)) {
 424             fprintf(stderr, "Listening socket's path name is too long.\n");
 425             exit(1000);
 426         }
 427         resultSock = socket(AF_UNIX, SOCK_STREAM, 0);
 428     }
 429
 430     ASSERT(resultSock >= 0);
 431     connectStatus = connect(resultSock, (struct sockaddr *) &sa.unixVariant,
 432                              servLen);
 433     if(connectStatus >= 0) {
 434         return resultSock;
 435     } else {
 436         /*
 437          * Most likely (errno == ENOENT || errno == ECONNREFUSED)
 438          * and no FCGI application server is running.
 439          */
 440         close(resultSock);
 441         return -1;
 442     }
 443 }
 444
 445 /*
 446  *--------------------------------------------------------------
 447  *
 448  * OS_Read --
 449  *
 450  *      Pass through to the unix read function.
 451  *
 452  * Results:
  453  *      Returns number of bytes read, 0, or -1 failure: errno
 454  *      contains actual error.
 455  *
 456  * Side effects:
 457  *      None.
 458  *
 459  *--------------------------------------------------------------
 460  */
 461 int OS_Read(int fd, char * buf, size_t len)
 462 {
 463     if (shutdownNow) return -1;
 464     return(read(fd, buf, len));
 465 }
 466
 467 /*
 468  *--------------------------------------------------------------
 469  *
 470  * OS_Write --
 471  *
 472  *      Pass through to unix write function.
 473  *
 474  * Results:
  475  *      Returns number of bytes written, 0, or -1 failure: errno
 476  *      contains actual error.
 477  *
 478  * Side effects:
 479  *      none.
 480  *
 481  *--------------------------------------------------------------
 482  */
 483 int OS_Write(int fd, char * buf, size_t len)
 484 {
 485     if (shutdownNow) return -1;
 486     return(write(fd, buf, len));
 487 }
 488
 489 /*
 490  *----------------------------------------------------------------------
 491  *
 492  * OS_SpawnChild --
 493  *
 494  *      Spawns a new FastCGI listener process.
 495  *
 496  * Results:
 497  *      0 if success, -1 if error.
 498  *
 499  * Side effects:
 500  *      Child process spawned.
 501  *
 502  *----------------------------------------------------------------------
 503  */
 504 int OS_SpawnChild(char *appPath, int listenFd)
 505 {
 506     int forkResult;
 507
 508     forkResult = fork();
 509     if(forkResult < 0) {
 510         exit(errno);
 511     }
 512
 513     if(forkResult == 0) {
 514         /*
 515          * Close STDIN unconditionally.  It's used by the parent
 516          * process for CGI communication.  The FastCGI applciation
 517          * will be replacing this with the FastCGI listenFd IF
 518          * STDIN_FILENO is the same as FCGI_LISTENSOCK_FILENO
 519          * (which it is on Unix).  Regardless, STDIN, STDOUT, and
 520          * STDERR will be closed as the FastCGI process uses a
 521          * multiplexed socket in their place.
 522          */
 523         close(STDIN_FILENO);
 524
 525         /*
 526          * If the listenFd is already the value of FCGI_LISTENSOCK_FILENO
 527          * we're set.  If not, change it so the child knows where to
 528          * get the listen socket from.
 529          */
 530         if(listenFd != FCGI_LISTENSOCK_FILENO) {
 531             dup2(listenFd, FCGI_LISTENSOCK_FILENO);
 532             close(listenFd);
 533         }
 534
 535         close(STDOUT_FILENO);
 536         close(STDERR_FILENO);
 537
 538         /*
 539          * We're a child.  Exec the application.
 540          *
 541          * XXX: entire environment passes through
 542          */
 543         execl(appPath, appPath, NULL);
 544         /*
 545          * XXX: Can't do this as we've already closed STDERR!!!
 546          *
 547          * perror("exec");
 548          */
 549         exit(errno);
 550     }
 551     return 0;
 552 }
 553
 554 /*
 555  *--------------------------------------------------------------
 556  *
 557  * OS_AsyncReadStdin --
 558  *
 559  *      This initiates an asynchronous read on the standard
 560  *      input handle.
 561  *
 562  *      The abstraction is necessary because Windows NT does not
 563  *      have a clean way of "select"ing a file descriptor for
 564  *      I/O.
 565  *
 566  * Results:
 567  *      -1 if error, 0 otherwise.
 568  *
 569  * Side effects:
 570  *      Asynchronous bit is set in the readfd variable and
 571  *      request is enqueued.
 572  *
 573  *--------------------------------------------------------------
 574  */
 575 int OS_AsyncReadStdin(void *buf, int len, OS_AsyncProc procPtr,
 576                       ClientData clientData)
 577 {
 578     int index = AIO_RD_IX(STDIN_FILENO);
 579
 580     asyncIoInUse = TRUE;
 581     ASSERT(asyncIoTable[index].inUse == 0);
 582     asyncIoTable[index].procPtr = procPtr;
 583     asyncIoTable[index].clientData = clientData;
 584     asyncIoTable[index].fd = STDIN_FILENO;
 585     asyncIoTable[index].len = len;
 586     asyncIoTable[index].offset = 0;
 587     asyncIoTable[index].buf = buf;
 588     asyncIoTable[index].inUse = 1;
 589     FD_SET(STDIN_FILENO, &readFdSet);
 590     if(STDIN_FILENO > maxFd)
 591         maxFd = STDIN_FILENO;
 592     return 0;
 593 }
 594
 595 static void GrowAsyncTable(void)
 596 {
 597     int oldTableSize = asyncIoTableSize;
 598
 599     asyncIoTableSize = asyncIoTableSize * 2;
 600     asyncIoTable = (AioInfo *)realloc(asyncIoTable, asyncIoTableSize * sizeof(AioInfo));
 601     if(asyncIoTable == NULL) {
 602         errno = ENOMEM;
 603         exit(errno);
 604     }
 605     memset((char *) &asyncIoTable[oldTableSize], 0,
 606            oldTableSize * sizeof(AioInfo));
 607
 608 }
 609
 610 /*
 611  *--------------------------------------------------------------
 612  *
 613  * OS_AsyncRead --
 614  *
 615  *      This initiates an asynchronous read on the file
 616  *      handle which may be a socket or named pipe.
 617  *
 618  *      We also must save the ProcPtr and ClientData, so later
 619  *      when the io completes, we know who to call.
 620  *
 621  *      We don't look at any results here (the ReadFile may
 622  *      return data if it is cached) but do all completion
 623  *      processing in OS_Select when we get the io completion
 624  *      port done notifications.  Then we call the callback.
 625  *
 626  * Results:
 627  *      -1 if error, 0 otherwise.
 628  *
 629  * Side effects:
 630  *      Asynchronous I/O operation is queued for completion.
 631  *
 632  *--------------------------------------------------------------
 633  */
 634 int OS_AsyncRead(int fd, int offset, void *buf, int len,
 635                  OS_AsyncProc procPtr, ClientData clientData)
 636 {
 637     int index = AIO_RD_IX(fd);
 638
 639     ASSERT(asyncIoTable != NULL);
 640     asyncIoInUse = TRUE;
 641
 642     if(fd > maxFd)
 643         maxFd = fd;
 644
 645     while (index >= asyncIoTableSize) {
 646         GrowAsyncTable();
 647     }
 648
 649     ASSERT(asyncIoTable[index].inUse == 0);
 650     asyncIoTable[index].procPtr = procPtr;
 651     asyncIoTable[index].clientData = clientData;
 652     asyncIoTable[index].fd = fd;
 653     asyncIoTable[index].len = len;
 654     asyncIoTable[index].offset = offset;
 655     asyncIoTable[index].buf = buf;
 656     asyncIoTable[index].inUse = 1;
 657     FD_SET(fd, &readFdSet);
 658     return 0;
 659 }
 660
 661 /*
 662  *--------------------------------------------------------------
 663  *
 664  * OS_AsyncWrite --
 665  *
 666  *      This initiates an asynchronous write on the "fake" file
 667  *      descriptor (which may be a file, socket, or named pipe).
 668  *      We also must save the ProcPtr and ClientData, so later
 669  *      when the io completes, we know who to call.
 670  *
 671  *      We don't look at any results here (the WriteFile generally
 672  *      completes immediately) but do all completion processing
 673  *      in OS_DoIo when we get the io completion port done
 674  *      notifications.  Then we call the callback.
 675  *
 676  * Results:
 677  *      -1 if error, 0 otherwise.
 678  *
 679  * Side effects:
 680  *      Asynchronous I/O operation is queued for completion.
 681  *
 682  *--------------------------------------------------------------
 683  */
 684 int OS_AsyncWrite(int fd, int offset, void *buf, int len,
 685                   OS_AsyncProc procPtr, ClientData clientData)
 686 {
 687     int index = AIO_WR_IX(fd);
 688
 689     asyncIoInUse = TRUE;
 690
 691     if(fd > maxFd)
 692         maxFd = fd;
 693
 694     while (index >= asyncIoTableSize) {
 695         GrowAsyncTable();
 696     }
 697
 698     ASSERT(asyncIoTable[index].inUse == 0);
 699     asyncIoTable[index].procPtr = procPtr;
 700     asyncIoTable[index].clientData = clientData;
 701     asyncIoTable[index].fd = fd;
 702     asyncIoTable[index].len = len;
 703     asyncIoTable[index].offset = offset;
 704     asyncIoTable[index].buf = buf;
 705     asyncIoTable[index].inUse = 1;
 706     FD_SET(fd, &writeFdSet);
 707     return 0;
 708 }
 709
 710 /*
 711  *--------------------------------------------------------------
 712  *
 713  * OS_Close --
 714  *
 715  *      Closes the descriptor.  This is a pass through to the
 716  *      Unix close.
 717  *
 718  * Results:
 719  *      0 for success, -1 on failure
 720  *
 721  * Side effects:
 722  *      None.
 723  *
 724  *--------------------------------------------------------------
 725  */
 726 int OS_Close(int fd, int shutdown_ok)
 727 {
 728     if (fd == -1)
 729         return 0;
 730
 731     if (asyncIoInUse) {
 732         int index = AIO_RD_IX(fd);
 733
 734         FD_CLR(fd, &readFdSet);
 735         FD_CLR(fd, &readFdSetPost);
 736         if (asyncIoTable[index].inUse != 0) {
 737             asyncIoTable[index].inUse = 0;
 738         }
 739
 740         FD_CLR(fd, &writeFdSet);
 741         FD_CLR(fd, &writeFdSetPost);
 742         index = AIO_WR_IX(fd);
 743         if (asyncIoTable[index].inUse != 0) {
 744             asyncIoTable[index].inUse = 0;
 745         }
 746
 747         if (maxFd == fd) {
 748             maxFd--;
 749         }
 750     }
 751
 752     /*
 753      * shutdown() the send side and then read() from client until EOF
 754      * or a timeout expires.  This is done to minimize the potential
 755      * that a TCP RST will be sent by our TCP stack in response to
 756      * receipt of additional data from the client.  The RST would
 757      * cause the client to discard potentially useful response data.
 758      */
 759
 760     if (shutdown_ok)
 761     {
 762         if (shutdown(fd, 1) == 0)
 763         {
 764             struct timeval tv;
 765             fd_set rfds;
 766             int rv;
 767             char trash[1024];
 768
 769             FD_ZERO(&rfds);
 770
 771             do
 772             {
 773                 FD_SET(fd, &rfds);
 774                 tv.tv_sec = 2;
 775                 tv.tv_usec = 0;
 776                 rv = select(fd + 1, &rfds, NULL, NULL, &tv);
 777             }
 778             while (rv > 0 && read(fd, trash, sizeof(trash)) > 0);
 779         }
 780     }
 781
 782     return close(fd);
 783 }
 784
 785 /*
 786  *--------------------------------------------------------------
 787  *
 788  * OS_CloseRead --
 789  *
 790  *      Cancel outstanding asynchronous reads and prevent subsequent
 791  *      reads from completing.
 792  *
 793  * Results:
 794  *      Socket or file is shutdown. Return values mimic Unix shutdown:
 795  *              0 success, -1 failure
 796  *
 797  *--------------------------------------------------------------
 798  */
 799 int OS_CloseRead(int fd)
 800 {
 801     if(asyncIoTable[AIO_RD_IX(fd)].inUse != 0) {
 802         asyncIoTable[AIO_RD_IX(fd)].inUse = 0;
 803         FD_CLR(fd, &readFdSet);
 804     }
 805
 806     return shutdown(fd, 0);
 807 }
 808
 809 /*
 810  *--------------------------------------------------------------
 811  *
 812  * OS_DoIo --
 813  *
  814  *      This function was formerly OS_Select.  Its purpose is
 815  *      to pull I/O completion events off the queue and dispatch
 816  *      them to the appropriate place.
 817  *
 818  * Results:
 819  *      Returns 0.
 820  *
 821  * Side effects:
 822  *      Handlers are called.
 823  *
 824  *--------------------------------------------------------------
 825  */
 826 int OS_DoIo(struct timeval *tmo)
 827 {
 828     int fd, len, selectStatus;
 829     OS_AsyncProc procPtr;
 830     ClientData clientData;
 831     AioInfo *aioPtr;
 832     fd_set readFdSetCpy;
 833     fd_set writeFdSetCpy;
 834
 835     asyncIoInUse = TRUE;
 836     FD_ZERO(&readFdSetCpy);
 837     FD_ZERO(&writeFdSetCpy);
 838
 839     for(fd = 0; fd <= maxFd; fd++) {
 840         if(FD_ISSET(fd, &readFdSet)) {
 841             FD_SET(fd, &readFdSetCpy);
 842         }
 843         if(FD_ISSET(fd, &writeFdSet)) {
 844             FD_SET(fd, &writeFdSetCpy);
 845         }
 846     }
 847
 848     /*
 849      * If there were no completed events from a prior call, see if there's
 850      * any work to do.
 851      */
 852     if(numRdPosted == 0 && numWrPosted == 0) {
 853         selectStatus = select((maxFd+1), &readFdSetCpy, &writeFdSetCpy,
 854                               NULL, tmo);
 855         if(selectStatus < 0) {
 856             exit(errno);
 857         }
 858
 859         for(fd = 0; fd <= maxFd; fd++) {
 860             /*
 861              * Build up a list of completed events.  We'll work off of
 862              * this list as opposed to looping through the read and write
 863              * fd sets since they can be affected by a callbacl routine.
 864              */
 865             if(FD_ISSET(fd, &readFdSetCpy)) {
 866                 numRdPosted++;
 867                 FD_SET(fd, &readFdSetPost);
 868                 FD_CLR(fd, &readFdSet);
 869             }
 870
 871             if(FD_ISSET(fd, &writeFdSetCpy)) {
 872                 numWrPosted++;
 873                 FD_SET(fd, &writeFdSetPost);
 874                 FD_CLR(fd, &writeFdSet);
 875             }
 876         }
 877     }
 878
 879     if(numRdPosted == 0 && numWrPosted == 0)
 880         return 0;
 881
 882     for(fd = 0; fd <= maxFd; fd++) {
 883         /*
 884          * Do reads and dispatch callback.
 885          */
 886         if(FD_ISSET(fd, &readFdSetPost)
 887            && asyncIoTable[AIO_RD_IX(fd)].inUse) {
 888
 889             numRdPosted--;
 890             FD_CLR(fd, &readFdSetPost);
 891             aioPtr = &asyncIoTable[AIO_RD_IX(fd)];
 892
 893             len = read(aioPtr->fd, aioPtr->buf, aioPtr->len);
 894
 895             procPtr = aioPtr->procPtr;
 896             aioPtr->procPtr = NULL;
 897             clientData = aioPtr->clientData;
 898             aioPtr->inUse = 0;
 899
 900             (*procPtr)(clientData, len);
 901         }
 902
 903         /*
 904          * Do writes and dispatch callback.
 905          */
 906         if(FD_ISSET(fd, &writeFdSetPost) &&
 907            asyncIoTable[AIO_WR_IX(fd)].inUse) {
 908
 909             numWrPosted--;
 910             FD_CLR(fd, &writeFdSetPost);
 911             aioPtr = &asyncIoTable[AIO_WR_IX(fd)];
 912
 913             len = write(aioPtr->fd, aioPtr->buf, aioPtr->len);
 914
 915             procPtr = aioPtr->procPtr;
 916             aioPtr->procPtr = NULL;
 917             clientData = aioPtr->clientData;
 918             aioPtr->inUse = 0;
 919             (*procPtr)(clientData, len);
 920         }
 921     }
 922     return 0;
 923 }
 924
 925 /*
 926  * Not all systems have strdup().
 927  * @@@ autoconf should determine whether or not this is needed, but for now..
 928  */
 929 static char * str_dup(const char * str)
 930 {
 931     char * sdup = (char *) malloc(strlen(str) + 1);
 932
 933     if (sdup)
 934         strcpy(sdup, str);
 935
 936     return sdup;
 937 }
 938
 939 /*
 940  *----------------------------------------------------------------------
 941  *
 942  * ClientAddrOK --
 943  *
 944  *      Checks if a client address is in a list of allowed addresses
 945  *
 946  * Results:
 947  *      TRUE if address list is empty or client address is present
 948  *      in the list, FALSE otherwise.
 949  *
 950  *----------------------------------------------------------------------
 951  */
 952 static int ClientAddrOK(struct sockaddr_in *saPtr, const char *clientList)
 953 {
 954     int result = FALSE;
 955     char *clientListCopy, *cur, *next;
 956
 957     if (clientList == NULL || *clientList == '\0') {
 958         return TRUE;
 959     }
 960
 961     clientListCopy = str_dup(clientList);
 962
 963     for (cur = clientListCopy; cur != NULL; cur = next) {
 964         next = strchr(cur, ',');
 965         if (next != NULL) {
 966             *next++ = '\0';
 967         }
 968         if (inet_addr(cur) == saPtr->sin_addr.s_addr) {
 969             result = TRUE;
 970             break;
 971         }
 972     }
 973
 974     free(clientListCopy);
 975     return result;
 976 }
 977
 978 /*
 979  *----------------------------------------------------------------------
 980  *
 981  * AcquireLock --
 982  *
 983  *      On platforms that implement concurrent calls to accept
 984  *      on a shared listening ipcFd, returns 0.  On other platforms,
 985  *      acquires an exclusive lock across all processes sharing a
 986  *      listening ipcFd, blocking until the lock has been acquired.
 987  *
 988  * Results:
 989  *      0 for successful call, -1 in case of system error (fatal).
 990  *
 991  * Side effects:
 992  *      This process now has the exclusive lock.
 993  *
 994  *----------------------------------------------------------------------
 995  */
 996 static int AcquireLock(int sock, int fail_on_intr)
 997 {
 998 #ifdef USE_LOCKING
 999     do {
1000         struct flock lock;
1001         lock.l_type = F_WRLCK;
1002         lock.l_start = 0;
1003         lock.l_whence = SEEK_SET;
1004         lock.l_len = 0;
1005
1006         if (fcntl(sock, F_SETLKW, &lock) != -1)
1007             return 0;
1008     } while (errno == EINTR
1009              && ! fail_on_intr
1010              && ! shutdownPending);
1011
1012     return -1;
1013
1014 #else
1015     return 0;
1016 #endif
1017 }
1018
1019 /*
1020  *----------------------------------------------------------------------
1021  *
1022  * ReleaseLock --
1023  *
1024  *      On platforms that implement concurrent calls to accept
1025  *      on a shared listening ipcFd, does nothing.  On other platforms,
1026  *      releases an exclusive lock acquired by AcquireLock.
1027  *
1028  * Results:
1029  *      0 for successful call, -1 in case of system error (fatal).
1030  *
1031  * Side effects:
1032  *      This process no longer holds the lock.
1033  *
1034  *----------------------------------------------------------------------
1035  */
static int ReleaseLock(int sock)
{
#ifdef USE_LOCKING
    struct flock wholeFile;

    for (;;) {
        /* Unlock the same whole-file range that AcquireLock locked. */
        wholeFile.l_type = F_UNLCK;
        wholeFile.l_whence = SEEK_SET;
        wholeFile.l_start = 0;
        wholeFile.l_len = 0;

        if (fcntl(sock, F_SETLK, &wholeFile) != -1) {
            return 0;
        }

        /* Anything other than an interrupted call is a real failure. */
        if (errno != EINTR) {
            return -1;
        }
    }
#else
    /* No locking needed on this platform. */
    (void) sock;
    return 0;
#endif
}
1056
/**********************************************************************
 * Classify the errno left behind by a failed accept(): returns 1 when
 * the failure is one of the listed per-connection errors (the caller
 * may simply accept() again), 0 for everything else.  Based on Apache's
 * http_main.c accept() handling and Stevens' Unix Network Programming
 * Vol 1, 2nd Ed, para. 15.6.
 */
static int is_reasonable_accept_errno (const int error)
{
    /* Errno values that justify a retry; each one is listed only if the
     * platform defines it.  The list is terminated by 0, which is never
     * a valid errno value. */
    static const int retryable[] = {
#ifdef EPROTO
        /* EPROTO on certain older kernels really means ECONNABORTED, so
         * we need to ignore it for them.  See discussion in new-httpd
         * archives nh.9701 search for EPROTO.  Also see nh.9603, search
         * for EPROTO:  There is potentially a bug in Solaris 2.x x<6, and
         * other boxes that implement tcp sockets in userland (i.e. on top
         * of STREAMS).  On these systems, EPROTO can actually result in a
         * fatal loop.  See PR#981 for example.  It's hard to handle both
         * uses of EPROTO. */
        EPROTO,
#endif
#ifdef ECONNABORTED
        ECONNABORTED,
#endif
        /* Linux generates the rest of these, other tcp stacks (i.e.
         * bsd) tend to hide them behind getsockopt() interfaces.  They
         * occur when the net goes sour or the client disconnects after
         * the three-way handshake has been done in the kernel but before
         * userland has picked up the socket. */
#ifdef ECONNRESET
        ECONNRESET,
#endif
#ifdef ETIMEDOUT
        ETIMEDOUT,
#endif
#ifdef EHOSTUNREACH
        EHOSTUNREACH,
#endif
#ifdef ENETUNREACH
        ENETUNREACH,
#endif
        0
    };
    const int *entry;

    for (entry = retryable; *entry != 0; ++entry) {
        if (*entry == error) {
            return 1;
        }
    }

    return 0;
}
1102
1103 /**********************************************************************
1104  * This works around a problem on Linux 2.0.x and SCO Unixware (maybe
1105  * others?).  When a connect() is made to a Unix Domain socket, but its
1106  * not accept()ed before the web server gets impatient and close()s, an
1107  * accept() results in a valid file descriptor, but no data to read.
1108  * This causes a block on the first read() - which never returns!
1109  *
1110  * Another approach to this is to write() to the socket to provoke a
1111  * SIGPIPE, but this is a pain because of the FastCGI protocol, the fact
1112  * that whatever is written has to be universally ignored by all FastCGI
1113  * web servers, and a SIGPIPE handler has to be installed which returns
1114  * (or SIGPIPE is ignored).
1115  *
1116  * READABLE_UNIX_FD_DROP_DEAD_TIMEVAL = 2,0 by default.
1117  *
1118  * Making it shorter is probably safe, but I'll leave that to you.  Making
1119  * it 0,0 doesn't work reliably.  The shorter you can reliably make it,
1120  * the faster your application will be able to recover (waiting 2 seconds
1121  * may _cause_ the problem when there is a very high demand). At any rate,
1122  * this is better than perma-blocking.
1123  */
1124 static int is_af_unix_keeper(const int fd)
1125 {
1126     struct timeval tval = { READABLE_UNIX_FD_DROP_DEAD_TIMEVAL };
1127     fd_set read_fds;
1128
1129     FD_ZERO(&read_fds);
1130     FD_SET(fd, &read_fds);
1131
1132     return select(fd + 1, &read_fds, NULL, NULL, &tval) >= 0 && FD_ISSET(fd, &read_fds);
1133 }
1134
1135 /*
1136  *----------------------------------------------------------------------
1137  *
1138  * OS_Accept --
1139  *
1140  *      Accepts a new FastCGI connection.  This routine knows whether
1141  *      we're dealing with TCP based sockets or NT Named Pipes for IPC.
1142  *
1143  * Results:
1144  *      -1 if the operation fails, otherwise this is a valid IPC fd.
1145  *
1146  * Side effects:
1147  *      New IPC connection is accepted.
1148  *
1149  *----------------------------------------------------------------------
1150  */
1151 int OS_Accept(int listen_sock, int fail_on_intr, const char *webServerAddrs)
1152 {
1153     int socket = -1;
1154     union {
1155         struct sockaddr_un un;
1156         struct sockaddr_in in;
1157     } sa;
1158
1159     for (;;) {
1160         if (AcquireLock(listen_sock, fail_on_intr))
1161             return -1;
1162
1163         for (;;) {
1164             do {
1165 #ifdef HAVE_SOCKLEN
1166                 socklen_t len = sizeof(sa);
1167 #else
1168                 int len = sizeof(sa);
1169 #endif
1170                 if (shutdownPending) break;
1171                 /* There's a window here */
1172
1173                 socket = accept(listen_sock, (struct sockaddr *)&sa, &len);
1174             } while (socket < 0
1175                      && errno == EINTR
1176                      && ! fail_on_intr
1177                      && ! shutdownPending);
1178
1179             if (socket < 0) {
1180                 if (shutdownPending || ! is_reasonable_accept_errno(errno)) {
1181                     int errnoSave = errno;
1182
1183                     ReleaseLock(listen_sock);
1184
1185                     if (! shutdownPending) {
1186                         errno = errnoSave;
1187                     }
1188
1189                     return (-1);
1190                 }
1191                 errno = 0;
1192             }
1193             else {  /* socket >= 0 */
1194                 int set = 1;
1195
1196                 if (sa.in.sin_family != AF_INET)
1197                     break;
1198
1199 #ifdef TCP_NODELAY
1200                 /* No replies to outgoing data, so disable Nagle */
1201                 setsockopt(socket, IPPROTO_TCP, TCP_NODELAY, (char *)&set, sizeof(set));
1202 #endif
1203
1204                 /* Check that the client IP address is approved */
1205                 if (ClientAddrOK(&sa.in, webServerAddrs))
1206                     break;
1207
1208                 close(socket);
1209             }  /* socket >= 0 */
1210         }  /* for(;;) */
1211
1212         if (ReleaseLock(listen_sock))
1213             return (-1);
1214
1215         if (sa.in.sin_family != AF_UNIX || is_af_unix_keeper(socket))
1216             break;
1217
1218         close(socket);
1219     }  /* while(1) - lock */
1220
1221     return (socket);
1222 }
1223
/*
 *----------------------------------------------------------------------
 *
 * OS_IpcClose --
 *
 *      OS IPC routine to close an IPC connection.  Both arguments are
 *      forwarded unchanged to OS_Close.
 *
 * Results:
 *      Whatever OS_Close returns.
 *
 * Side effects:
 *      IPC connection is closed.
 *
 *----------------------------------------------------------------------
 */
int OS_IpcClose(int ipcFd, int shutdown)
{
    int status = OS_Close(ipcFd, shutdown);

    return status;
}
1243
1244 /*
1245  *----------------------------------------------------------------------
1246  *
1247  * OS_IsFcgi --
1248  *
1249  *      Determines whether this process is a FastCGI process or not.
1250  *
1251  * Results:
1252  *      Returns 1 if FastCGI, 0 if not.
1253  *
1254  * Side effects:
1255  *      None.
1256  *
1257  *----------------------------------------------------------------------
1258  */
1259 int OS_IsFcgi(int sock)
1260 {
1261         union {
1262         struct sockaddr_in in;
1263         struct sockaddr_un un;
1264     } sa;
1265 #ifdef HAVE_SOCKLEN
1266     socklen_t len = sizeof(sa);
1267 #else
1268     int len = sizeof(sa);
1269 #endif
1270
1271     errno = 0;
1272
1273     if (getpeername(sock, (struct sockaddr *)&sa, &len) != 0 && errno == ENOTCONN) {
1274         return TRUE;
1275     }
1276     else {
1277         return FALSE;
1278     }
1279 }
1280
/*
 *----------------------------------------------------------------------
 *
 * OS_SetFlags --
 *
 *      Sets selected flag bits in an open file descriptor: the current
 *      file status flags are read with F_GETFL, OR-ed with `flags' and
 *      written back with F_SETFL.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      If either fcntl() call fails, the process exits with errno as
 *      its exit status.
 *
 *----------------------------------------------------------------------
 */
void OS_SetFlags(int fd, int flags)
{
    int current = fcntl(fd, F_GETFL, 0);

    if (current < 0) {
        exit(errno);
    }

    if (fcntl(fd, F_SETFL, current | flags) < 0) {
        exit(errno);
    }
}
1299     }
1300 }