libfcgi/os_unix.c

   1 /*
   2  * os_unix.c --
   3  *
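   4  *      Unix implementation of the FastCGI OS abstraction layer:
   4  *      listener/client sockets, child spawning and select()-based
   4  *      asynchronous I/O.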
   5  *
   6  *
   7  *  Copyright (c) 1995 Open Market, Inc.
   8  *  All rights reserved.
   9  *
  10  *  This file contains proprietary and confidential information and
  11  *  remains the unpublished property of Open Market, Inc. Use,
  12  *  disclosure, or reproduction is prohibited except as permitted by
  13  *  express written license agreement with Open Market, Inc.
  14  *
  15  *  Bill Snapper
  16  *  snapper@openmarket.com
  17  */
  18
  19 #ifndef lint
  20 static const char rcsid[] = "$Id: os_unix.c,v 1.26 2001/06/18 14:24:28 robs Exp $";
  21 #endif /* not lint */
  22
  23 #include "fcgi_config.h"
  24
  25 #include <sys/types.h>
  26
  27 #ifdef HAVE_NETINET_IN_H
  28 #include <netinet/in.h>
  29 #endif
  30
  31 #include <arpa/inet.h>
  32 #include <assert.h>
  33 #include <errno.h>
  34 #include <fcntl.h>      /* for fcntl */
  35 #include <math.h>
  36 #include <memory.h>     /* for memchr() */
  37 #include <netinet/tcp.h>
  38 #include <stdarg.h>
  39 #include <stdio.h>
  40 #include <stdlib.h>
  41 #include <string.h>
  42 #include <sys/time.h>
  43 #include <sys/un.h>
  44
  45 #ifdef HAVE_NETDB_H
  46 #include <netdb.h>
  47 #endif
  48
  49 #ifdef HAVE_SYS_SOCKET_H
  50 #include <sys/socket.h> /* for getpeername */
  51 #endif
  52
  53 #ifdef HAVE_UNISTD_H
  54 #include <unistd.h>
  55 #endif
  56
  57 #include "fastcgi.h"
  58 #include "fcgiapp.h"
  59 #include "fcgiappmisc.h"
  60 #include "fcgimisc.h"
  61 #include "fcgios.h"
  62
  63 #ifndef INADDR_NONE
  64 #define INADDR_NONE ((unsigned long) -1)
  65 #endif
  66
  67 /*
  68  * This structure holds an entry for each outstanding async I/O operation.
  69  */
typedef struct {
    OS_AsyncProc procPtr;           /* completion callback run by OS_DoIo */
    ClientData clientData;          /* opaque caller data handed back to procPtr */
    int fd;                         /* descriptor the request was queued on */
    int len;                        /* byte count passed to read()/write() */
    int offset;                     /* caller-supplied offset; recorded but not
                                     * consulted by the code visible in this file */
    void *buf;                      /* caller-owned buffer to read into / write from */
    int inUse;                      /* non-zero while the request is outstanding */
} AioInfo;
  79
  80 /*
  81  * Entries in the async I/O table are allocated 2 per file descriptor.
  82  *
  83  * Read Entry Index  = fd * 2
  84  * Write Entry Index = (fd * 2) + 1
  85  */
  86 #define AIO_RD_IX(fd) (fd * 2)
  87 #define AIO_WR_IX(fd) ((fd * 2) + 1)
  88
/* Set to TRUE by the async routines and OS_DoIo; OS_Close consults it
 * before touching any of the async bookkeeping below. */
static int asyncIoInUse = FALSE;
/* Number of AioInfo slots in asyncIoTable; doubled by GrowAsyncTable(). */
static int asyncIoTableSize = 16;
/* Request table: allocated by OS_LibInit(), freed by OS_LibShutdown(). */
static AioInfo *asyncIoTable = NULL;

/* Set by OS_LibInit(), cleared by OS_LibShutdown(). */
static int libInitialized = FALSE;

/* Descriptors with a queued request that select() has not yet reported. */
static fd_set readFdSet;
static fd_set writeFdSet;

/* Descriptors select() reported ready whose callbacks OS_DoIo still has
 * to run, together with how many are pending in each direction. */
static fd_set readFdSetPost;
static int numRdPosted = 0;
static fd_set writeFdSetPost;
static int numWrPosted = 0;
/* Upper bound for the fd scans and the select() call; raised when a
 * request is queued, lowered by OS_Close when that very fd is closed. */
static int volatile maxFd = -1;
 103
 104 /*
 105  *--------------------------------------------------------------
 106  *
 107  * OS_LibInit --
 108  *
 109  *      Set up the OS library for use.
 110  *
 111  *      NOTE: This function is really only needed for application
 112  *            asynchronous I/O.  It will most likely change in the
 113  *            future to setup the multi-threaded environment.
 114  *
 115  * Results:
 116  *      Returns 0 if success, -1 if not.
 117  *
 118  * Side effects:
 119  *      Async I/O table allocated and initialized.
 120  *
 121  *--------------------------------------------------------------
 122  */
 123 int OS_LibInit(int stdioFds[3])
 124 {
 125     if(libInitialized)
 126         return 0;
 127
 128     asyncIoTable = (AioInfo *)malloc(asyncIoTableSize * sizeof(AioInfo));
 129     if(asyncIoTable == NULL) {
 130         errno = ENOMEM;
 131         return -1;
 132     }
 133     memset((char *) asyncIoTable, 0,
 134            asyncIoTableSize * sizeof(AioInfo));
 135
 136     FD_ZERO(&readFdSet);
 137     FD_ZERO(&writeFdSet);
 138     FD_ZERO(&readFdSetPost);
 139     FD_ZERO(&writeFdSetPost);
 140     libInitialized = TRUE;
 141     return 0;
 142 }
 143
 144 /*
 145  *--------------------------------------------------------------
 146  *
 147  * OS_LibShutdown --
 148  *
 149  *      Shutdown the OS library.
 150  *
 151  * Results:
 152  *      None.
 153  *
 154  * Side effects:
 155  *      Memory freed, fds closed.
 156  *
 157  *--------------------------------------------------------------
 158  */
 159 void OS_LibShutdown()
 160 {
 161     if(!libInitialized)
 162         return;
 163
 164     free(asyncIoTable);
 165     asyncIoTable = NULL;
 166     libInitialized = FALSE;
 167     return;
 168 }
 169
 170 /*
 171  *----------------------------------------------------------------------
 172  *
 173  * OS_BuildSockAddrUn --
 174  *
 175  *      Using the pathname bindPath, fill in the sockaddr_un structure
 176  *      *servAddrPtr and the length of this structure *servAddrLen.
 177  *
 178  *      The format of the sockaddr_un structure changed incompatibly in
 179  *      4.3BSD Reno.  Digital UNIX supports both formats, other systems
 180  *      support one or the other.
 181  *
 182  * Results:
 183  *      0 for normal return, -1 for failure (bindPath too long).
 184  *
 185  *----------------------------------------------------------------------
 186  */
 187
 188 static int OS_BuildSockAddrUn(const char *bindPath,
 189                               struct sockaddr_un *servAddrPtr,
 190                               int *servAddrLen)
 191 {
 192     int bindPathLen = strlen(bindPath);
 193
 194 #ifdef HAVE_SOCKADDR_UN_SUN_LEN /* 4.3BSD Reno and later: BSDI, DEC */
 195     if(bindPathLen >= sizeof(servAddrPtr->sun_path)) {
 196         return -1;
 197     }
 198 #else                           /* 4.3 BSD Tahoe: Solaris, HPUX, DEC, ... */
 199     if(bindPathLen > sizeof(servAddrPtr->sun_path)) {
 200         return -1;
 201     }
 202 #endif
 203     memset((char *) servAddrPtr, 0, sizeof(*servAddrPtr));
 204     servAddrPtr->sun_family = AF_UNIX;
 205     memcpy(servAddrPtr->sun_path, bindPath, bindPathLen);
 206 #ifdef HAVE_SOCKADDR_UN_SUN_LEN /* 4.3BSD Reno and later: BSDI, DEC */
 207     *servAddrLen = sizeof(servAddrPtr->sun_len)
 208             + sizeof(servAddrPtr->sun_family)
 209             + bindPathLen + 1;
 210     servAddrPtr->sun_len = *servAddrLen;
 211 #else                           /* 4.3 BSD Tahoe: Solaris, HPUX, DEC, ... */
 212     *servAddrLen = sizeof(servAddrPtr->sun_family) + bindPathLen;
 213 #endif
 214     return 0;
 215 }
/*
 * Storage large enough for either address family used in this file, so a
 * single variable can be handed to bind()/connect() for both Unix-domain
 * and TCP sockets.
 */
union SockAddrUnion {
    struct  sockaddr_un unixVariant;    /* AF_UNIX address */
    struct  sockaddr_in inetVariant;    /* AF_INET address */
};
 220
 221 /*
 222  * OS_CreateLocalIpcFd --
 223  *
 224  *   This procedure is responsible for creating the listener socket
 225  *   on Unix for local process communication.  It will create a
 226  *   domain socket or a TCP/IP socket bound to "localhost" and return
 227  *   a file descriptor to it to the caller.
 228  *
 229  * Results:
 230  *      Listener socket created.  This call returns either a valid
 231  *      file descriptor or -1 on error.
 232  *
 233  * Side effects:
 234  *      None.
 235  *
 236  *----------------------------------------------------------------------
 237  */
 238 int OS_CreateLocalIpcFd(const char *bindPath, int backlog)
 239 {
 240     int listenSock, servLen;
 241     union   SockAddrUnion sa;
 242     int     tcp = FALSE;
 243     unsigned long tcp_ia;
 244     char    *tp;
 245     short   port;
 246     char    host[MAXPATHLEN];
 247
 248     strcpy(host, bindPath);
 249     if((tp = strchr(host, ':')) != 0) {
 250         *tp++ = 0;
 251         if((port = atoi(tp)) == 0) {
 252             *--tp = ':';
 253          } else {
 254             tcp = TRUE;
 255          }
 256     }
 257     if(tcp) {
 258       if (!*host || !strcmp(host,"*")) {
 259         tcp_ia = htonl(INADDR_ANY);
 260       } else {
 261         tcp_ia = inet_addr(host);
 262         if (tcp_ia == INADDR_NONE) {
 263           struct hostent * hep;
 264           hep = gethostbyname(host);
 265           if ((!hep) || (hep->h_addrtype != AF_INET || !hep->h_addr_list[0])) {
 266             fprintf(stderr, "Cannot resolve host name %s -- exiting!\n", host);
 267             exit(1);
 268           }
 269           if (hep->h_addr_list[1]) {
 270             fprintf(stderr, "Host %s has multiple addresses ---\n", host);
 271             fprintf(stderr, "you must choose one explicitly!!!\n");
 272             exit(1);
 273           }
 274           tcp_ia = ((struct in_addr *) (hep->h_addr))->s_addr;
 275         }
 276       }
 277     }
 278
 279     if(tcp) {
 280         listenSock = socket(AF_INET, SOCK_STREAM, 0);
 281         if(listenSock >= 0) {
 282             int flag = 1;
 283             if(setsockopt(listenSock, SOL_SOCKET, SO_REUSEADDR,
 284                           (char *) &flag, sizeof(flag)) < 0) {
 285                 fprintf(stderr, "Can't set SO_REUSEADDR.\n");
 286                 exit(1001);
 287             }
 288         }
 289     } else {
 290         listenSock = socket(AF_UNIX, SOCK_STREAM, 0);
 291     }
 292     if(listenSock < 0) {
 293         return -1;
 294     }
 295
 296     /*
 297      * Bind the listening socket.
 298      */
 299     if(tcp) {
 300         memset((char *) &sa.inetVariant, 0, sizeof(sa.inetVariant));
 301         sa.inetVariant.sin_family = AF_INET;
 302         sa.inetVariant.sin_addr.s_addr = tcp_ia;
 303         sa.inetVariant.sin_port = htons(port);
 304         servLen = sizeof(sa.inetVariant);
 305     } else {
 306         unlink(bindPath);
 307         if(OS_BuildSockAddrUn(bindPath, &sa.unixVariant, &servLen)) {
 308             fprintf(stderr, "Listening socket's path name is too long.\n");
 309             exit(1000);
 310         }
 311     }
 312     if(bind(listenSock, (struct sockaddr *) &sa.unixVariant, servLen) < 0
 313        || listen(listenSock, backlog) < 0) {
 314         perror("bind/listen");
 315         exit(errno);
 316     }
 317
 318     return listenSock;
 319 }
 320
 321 /*
 322  *----------------------------------------------------------------------
 323  *
 324  * OS_FcgiConnect --
 325  *
 326  *      Create the socket and connect to the remote application if
 327  *      possible.
 328  *
 329  *      This was lifted from the cgi-fcgi application and was abstracted
 330  *      out because Windows NT does not have a domain socket and must
 331  *      use a named pipe which has a different API altogether.
 332  *
 333  * Results:
 334  *      -1 if fail or a valid file descriptor if connection succeeds.
 335  *
 336  * Side effects:
 337  *      Remote connection established.
 338  *
 339  *----------------------------------------------------------------------
 340  */
 341 int OS_FcgiConnect(char *bindPath)
 342 {
 343     union   SockAddrUnion sa;
 344     int servLen, resultSock;
 345     int connectStatus;
 346     char    *tp;
 347     char    host[MAXPATHLEN];
 348     short   port;
 349     int     tcp = FALSE;
 350
 351     strcpy(host, bindPath);
 352     if((tp = strchr(host, ':')) != 0) {
 353         *tp++ = 0;
 354         if((port = atoi(tp)) == 0) {
 355             *--tp = ':';
 356          } else {
 357             tcp = TRUE;
 358          }
 359     }
 360     if(tcp == TRUE) {
 361         struct  hostent *hp;
 362         if((hp = gethostbyname((*host ? host : "localhost"))) == NULL) {
 363             fprintf(stderr, "Unknown host: %s\n", bindPath);
 364             exit(1000);
 365         }
 366         sa.inetVariant.sin_family = AF_INET;
 367         memcpy(&sa.inetVariant.sin_addr, hp->h_addr, hp->h_length);
 368         sa.inetVariant.sin_port = htons(port);
 369         servLen = sizeof(sa.inetVariant);
 370         resultSock = socket(AF_INET, SOCK_STREAM, 0);
 371     } else {
 372         if(OS_BuildSockAddrUn(bindPath, &sa.unixVariant, &servLen)) {
 373             fprintf(stderr, "Listening socket's path name is too long.\n");
 374             exit(1000);
 375         }
 376         resultSock = socket(AF_UNIX, SOCK_STREAM, 0);
 377     }
 378
 379     ASSERT(resultSock >= 0);
 380     connectStatus = connect(resultSock, (struct sockaddr *) &sa.unixVariant,
 381                              servLen);
 382     if(connectStatus >= 0) {
 383         return resultSock;
 384     } else {
 385         /*
 386          * Most likely (errno == ENOENT || errno == ECONNREFUSED)
 387          * and no FCGI application server is running.
 388          */
 389         close(resultSock);
 390         return -1;
 391     }
 392 }
 393
 394 /*
 395  *--------------------------------------------------------------
 396  *
 397  * OS_Read --
 398  *
 399  *      Pass through to the unix read function.
 400  *
 401  * Results:
 402  *      Returns number of bytes read, 0, or -1 failure: errno
 403  *      contains actual error.
 404  *
 405  * Side effects:
 406  *      None.
 407  *
 408  *--------------------------------------------------------------
 409  */
 410 int OS_Read(int fd, char * buf, size_t len)
 411 {
 412     return(read(fd, buf, len));
 413 }
 414
 415 /*
 416  *--------------------------------------------------------------
 417  *
 418  * OS_Write --
 419  *
 420  *      Pass through to unix write function.
 421  *
 422  * Results:
 423  *      Returns number of bytes written, 0, or -1 failure: errno
 424  *      contains actual error.
 425  *
 426  * Side effects:
 427  *      none.
 428  *
 429  *--------------------------------------------------------------
 430  */
 431 int OS_Write(int fd, char * buf, size_t len)
 432 {
 433     return(write(fd, buf, len));
 434 }
 435
 436 /*
 437  *----------------------------------------------------------------------
 438  *
 439  * OS_SpawnChild --
 440  *
 441  *      Spawns a new FastCGI listener process.
 442  *
 443  * Results:
 444  *      0 if success, -1 if error.
 445  *
 446  * Side effects:
 447  *      Child process spawned.
 448  *
 449  *----------------------------------------------------------------------
 450  */
 451 int OS_SpawnChild(char *appPath, int listenFd)
 452 {
 453     int forkResult;
 454
 455     forkResult = fork();
 456     if(forkResult < 0) {
 457         exit(errno);
 458     }
 459
 460     if(forkResult == 0) {
 461         /*
 462          * Close STDIN unconditionally.  It's used by the parent
 463          * process for CGI communication.  The FastCGI applciation
 464          * will be replacing this with the FastCGI listenFd IF
 465          * STDIN_FILENO is the same as FCGI_LISTENSOCK_FILENO
 466          * (which it is on Unix).  Regardless, STDIN, STDOUT, and
 467          * STDERR will be closed as the FastCGI process uses a
 468          * multiplexed socket in their place.
 469          */
 470         close(STDIN_FILENO);
 471
 472         /*
 473          * If the listenFd is already the value of FCGI_LISTENSOCK_FILENO
 474          * we're set.  If not, change it so the child knows where to
 475          * get the listen socket from.
 476          */
 477         if(listenFd != FCGI_LISTENSOCK_FILENO) {
 478             dup2(listenFd, FCGI_LISTENSOCK_FILENO);
 479             close(listenFd);
 480         }
 481
 482         close(STDOUT_FILENO);
 483         close(STDERR_FILENO);
 484
 485         /*
 486          * We're a child.  Exec the application.
 487          *
 488          * XXX: entire environment passes through
 489          */
 490         execl(appPath, appPath, NULL);
 491         /*
 492          * XXX: Can't do this as we've already closed STDERR!!!
 493          *
 494          * perror("exec");
 495          */
 496         exit(errno);
 497     }
 498     return 0;
 499 }
 500
 501 /*
 502  *--------------------------------------------------------------
 503  *
 504  * OS_AsyncReadStdin --
 505  *
 506  *      This initiates an asynchronous read on the standard
 507  *      input handle.
 508  *
 509  *      The abstraction is necessary because Windows NT does not
 510  *      have a clean way of "select"ing a file descriptor for
 511  *      I/O.
 512  *
 513  * Results:
 514  *      -1 if error, 0 otherwise.
 515  *
 516  * Side effects:
 517  *      Asynchronous bit is set in the readfd variable and
 518  *      request is enqueued.
 519  *
 520  *--------------------------------------------------------------
 521  */
 522 int OS_AsyncReadStdin(void *buf, int len, OS_AsyncProc procPtr,
 523                       ClientData clientData)
 524 {
 525     int index = AIO_RD_IX(STDIN_FILENO);
 526
 527     asyncIoInUse = TRUE;
 528     ASSERT(asyncIoTable[index].inUse == 0);
 529     asyncIoTable[index].procPtr = procPtr;
 530     asyncIoTable[index].clientData = clientData;
 531     asyncIoTable[index].fd = STDIN_FILENO;
 532     asyncIoTable[index].len = len;
 533     asyncIoTable[index].offset = 0;
 534     asyncIoTable[index].buf = buf;
 535     asyncIoTable[index].inUse = 1;
 536     FD_SET(STDIN_FILENO, &readFdSet);
 537     if(STDIN_FILENO > maxFd)
 538         maxFd = STDIN_FILENO;
 539     return 0;
 540 }
 541
 542 static void GrowAsyncTable(void)
 543 {
 544     int oldTableSize = asyncIoTableSize;
 545
 546     asyncIoTableSize = asyncIoTableSize * 2;
 547     asyncIoTable = (AioInfo *)realloc(asyncIoTable, asyncIoTableSize * sizeof(AioInfo));
 548     if(asyncIoTable == NULL) {
 549         errno = ENOMEM;
 550         exit(errno);
 551     }
 552     memset((char *) &asyncIoTable[oldTableSize], 0,
 553            oldTableSize * sizeof(AioInfo));
 554
 555 }
 556
 557 /*
 558  *--------------------------------------------------------------
 559  *
 560  * OS_AsyncRead --
 561  *
 562  *      This initiates an asynchronous read on the file
 563  *      handle which may be a socket or named pipe.
 564  *
 565  *      We also must save the ProcPtr and ClientData, so later
 566  *      when the io completes, we know who to call.
 567  *
 568  *      We don't look at any results here (the ReadFile may
 569  *      return data if it is cached) but do all completion
 570  *      processing in OS_Select when we get the io completion
 571  *      port done notifications.  Then we call the callback.
 572  *
 573  * Results:
 574  *      -1 if error, 0 otherwise.
 575  *
 576  * Side effects:
 577  *      Asynchronous I/O operation is queued for completion.
 578  *
 579  *--------------------------------------------------------------
 580  */
 581 int OS_AsyncRead(int fd, int offset, void *buf, int len,
 582                  OS_AsyncProc procPtr, ClientData clientData)
 583 {
 584     int index = AIO_RD_IX(fd);
 585
 586     ASSERT(asyncIoTable != NULL);
 587     asyncIoInUse = TRUE;
 588
 589     if(fd > maxFd)
 590         maxFd = fd;
 591
 592     if(index >= asyncIoTableSize) {
 593         GrowAsyncTable();
 594     }
 595
 596     ASSERT(asyncIoTable[index].inUse == 0);
 597     asyncIoTable[index].procPtr = procPtr;
 598     asyncIoTable[index].clientData = clientData;
 599     asyncIoTable[index].fd = fd;
 600     asyncIoTable[index].len = len;
 601     asyncIoTable[index].offset = offset;
 602     asyncIoTable[index].buf = buf;
 603     asyncIoTable[index].inUse = 1;
 604     FD_SET(fd, &readFdSet);
 605     return 0;
 606 }
 607
 608 /*
 609  *--------------------------------------------------------------
 610  *
 611  * OS_AsyncWrite --
 612  *
 613  *      This initiates an asynchronous write on the "fake" file
 614  *      descriptor (which may be a file, socket, or named pipe).
 615  *      We also must save the ProcPtr and ClientData, so later
 616  *      when the io completes, we know who to call.
 617  *
 618  *      We don't look at any results here (the WriteFile generally
 619  *      completes immediately) but do all completion processing
 620  *      in OS_DoIo when we get the io completion port done
 621  *      notifications.  Then we call the callback.
 622  *
 623  * Results:
 624  *      -1 if error, 0 otherwise.
 625  *
 626  * Side effects:
 627  *      Asynchronous I/O operation is queued for completion.
 628  *
 629  *--------------------------------------------------------------
 630  */
 631 int OS_AsyncWrite(int fd, int offset, void *buf, int len,
 632                   OS_AsyncProc procPtr, ClientData clientData)
 633 {
 634     int index = AIO_WR_IX(fd);
 635
 636     asyncIoInUse = TRUE;
 637
 638     if(fd > maxFd)
 639         maxFd = fd;
 640
 641     if(index >= asyncIoTableSize) {
 642         GrowAsyncTable();
 643     }
 644
 645     ASSERT(asyncIoTable[index].inUse == 0);
 646     asyncIoTable[index].procPtr = procPtr;
 647     asyncIoTable[index].clientData = clientData;
 648     asyncIoTable[index].fd = fd;
 649     asyncIoTable[index].len = len;
 650     asyncIoTable[index].offset = offset;
 651     asyncIoTable[index].buf = buf;
 652     asyncIoTable[index].inUse = 1;
 653     FD_SET(fd, &writeFdSet);
 654     return 0;
 655 }
 656
 657 /*
 658  *--------------------------------------------------------------
 659  *
 660  * OS_Close --
 661  *
 662  *      Closes the descriptor.  This is a pass through to the
 663  *      Unix close.
 664  *
 665  * Results:
 666  *      0 for success, -1 on failure
 667  *
 668  * Side effects:
 669  *      None.
 670  *
 671  *--------------------------------------------------------------
 672  */
 673 int OS_Close(int fd)
 674 {
 675     if (fd == -1)
 676         return 0;
 677
 678     if (asyncIoInUse) {
 679         int index = AIO_RD_IX(fd);
 680
 681         FD_CLR(fd, &readFdSet);
 682         FD_CLR(fd, &readFdSetPost);
 683         if (asyncIoTable[index].inUse != 0) {
 684             asyncIoTable[index].inUse = 0;
 685         }
 686
 687         FD_CLR(fd, &writeFdSet);
 688         FD_CLR(fd, &writeFdSetPost);
 689         index = AIO_WR_IX(fd);
 690         if (asyncIoTable[index].inUse != 0) {
 691             asyncIoTable[index].inUse = 0;
 692         }
 693
 694         if (maxFd == fd) {
 695             maxFd--;
 696         }
 697     }
 698     return close(fd);
 699 }
 700
 701 /*
 702  *--------------------------------------------------------------
 703  *
 704  * OS_CloseRead --
 705  *
 706  *      Cancel outstanding asynchronous reads and prevent subsequent
 707  *      reads from completing.
 708  *
 709  * Results:
 710  *      Socket or file is shutdown. Return values mimic Unix shutdown:
 711  *              0 success, -1 failure
 712  *
 713  *--------------------------------------------------------------
 714  */
 715 int OS_CloseRead(int fd)
 716 {
 717     if(asyncIoTable[AIO_RD_IX(fd)].inUse != 0) {
 718         asyncIoTable[AIO_RD_IX(fd)].inUse = 0;
 719         FD_CLR(fd, &readFdSet);
 720     }
 721
 722     return shutdown(fd, 0);
 723 }
 724
 725 /*
 726  *--------------------------------------------------------------
 727  *
 728  * OS_DoIo --
 729  *
 730  *      This function was formerly OS_Select.  Its purpose is
 731  *      to pull I/O completion events off the queue and dispatch
 732  *      them to the appropriate place.
 733  *
 734  * Results:
 735  *      Returns 0.
 736  *
 737  * Side effects:
 738  *      Handlers are called.
 739  *
 740  *--------------------------------------------------------------
 741  */
 742 int OS_DoIo(struct timeval *tmo)
 743 {
 744     int fd, len, selectStatus;
 745     OS_AsyncProc procPtr;
 746     ClientData clientData;
 747     AioInfo *aioPtr;
 748     fd_set readFdSetCpy;
 749     fd_set writeFdSetCpy;
 750
 751     asyncIoInUse = TRUE;
 752     FD_ZERO(&readFdSetCpy);
 753     FD_ZERO(&writeFdSetCpy);
 754
 755     for(fd = 0; fd <= maxFd; fd++) {
 756         if(FD_ISSET(fd, &readFdSet)) {
 757             FD_SET(fd, &readFdSetCpy);
 758         }
 759         if(FD_ISSET(fd, &writeFdSet)) {
 760             FD_SET(fd, &writeFdSetCpy);
 761         }
 762     }
 763
 764     /*
 765      * If there were no completed events from a prior call, see if there's
 766      * any work to do.
 767      */
 768     if(numRdPosted == 0 && numWrPosted == 0) {
 769         selectStatus = select((maxFd+1), &readFdSetCpy, &writeFdSetCpy,
 770                               NULL, tmo);
 771         if(selectStatus < 0) {
 772             exit(errno);
 773         }
 774
 775         for(fd = 0; fd <= maxFd; fd++) {
 776             /*
 777              * Build up a list of completed events.  We'll work off of
 778              * this list as opposed to looping through the read and write
 779              * fd sets since they can be affected by a callbacl routine.
 780              */
 781             if(FD_ISSET(fd, &readFdSetCpy)) {
 782                 numRdPosted++;
 783                 FD_SET(fd, &readFdSetPost);
 784                 FD_CLR(fd, &readFdSet);
 785             }
 786
 787             if(FD_ISSET(fd, &writeFdSetCpy)) {
 788                 numWrPosted++;
 789                 FD_SET(fd, &writeFdSetPost);
 790                 FD_CLR(fd, &writeFdSet);
 791             }
 792         }
 793     }
 794
 795     if(numRdPosted == 0 && numWrPosted == 0)
 796         return 0;
 797
 798     for(fd = 0; fd <= maxFd; fd++) {
 799         /*
 800          * Do reads and dispatch callback.
 801          */
 802         if(FD_ISSET(fd, &readFdSetPost)
 803            && asyncIoTable[AIO_RD_IX(fd)].inUse) {
 804
 805             numRdPosted--;
 806             FD_CLR(fd, &readFdSetPost);
 807             aioPtr = &asyncIoTable[AIO_RD_IX(fd)];
 808
 809             len = read(aioPtr->fd, aioPtr->buf, aioPtr->len);
 810
 811             procPtr = aioPtr->procPtr;
 812             aioPtr->procPtr = NULL;
 813             clientData = aioPtr->clientData;
 814             aioPtr->inUse = 0;
 815
 816             (*procPtr)(clientData, len);
 817         }
 818
 819         /*
 820          * Do writes and dispatch callback.
 821          */
 822         if(FD_ISSET(fd, &writeFdSetPost) &&
 823            asyncIoTable[AIO_WR_IX(fd)].inUse) {
 824
 825             numWrPosted--;
 826             FD_CLR(fd, &writeFdSetPost);
 827             aioPtr = &asyncIoTable[AIO_WR_IX(fd)];
 828
 829             len = write(aioPtr->fd, aioPtr->buf, aioPtr->len);
 830
 831             procPtr = aioPtr->procPtr;
 832             aioPtr->procPtr = NULL;
 833             clientData = aioPtr->clientData;
 834             aioPtr->inUse = 0;
 835             (*procPtr)(clientData, len);
 836         }
 837     }
 838     return 0;
 839 }
 840
 841 /*
 842  * Not all systems have strdup().
 843  * @@@ autoconf should determine whether or not this is needed, but for now..
 844  */
 845 char * str_dup(const char * str)
 846 {
 847     char * sdup = (char *) malloc(strlen(str) + 1);
 848
 849     if (sdup)
 850         strcpy(sdup, str);
 851
 852     return sdup;
 853 }
 854
 855 /*
 856  *----------------------------------------------------------------------
 857  *
 858  * ClientAddrOK --
 859  *
 860  *      Checks if a client address is in a list of allowed addresses
 861  *
 862  * Results:
 863  *      TRUE if address list is empty or client address is present
 864  *      in the list, FALSE otherwise.
 865  *
 866  *----------------------------------------------------------------------
 867  */
 868 static int ClientAddrOK(struct sockaddr_in *saPtr, const char *clientList)
 869 {
 870     int result = FALSE;
 871     char *clientListCopy, *cur, *next;
 872
 873     if (clientList == NULL || *clientList == '\0') {
 874         return TRUE;
 875     }
 876
 877     clientListCopy = str_dup(clientList);
 878
 879     for (cur = clientListCopy; cur != NULL; cur = next) {
 880         next = strchr(cur, ',');
 881         if (next != NULL) {
 882             *next++ = '\0';
 883         }
 884         if (inet_addr(cur) == saPtr->sin_addr.s_addr) {
 885             result = TRUE;
 886             break;
 887         }
 888     }
 889
 890     free(clientListCopy);
 891     return result;
 892 }
 893
 894 /*
 895  *----------------------------------------------------------------------
 896  *
 897  * AcquireLock --
 898  *
 899  *      On platforms that implement concurrent calls to accept
 900  *      on a shared listening ipcFd, returns 0.  On other platforms,
 901  *      acquires an exclusive lock across all processes sharing a
 902  *      listening ipcFd, blocking until the lock has been acquired.
 903  *
 904  * Results:
 905  *      0 for successful call, -1 in case of system error (fatal).
 906  *
 907  * Side effects:
 908  *      This process now has the exclusive lock.
 909  *
 910  *----------------------------------------------------------------------
 911  */
 912 static int AcquireLock(int sock, int fail_on_intr)
 913 {
 914 #ifdef USE_LOCKING
 915     do {
 916         struct flock lock;
 917         lock.l_type = F_WRLCK;
 918         lock.l_start = 0;
 919         lock.l_whence = SEEK_SET;
 920         lock.l_len = 0;
 921
 922         if (fcntl(sock, F_SETLKW, &lock) != -1)
 923             return 0;
 924     } while (errno == EINTR && !fail_on_intr);
 925
 926     return -1;
 927
 928 #else
 929     return 0;
 930 #endif
 931 }
 932
 933 /*
 934  *----------------------------------------------------------------------
 935  *
 936  * ReleaseLock --
 937  *
 938  *      On platforms that implement concurrent calls to accept
 939  *      on a shared listening ipcFd, does nothing.  On other platforms,
 940  *      releases an exclusive lock acquired by AcquireLock.
 941  *
 942  * Results:
 943  *      0 for successful call, -1 in case of system error (fatal).
 944  *
 945  * Side effects:
 946  *      This process no longer holds the lock.
 947  *
 948  *----------------------------------------------------------------------
 949  */
 950 static int ReleaseLock(int sock)
 951 {
 952 #ifdef USE_LOCKING
 953     do {
 954         struct flock lock;
 955         lock.l_type = F_UNLCK;
 956         lock.l_start = 0;
 957         lock.l_whence = SEEK_SET;
 958         lock.l_len = 0;
 959
 960         if (fcntl(sock, F_SETLK, &lock) != -1)
 961             return 0;
 962     } while (errno == EINTR);
 963
 964     return -1;
 965
 966 #else
 967     return 0;
 968 #endif
 969 }
 970
 971 /**********************************************************************
 972  * Determine if the errno resulting from a failed accept() warrants a
 973  * retry or exit().  Based on Apache's http_main.c accept() handling
 974  * and Stevens' Unix Network Programming Vol 1, 2nd Ed, para. 15.6.
 975  */
 976 static int is_reasonable_accept_errno (const int error)
 977 {
 978     switch (error) {
 979 #ifdef EPROTO
 980         /* EPROTO on certain older kernels really means ECONNABORTED, so
 981          * we need to ignore it for them.  See discussion in new-httpd
 982          * archives nh.9701 search for EPROTO.  Also see nh.9603, search
 983          * for EPROTO:  There is potentially a bug in Solaris 2.x x<6, and
 984          * other boxes that implement tcp sockets in userland (i.e. on top of
 985          * STREAMS).  On these systems, EPROTO can actually result in a fatal
 986          * loop.  See PR#981 for example.  It's hard to handle both uses of
 987          * EPROTO. */
 988         case EPROTO:
 989 #endif
 990 #ifdef ECONNABORTED
 991         case ECONNABORTED:
 992 #endif
 993         /* Linux generates the rest of these, other tcp stacks (i.e.
 994          * bsd) tend to hide them behind getsockopt() interfaces.  They
 995          * occur when the net goes sour or the client disconnects after the
 996          * three-way handshake has been done in the kernel but before
 997          * userland has picked up the socket. */
 998 #ifdef ECONNRESET
 999         case ECONNRESET:
1000 #endif
1001 #ifdef ETIMEDOUT
1002         case ETIMEDOUT:
1003 #endif
1004 #ifdef EHOSTUNREACH
1005         case EHOSTUNREACH:
1006 #endif
1007 #ifdef ENETUNREACH
1008         case ENETUNREACH:
1009 #endif
1010             return 1;
1011
1012         default:
1013             return 0;
1014     }
1015 }
1016
1017 /**********************************************************************
1018  * This works around a problem on Linux 2.0.x and SCO Unixware (maybe
1019  * others?).  When a connect() is made to a Unix Domain socket, but its
1020  * not accept()ed before the web server gets impatient and close()s, an
1021  * accept() results in a valid file descriptor, but no data to read.
1022  * This causes a block on the first read() - which never returns!
1023  *
1024  * Another approach to this is to write() to the socket to provoke a
1025  * SIGPIPE, but this is a pain because of the FastCGI protocol, the fact
1026  * that whatever is written has to be universally ignored by all FastCGI
1027  * web servers, and a SIGPIPE handler has to be installed which returns
1028  * (or SIGPIPE is ignored).
1029  *
1030  * READABLE_UNIX_FD_DROP_DEAD_TIMEVAL = 2,0 by default.
1031  *
1032  * Making it shorter is probably safe, but I'll leave that to you.  Making
1033  * it 0,0 doesn't work reliably.  The shorter you can reliably make it,
1034  * the faster your application will be able to recover (waiting 2 seconds
1035  * may _cause_ the problem when there is a very high demand). At any rate,
1036  * this is better than perma-blocking.
1037  */
1038 static int is_af_unix_keeper(const int fd)
1039 {
1040     struct timeval tval = { READABLE_UNIX_FD_DROP_DEAD_TIMEVAL };
1041     fd_set read_fds;
1042
1043     FD_ZERO(&read_fds);
1044     FD_SET(fd, &read_fds);
1045
1046     return select(fd + 1, &read_fds, NULL, NULL, &tval) >= 0 && FD_ISSET(fd, &read_fds);
1047 }
1048
/*
 *----------------------------------------------------------------------
 *
 * OS_Accept --
 *
 *      Accepts a new FastCGI connection on listen_sock.  The accept()
 *      call is bracketed by AcquireLock()/ReleaseLock() on the listen
 *      socket.  AF_INET connections get TCP_NODELAY (where available)
 *      and are only returned if ClientAddrOK() approves the peer
 *      address against webServerAddrs; rejected connections are closed
 *      and accept() is tried again.  AF_UNIX connections are only
 *      returned if is_af_unix_keeper() approves them; otherwise they
 *      are closed and the whole lock/accept cycle is repeated.
 *
 *      When fail_on_intr is zero, accept() is retried after EINTR;
 *      when non-zero, EINTR is reported as a failure.
 *
 * Results:
 *      -1 if the operation fails (errno holds the cause), otherwise
 *      this is a valid IPC fd.
 *
 * Side effects:
 *      New IPC connection is accepted.
 *
 *----------------------------------------------------------------------
 */
int OS_Accept(int listen_sock, int fail_on_intr, const char *webServerAddrs)
{
    int conn;       /* accepted descriptor; not called "socket" so that
                     * it does not shadow socket() */
    union {
        struct sockaddr_un un;
        struct sockaddr_in in;
    } sa;

    for (;;) {
        if (AcquireLock(listen_sock, fail_on_intr))
            return -1;

        for (;;) {
            /* Ride out signal interruptions unless the caller wants
             * EINTR reported. */
            do {
#ifdef HAVE_SOCKLEN
                socklen_t len = sizeof(sa);
#else
                int len = sizeof(sa);
#endif
                conn = accept(listen_sock, (struct sockaddr *)&sa, &len);
            } while (conn < 0 && errno == EINTR && !fail_on_intr);

            if (conn < 0) {
                if (!is_reasonable_accept_errno(errno)) {
                    /* Hard failure: drop the lock but report the
                     * errno from accept(), not from the unlock. */
                    int errnoSave = errno;
                    ReleaseLock(listen_sock);
                    errno = errnoSave;
                    return (-1);
                }
                /* Transient per-connection error: try again. */
                errno = 0;
            }
            else {  /* conn >= 0 */
                /* Only TCP peers need the address check below. */
                if (sa.in.sin_family != AF_INET)
                    break;

#ifdef TCP_NODELAY
                {
                    /* No replies to outgoing data, so disable Nagle */
                    int set = 1;
                    setsockopt(conn, IPPROTO_TCP, TCP_NODELAY, (char *)&set, sizeof(set));
                }
#endif

                /* Check that the client IP address is approved */
                if (ClientAddrOK(&sa.in, webServerAddrs))
                    break;

                close(conn);
            }  /* conn >= 0 */
        }  /* for(;;) */

        if (ReleaseLock(listen_sock)) {
            /* Do not leak the connection we just accepted; keep the
             * errno from the failed unlock for the caller. */
            int errnoSave = errno;
            close(conn);
            errno = errnoSave;
            return (-1);
        }

        if (sa.in.sin_family != AF_UNIX || is_af_unix_keeper(conn))
            break;

        /* Dead AF_UNIX connection: discard it and start over. */
        close(conn);
    }  /* for(;;) - lock */

    return (conn);
}
1126
/*
 *----------------------------------------------------------------------
 *
 * OS_IpcClose
 *
 *      OS IPC routine to close an IPC connection.  Simply forwards the
 *      descriptor to OS_Close().
 *
 * Results:
 *      Whatever OS_Close() returns for ipcFd.
 *
 * Side effects:
 *      IPC connection is closed.
 *
 *----------------------------------------------------------------------
 */
int OS_IpcClose(int ipcFd)
{
    const int status = OS_Close(ipcFd);

    return status;
}
1146
1147 /*
1148  *----------------------------------------------------------------------
1149  *
1150  * OS_IsFcgi --
1151  *
1152  *      Determines whether this process is a FastCGI process or not.
1153  *
1154  * Results:
1155  *      Returns 1 if FastCGI, 0 if not.
1156  *
1157  * Side effects:
1158  *      None.
1159  *
1160  *----------------------------------------------------------------------
1161  */
1162 int OS_IsFcgi(int sock)
1163 {
1164         union {
1165         struct sockaddr_in in;
1166         struct sockaddr_un un;
1167     } sa;
1168 #ifdef HAVE_SOCKLEN
1169     socklen_t len = sizeof(sa);
1170 #else
1171     int len = sizeof(sa);
1172 #endif
1173
1174     errno = 0;
1175
1176     if (getpeername(sock, (struct sockaddr *)&sa, &len) != 0 && errno == ENOTCONN) {
1177         return TRUE;
1178     }
1179     else {
1180         return FALSE;
1181     }
1182 }
1183
/*
 *----------------------------------------------------------------------
 *
 * OS_SetFlags --
 *
 *      Sets selected flag bits in an open file descriptor: the current
 *      file status flags are fetched with F_GETFL, OR-ed with flags,
 *      and written back with F_SETFL.  Bits that are already set are
 *      left set.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      If either fcntl() call fails, the process exits with errno as
 *      its exit status.
 *
 *----------------------------------------------------------------------
 */
void OS_SetFlags(int fd, int flags)
{
    int current = fcntl(fd, F_GETFL, 0);

    if (current < 0)
        exit(errno);

    if (fcntl(fd, F_SETFL, current | flags) < 0)
        exit(errno);
}