Note potential bug when accept returns fds >32. ernst@wildape.com
[catagits/fcgi2.git] / libfcgi / os_unix.c
CommitLineData
aadcc3c8 1/*
0198fd3c 2 * os_unix.c --
3 *
4 * Description of file.
5 *
6 *
7 * Copyright (c) 1995 Open Market, Inc.
8 * All rights reserved.
9 *
10 * This file contains proprietary and confidential information and
aadcc3c8 11 * remains the unpublished property of Open Market, Inc. Use,
12 * disclosure, or reproduction is prohibited except as permitted by
13 * express written license agreement with Open Market, Inc.
0198fd3c 14 *
15 * Bill Snapper
16 * snapper@openmarket.com
17 */
18
19#ifndef lint
2b391aca 20static const char rcsid[] = "$Id: os_unix.c,v 1.18 2000/08/26 02:43:01 robs Exp $";
0198fd3c 21#endif /* not lint */
22
6ad90ad2 23#include "fcgi_config.h"
0198fd3c 24
27064097 25#include <sys/types.h>
26
8db9dd8f 27#ifdef HAVE_NETINET_IN_H
28#include <netinet/in.h>
29#endif
30
6ad90ad2 31#include <arpa/inet.h>
0198fd3c 32#include <assert.h>
0198fd3c 33#include <errno.h>
6ad90ad2 34#include <fcntl.h> /* for fcntl */
0198fd3c 35#include <math.h>
6ad90ad2 36#include <memory.h> /* for memchr() */
37#include <netinet/tcp.h>
38#include <stdarg.h>
39#include <stdio.h>
40#include <stdlib.h>
41#include <string.h>
42#include <sys/time.h>
0198fd3c 43#include <sys/un.h>
6ad90ad2 44
0198fd3c 45#ifdef HAVE_NETDB_H
46#include <netdb.h>
47#endif
0198fd3c 48
6ad90ad2 49#ifdef HAVE_SYS_SOCKET_H
50#include <sys/socket.h> /* for getpeername */
51#endif
52
53#ifdef HAVE_UNISTD_H
54#include <unistd.h>
55#endif
56
57#include "fastcgi.h"
58#include "fcgiapp.h"
59#include "fcgiappmisc.h"
60#include "fcgimisc.h"
0198fd3c 61#include "fcgios.h"
62
6ad90ad2 63#ifndef FALSE
64#define FALSE 0
65#endif
66
67#ifndef TRUE
68#define TRUE 1
0198fd3c 69#endif
70
/*
 * This structure holds an entry for each outstanding async I/O operation.
 */
74typedef struct {
75 OS_AsyncProc procPtr; /* callout completion procedure */
76 ClientData clientData; /* caller private data */
77 int fd;
78 int len;
79 int offset;
80 void *buf;
81 int inUse;
82} AioInfo;
83
84/*
85 * Entries in the async I/O table are allocated 2 per file descriptor.
86 *
87 * Read Entry Index = fd * 2
88 * Write Entry Index = (fd * 2) + 1
89 */
/*
 * The argument is parenthesized so that an expression such as
 * AIO_RD_IX(base + 1) scales the whole expression, not just its last term.
 */
#define AIO_RD_IX(fd) ((fd) * 2)
#define AIO_WR_IX(fd) (((fd) * 2) + 1)
92
2b391aca 93static int asyncIoInUse = FALSE;
0198fd3c 94static int asyncIoTableSize = 16;
95static AioInfo *asyncIoTable = NULL;
0198fd3c 96
0198fd3c 97static int libInitialized = FALSE;
98
99static fd_set readFdSet;
100static fd_set writeFdSet;
101
102static fd_set readFdSetPost;
103static int numRdPosted = 0;
104static fd_set writeFdSetPost;
105static int numWrPosted = 0;
106static int volatile maxFd = -1;
107
0198fd3c 108\f
109/*
110 *--------------------------------------------------------------
111 *
112 * OS_LibInit --
113 *
114 * Set up the OS library for use.
115 *
116 * NOTE: This function is really only needed for application
117 * asynchronous I/O. It will most likely change in the
118 * future to setup the multi-threaded environment.
119 *
120 * Results:
121 * Returns 0 if success, -1 if not.
122 *
123 * Side effects:
124 * Async I/O table allocated and initialized.
125 *
126 *--------------------------------------------------------------
127 */
128int OS_LibInit(int stdioFds[3])
129{
130 if(libInitialized)
131 return 0;
aadcc3c8 132
133 asyncIoTable = (AioInfo *)malloc(asyncIoTableSize * sizeof(AioInfo));
0198fd3c 134 if(asyncIoTable == NULL) {
135 errno = ENOMEM;
136 return -1;
137 }
138 memset((char *) asyncIoTable, 0,
139 asyncIoTableSize * sizeof(AioInfo));
140
141 FD_ZERO(&readFdSet);
142 FD_ZERO(&writeFdSet);
143 FD_ZERO(&readFdSetPost);
144 FD_ZERO(&writeFdSetPost);
145 libInitialized = TRUE;
146 return 0;
147}
148
149\f
150/*
151 *--------------------------------------------------------------
152 *
153 * OS_LibShutdown --
154 *
155 * Shutdown the OS library.
156 *
157 * Results:
158 * None.
159 *
160 * Side effects:
161 * Memory freed, fds closed.
162 *
163 *--------------------------------------------------------------
164 */
165void OS_LibShutdown()
166{
167 if(!libInitialized)
168 return;
aadcc3c8 169
0198fd3c 170 free(asyncIoTable);
171 asyncIoTable = NULL;
172 libInitialized = FALSE;
173 return;
174}
175
176\f
177/*
178 *----------------------------------------------------------------------
179 *
180 * OS_BuildSockAddrUn --
181 *
182 * Using the pathname bindPath, fill in the sockaddr_un structure
183 * *servAddrPtr and the length of this structure *servAddrLen.
184 *
185 * The format of the sockaddr_un structure changed incompatibly in
186 * 4.3BSD Reno. Digital UNIX supports both formats, other systems
187 * support one or the other.
188 *
189 * Results:
190 * 0 for normal return, -1 for failure (bindPath too long).
191 *
192 *----------------------------------------------------------------------
193 */
194
/*
 * Fill *servAddrPtr with an AF_UNIX address for bindPath and store the
 * address length to pass to bind()/connect() in *servAddrLen.
 * Returns 0 on success, -1 when bindPath does not fit in sun_path.
 */
static int OS_BuildSockAddrUn(const char *bindPath,
                              struct sockaddr_un *servAddrPtr,
                              int *servAddrLen)
{
    int pathLen = strlen(bindPath);

#ifdef HAVE_SOCKADDR_UN_SUN_LEN /* 4.3BSD Reno and later: BSDI, DEC */
    /* The reported length counts one byte past the path, so the path must
     * leave room for it. */
    if (pathLen >= sizeof(servAddrPtr->sun_path))
        return -1;
#else                           /* 4.3 BSD Tahoe: Solaris, HPUX, DEC, ... */
    /* A path that exactly fills sun_path is accepted here. */
    if (pathLen > sizeof(servAddrPtr->sun_path))
        return -1;
#endif

    /* Zero the whole structure first so unused bytes are deterministic. */
    memset((char *) servAddrPtr, 0, sizeof(*servAddrPtr));
    memcpy(servAddrPtr->sun_path, bindPath, pathLen);
    servAddrPtr->sun_family = AF_UNIX;

#ifdef HAVE_SOCKADDR_UN_SUN_LEN /* 4.3BSD Reno and later: BSDI, DEC */
    *servAddrLen = sizeof(servAddrPtr->sun_len)
                 + sizeof(servAddrPtr->sun_family)
                 + pathLen + 1;
    servAddrPtr->sun_len = *servAddrLen;
#else                           /* 4.3 BSD Tahoe: Solaris, HPUX, DEC, ... */
    *servAddrLen = sizeof(servAddrPtr->sun_family) + pathLen;
#endif

    return 0;
}
223\f
/*
 * Storage large enough for either address family used by this file, so a
 * single variable can be handed to bind()/connect().
 */
union SockAddrUnion {
    struct sockaddr_un unixVariant;     /* AF_UNIX (path-based) address */
    struct sockaddr_in inetVariant;     /* AF_INET (TCP/IP) address */
};
228
229\f
230/*
231 * OS_CreateLocalIpcFd --
232 *
233 * This procedure is responsible for creating the listener socket
234 * on Unix for local process communication. It will create a
235 * domain socket or a TCP/IP socket bound to "localhost" and return
236 * a file descriptor to it to the caller.
237 *
238 * Results:
239 * Listener socket created. This call returns either a valid
240 * file descriptor or -1 on error.
241 *
242 * Side effects:
243 * None.
244 *
245 *----------------------------------------------------------------------
246 */
0b7c9662 247int OS_CreateLocalIpcFd(const char *bindPath, int backlog)
0198fd3c 248{
249 int listenSock, servLen;
250 union SockAddrUnion sa;
251 int tcp = FALSE;
252 char *tp;
253 short port;
254 char host[MAXPATHLEN];
255
256 strcpy(host, bindPath);
257 if((tp = strchr(host, ':')) != 0) {
258 *tp++ = 0;
259 if((port = atoi(tp)) == 0) {
260 *--tp = ':';
261 } else {
262 tcp = TRUE;
263 }
264 }
265 if(tcp && (*host && strcmp(host, "localhost") != 0)) {
266 fprintf(stderr, "To start a service on a TCP port can not "
267 "specify a host name.\n"
268 "You should either use \"localhost:<port>\" or "
269 " just use \":<port>.\"\n");
270 exit(1);
271 }
272
273 if(tcp) {
274 listenSock = socket(AF_INET, SOCK_STREAM, 0);
275 if(listenSock >= 0) {
276 int flag = 1;
277 if(setsockopt(listenSock, SOL_SOCKET, SO_REUSEADDR,
278 (char *) &flag, sizeof(flag)) < 0) {
279 fprintf(stderr, "Can't set SO_REUSEADDR.\n");
280 exit(1001);
281 }
282 }
283 } else {
284 listenSock = socket(AF_UNIX, SOCK_STREAM, 0);
285 }
286 if(listenSock < 0) {
287 return -1;
288 }
289
290 /*
291 * Bind the listening socket.
292 */
293 if(tcp) {
294 memset((char *) &sa.inetVariant, 0, sizeof(sa.inetVariant));
295 sa.inetVariant.sin_family = AF_INET;
296 sa.inetVariant.sin_addr.s_addr = htonl(INADDR_ANY);
297 sa.inetVariant.sin_port = htons(port);
298 servLen = sizeof(sa.inetVariant);
299 } else {
300 unlink(bindPath);
301 if(OS_BuildSockAddrUn(bindPath, &sa.unixVariant, &servLen)) {
302 fprintf(stderr, "Listening socket's path name is too long.\n");
303 exit(1000);
304 }
305 }
306 if(bind(listenSock, (struct sockaddr *) &sa.unixVariant, servLen) < 0
0b7c9662 307 || listen(listenSock, backlog) < 0) {
0198fd3c 308 perror("bind/listen");
309 exit(errno);
310 }
311
312 return listenSock;
313}
314
315\f
316/*
317 *----------------------------------------------------------------------
318 *
319 * OS_FcgiConnect --
320 *
321 * Create the socket and connect to the remote application if
322 * possible.
323 *
324 * This was lifted from the cgi-fcgi application and was abstracted
325 * out because Windows NT does not have a domain socket and must
326 * use a named pipe which has a different API altogether.
327 *
328 * Results:
329 * -1 if fail or a valid file descriptor if connection succeeds.
330 *
331 * Side effects:
332 * Remote connection established.
333 *
334 *----------------------------------------------------------------------
335 */
336int OS_FcgiConnect(char *bindPath)
337{
338 union SockAddrUnion sa;
339 int servLen, resultSock;
340 int connectStatus;
341 char *tp;
342 char host[MAXPATHLEN];
343 short port;
344 int tcp = FALSE;
345
346 strcpy(host, bindPath);
347 if((tp = strchr(host, ':')) != 0) {
348 *tp++ = 0;
349 if((port = atoi(tp)) == 0) {
350 *--tp = ':';
351 } else {
352 tcp = TRUE;
353 }
354 }
355 if(tcp == TRUE) {
356 struct hostent *hp;
357 if((hp = gethostbyname((*host ? host : "localhost"))) == NULL) {
358 fprintf(stderr, "Unknown host: %s\n", bindPath);
359 exit(1000);
360 }
361 sa.inetVariant.sin_family = AF_INET;
203fd55e 362 memcpy(&sa.inetVariant.sin_addr, hp->h_addr, hp->h_length);
0198fd3c 363 sa.inetVariant.sin_port = htons(port);
364 servLen = sizeof(sa.inetVariant);
365 resultSock = socket(AF_INET, SOCK_STREAM, 0);
366 } else {
367 if(OS_BuildSockAddrUn(bindPath, &sa.unixVariant, &servLen)) {
368 fprintf(stderr, "Listening socket's path name is too long.\n");
369 exit(1000);
370 }
371 resultSock = socket(AF_UNIX, SOCK_STREAM, 0);
372 }
373
6f902b14 374 ASSERT(resultSock >= 0);
0198fd3c 375 connectStatus = connect(resultSock, (struct sockaddr *) &sa.unixVariant,
376 servLen);
377 if(connectStatus >= 0) {
378 return resultSock;
379 } else {
380 /*
381 * Most likely (errno == ENOENT || errno == ECONNREFUSED)
382 * and no FCGI application server is running.
383 */
384 close(resultSock);
385 return -1;
386 }
387}
aadcc3c8 388
0198fd3c 389\f
/*
 *--------------------------------------------------------------
 *
 * OS_Read --
 *
 *	Pass through to the unix read function.
 *
 * Results:
 *	Returns number of bytes read, 0, or -1 failure: errno
 *	contains actual error.
 *
 * Side effects:
 *	None.
 *
 *--------------------------------------------------------------
 */
int OS_Read(int fd, char * buf, size_t len)
{
    /* Thin wrapper: the result of read() is returned as an int. */
    int bytesRead = read(fd, buf, len);

    return bytesRead;
}
410\f
/*
 *--------------------------------------------------------------
 *
 * OS_Write --
 *
 *	Pass through to unix write function.
 *
 * Results:
 *	Returns number of bytes written, 0, or -1 failure: errno
 *	contains actual error.
 *
 * Side effects:
 *	none.
 *
 *--------------------------------------------------------------
 */
int OS_Write(int fd, char * buf, size_t len)
{
    /* Thin wrapper: the result of write() is returned as an int. */
    int bytesWritten = write(fd, buf, len);

    return bytesWritten;
}
431
432\f
433/*
434 *----------------------------------------------------------------------
435 *
436 * OS_SpawnChild --
437 *
438 * Spawns a new FastCGI listener process.
439 *
440 * Results:
441 * 0 if success, -1 if error.
442 *
443 * Side effects:
444 * Child process spawned.
445 *
446 *----------------------------------------------------------------------
447 */
448int OS_SpawnChild(char *appPath, int listenFd)
449{
450 int forkResult;
451
452 forkResult = fork();
453 if(forkResult < 0) {
454 exit(errno);
455 }
456
457 if(forkResult == 0) {
458 /*
459 * Close STDIN unconditionally. It's used by the parent
460 * process for CGI communication. The FastCGI applciation
461 * will be replacing this with the FastCGI listenFd IF
462 * STDIN_FILENO is the same as FCGI_LISTENSOCK_FILENO
463 * (which it is on Unix). Regardless, STDIN, STDOUT, and
464 * STDERR will be closed as the FastCGI process uses a
465 * multiplexed socket in their place.
466 */
467 close(STDIN_FILENO);
468
469 /*
470 * If the listenFd is already the value of FCGI_LISTENSOCK_FILENO
471 * we're set. If not, change it so the child knows where to
472 * get the listen socket from.
473 */
474 if(listenFd != FCGI_LISTENSOCK_FILENO) {
475 dup2(listenFd, FCGI_LISTENSOCK_FILENO);
476 close(listenFd);
477 }
478
479 close(STDOUT_FILENO);
480 close(STDERR_FILENO);
481
482 /*
483 * We're a child. Exec the application.
484 *
485 * XXX: entire environment passes through
486 */
487 execl(appPath, appPath, NULL);
488 /*
489 * XXX: Can't do this as we've already closed STDERR!!!
490 *
491 * perror("exec");
492 */
493 exit(errno);
494 }
495 return 0;
496}
497
498\f
499/*
500 *--------------------------------------------------------------
501 *
502 * OS_AsyncReadStdin --
503 *
504 * This initiates an asynchronous read on the standard
505 * input handle.
506 *
507 * The abstraction is necessary because Windows NT does not
508 * have a clean way of "select"ing a file descriptor for
509 * I/O.
510 *
511 * Results:
512 * -1 if error, 0 otherwise.
513 *
514 * Side effects:
515 * Asynchronous bit is set in the readfd variable and
516 * request is enqueued.
517 *
518 *--------------------------------------------------------------
519 */
aadcc3c8 520int OS_AsyncReadStdin(void *buf, int len, OS_AsyncProc procPtr,
0198fd3c 521 ClientData clientData)
522{
523 int index = AIO_RD_IX(STDIN_FILENO);
524
2b391aca 525 asyncIoInUse = TRUE;
0198fd3c 526 ASSERT(asyncIoTable[index].inUse == 0);
527 asyncIoTable[index].procPtr = procPtr;
528 asyncIoTable[index].clientData = clientData;
529 asyncIoTable[index].fd = STDIN_FILENO;
530 asyncIoTable[index].len = len;
531 asyncIoTable[index].offset = 0;
532 asyncIoTable[index].buf = buf;
533 asyncIoTable[index].inUse = 1;
534 FD_SET(STDIN_FILENO, &readFdSet);
535 if(STDIN_FILENO > maxFd)
536 maxFd = STDIN_FILENO;
537 return 0;
538}
539
540static void GrowAsyncTable(void)
541{
542 int oldTableSize = asyncIoTableSize;
aadcc3c8 543
0198fd3c 544 asyncIoTableSize = asyncIoTableSize * 2;
aadcc3c8 545 asyncIoTable = (AioInfo *)realloc(asyncIoTable, asyncIoTableSize * sizeof(AioInfo));
0198fd3c 546 if(asyncIoTable == NULL) {
547 errno = ENOMEM;
548 exit(errno);
549 }
550 memset((char *) &asyncIoTable[oldTableSize], 0,
551 oldTableSize * sizeof(AioInfo));
552
553}
554
555\f
556/*
557 *--------------------------------------------------------------
558 *
559 * OS_AsyncRead --
560 *
561 * This initiates an asynchronous read on the file
562 * handle which may be a socket or named pipe.
563 *
564 * We also must save the ProcPtr and ClientData, so later
565 * when the io completes, we know who to call.
566 *
567 * We don't look at any results here (the ReadFile may
568 * return data if it is cached) but do all completion
569 * processing in OS_Select when we get the io completion
570 * port done notifications. Then we call the callback.
571 *
572 * Results:
573 * -1 if error, 0 otherwise.
574 *
575 * Side effects:
576 * Asynchronous I/O operation is queued for completion.
577 *
578 *--------------------------------------------------------------
579 */
580int OS_AsyncRead(int fd, int offset, void *buf, int len,
581 OS_AsyncProc procPtr, ClientData clientData)
582{
583 int index = AIO_RD_IX(fd);
aadcc3c8 584
0198fd3c 585 ASSERT(asyncIoTable != NULL);
2b391aca 586 asyncIoInUse = TRUE;
0198fd3c 587
588 if(fd > maxFd)
589 maxFd = fd;
590
591 if(index >= asyncIoTableSize) {
592 GrowAsyncTable();
593 }
594
595 ASSERT(asyncIoTable[index].inUse == 0);
596 asyncIoTable[index].procPtr = procPtr;
597 asyncIoTable[index].clientData = clientData;
598 asyncIoTable[index].fd = fd;
599 asyncIoTable[index].len = len;
600 asyncIoTable[index].offset = offset;
601 asyncIoTable[index].buf = buf;
602 asyncIoTable[index].inUse = 1;
603 FD_SET(fd, &readFdSet);
604 return 0;
605}
606\f
607/*
608 *--------------------------------------------------------------
609 *
610 * OS_AsyncWrite --
611 *
612 * This initiates an asynchronous write on the "fake" file
613 * descriptor (which may be a file, socket, or named pipe).
614 * We also must save the ProcPtr and ClientData, so later
615 * when the io completes, we know who to call.
616 *
617 * We don't look at any results here (the WriteFile generally
618 * completes immediately) but do all completion processing
619 * in OS_DoIo when we get the io completion port done
620 * notifications. Then we call the callback.
621 *
622 * Results:
623 * -1 if error, 0 otherwise.
624 *
625 * Side effects:
626 * Asynchronous I/O operation is queued for completion.
627 *
628 *--------------------------------------------------------------
629 */
aadcc3c8 630int OS_AsyncWrite(int fd, int offset, void *buf, int len,
0198fd3c 631 OS_AsyncProc procPtr, ClientData clientData)
632{
633 int index = AIO_WR_IX(fd);
634
2b391aca 635 asyncIoInUse = TRUE;
636
0198fd3c 637 if(fd > maxFd)
638 maxFd = fd;
639
640 if(index >= asyncIoTableSize) {
641 GrowAsyncTable();
642 }
643
644 ASSERT(asyncIoTable[index].inUse == 0);
645 asyncIoTable[index].procPtr = procPtr;
646 asyncIoTable[index].clientData = clientData;
647 asyncIoTable[index].fd = fd;
648 asyncIoTable[index].len = len;
649 asyncIoTable[index].offset = offset;
650 asyncIoTable[index].buf = buf;
651 asyncIoTable[index].inUse = 1;
652 FD_SET(fd, &writeFdSet);
653 return 0;
654}
655\f
656/*
657 *--------------------------------------------------------------
658 *
659 * OS_Close --
660 *
661 * Closes the descriptor. This is a pass through to the
662 * Unix close.
663 *
664 * Results:
665 * 0 for success, -1 on failure
666 *
667 * Side effects:
668 * None.
669 *
670 *--------------------------------------------------------------
671 */
672int OS_Close(int fd)
673{
2b391aca 674 if (asyncIo) {
675 int index = AIO_RD_IX(fd);
aadcc3c8 676
2b391aca 677 FD_CLR(fd, &readFdSet);
678 FD_CLR(fd, &readFdSetPost);
679 if (asyncIoTable[index].inUse != 0) {
680 asyncIoTable[index].inUse = 0;
681 }
682
683 FD_CLR(fd, &writeFdSet);
684 FD_CLR(fd, &writeFdSetPost);
685 index = AIO_WR_IX(fd);
686 if (asyncIoTable[index].inUse != 0) {
687 asyncIoTable[index].inUse = 0;
688 }
aadcc3c8 689
2b391aca 690 if (maxFd == fd) {
691 maxFd--;
692 }
0198fd3c 693 }
0198fd3c 694 return close(fd);
695}
696\f
697/*
698 *--------------------------------------------------------------
699 *
700 * OS_CloseRead --
701 *
702 * Cancel outstanding asynchronous reads and prevent subsequent
703 * reads from completing.
704 *
705 * Results:
706 * Socket or file is shutdown. Return values mimic Unix shutdown:
707 * 0 success, -1 failure
708 *
709 *--------------------------------------------------------------
710 */
711int OS_CloseRead(int fd)
712{
713 if(asyncIoTable[AIO_RD_IX(fd)].inUse != 0) {
714 asyncIoTable[AIO_RD_IX(fd)].inUse = 0;
715 FD_CLR(fd, &readFdSet);
716 }
aadcc3c8 717
0198fd3c 718 return shutdown(fd, 0);
719}
720
721\f
/*
 *--------------------------------------------------------------
 *
 * OS_DoIo --
 *
 *	This function was formerly OS_Select.  Its purpose is
 *	to pull I/O completion events off the queue and dispatch
 *	them to the appropriate place.
 *
 * Results:
 *	Returns 0.
 *
 * Side effects:
 *	Handlers are called.
 *
 *--------------------------------------------------------------
 */
739int OS_DoIo(struct timeval *tmo)
740{
741 int fd, len, selectStatus;
742 OS_AsyncProc procPtr;
743 ClientData clientData;
744 AioInfo *aioPtr;
745 fd_set readFdSetCpy;
746 fd_set writeFdSetCpy;
747
2b391aca 748 asyncIoInUse = TRUE;
0198fd3c 749 FD_ZERO(&readFdSetCpy);
750 FD_ZERO(&writeFdSetCpy);
751
752 for(fd = 0; fd <= maxFd; fd++) {
753 if(FD_ISSET(fd, &readFdSet)) {
754 FD_SET(fd, &readFdSetCpy);
755 }
756 if(FD_ISSET(fd, &writeFdSet)) {
757 FD_SET(fd, &writeFdSetCpy);
758 }
759 }
aadcc3c8 760
0198fd3c 761 /*
762 * If there were no completed events from a prior call, see if there's
763 * any work to do.
764 */
765 if(numRdPosted == 0 && numWrPosted == 0) {
766 selectStatus = select((maxFd+1), &readFdSetCpy, &writeFdSetCpy,
767 NULL, tmo);
768 if(selectStatus < 0) {
769 exit(errno);
770 }
771
772 for(fd = 0; fd <= maxFd; fd++) {
773 /*
774 * Build up a list of completed events. We'll work off of
775 * this list as opposed to looping through the read and write
776 * fd sets since they can be affected by a callbacl routine.
777 */
778 if(FD_ISSET(fd, &readFdSetCpy)) {
779 numRdPosted++;
780 FD_SET(fd, &readFdSetPost);
781 FD_CLR(fd, &readFdSet);
782 }
783
784 if(FD_ISSET(fd, &writeFdSetCpy)) {
785 numWrPosted++;
786 FD_SET(fd, &writeFdSetPost);
787 FD_CLR(fd, &writeFdSet);
788 }
789 }
790 }
791
792 if(numRdPosted == 0 && numWrPosted == 0)
793 return 0;
aadcc3c8 794
0198fd3c 795 for(fd = 0; fd <= maxFd; fd++) {
796 /*
797 * Do reads and dispatch callback.
798 */
aadcc3c8 799 if(FD_ISSET(fd, &readFdSetPost)
0198fd3c 800 && asyncIoTable[AIO_RD_IX(fd)].inUse) {
801
802 numRdPosted--;
803 FD_CLR(fd, &readFdSetPost);
804 aioPtr = &asyncIoTable[AIO_RD_IX(fd)];
aadcc3c8 805
0198fd3c 806 len = read(aioPtr->fd, aioPtr->buf, aioPtr->len);
807
808 procPtr = aioPtr->procPtr;
809 aioPtr->procPtr = NULL;
810 clientData = aioPtr->clientData;
811 aioPtr->inUse = 0;
812
813 (*procPtr)(clientData, len);
814 }
815
816 /*
817 * Do writes and dispatch callback.
818 */
819 if(FD_ISSET(fd, &writeFdSetPost) &&
820 asyncIoTable[AIO_WR_IX(fd)].inUse) {
821
822 numWrPosted--;
823 FD_CLR(fd, &writeFdSetPost);
824 aioPtr = &asyncIoTable[AIO_WR_IX(fd)];
aadcc3c8 825
0198fd3c 826 len = write(aioPtr->fd, aioPtr->buf, aioPtr->len);
827
828 procPtr = aioPtr->procPtr;
829 aioPtr->procPtr = NULL;
830 clientData = aioPtr->clientData;
831 aioPtr->inUse = 0;
832 (*procPtr)(clientData, len);
833 }
834 }
835 return 0;
836}
837
838\f
839/*
840 *----------------------------------------------------------------------
841 *
842 * ClientAddrOK --
843 *
844 * Checks if a client address is in a list of allowed addresses
845 *
846 * Results:
847 * TRUE if address list is empty or client address is present
848 * in the list, FALSE otherwise.
849 *
850 *----------------------------------------------------------------------
851 */
/*
 * Decide whether the peer address in *saPtr is allowed to connect.
 *
 * clientList is a comma-separated list of dotted-quad IPv4 addresses.
 * Returns TRUE when the list is NULL or empty, or when the peer address
 * matches one entry; FALSE otherwise, including when the working copy of
 * the list cannot be allocated.
 */
static int ClientAddrOK(struct sockaddr_in *saPtr, const char *clientList)
{
    int result = FALSE;
    char *clientListCopy, *cur, *next;
    size_t listLen;

    if(clientList == NULL || *clientList == '\0') {
        return TRUE;
    }

    /*
     * The list is split in place below, so work on a private copy.  The
     * copy must be written through the pointer that malloc returned.
     */
    listLen = strlen(clientList);
    clientListCopy = (char *)malloc(listLen + 1);
    if(clientListCopy == NULL) {
        /* Without the list nothing can be verified: refuse the client. */
        return FALSE;
    }
    memcpy(clientListCopy, clientList, listLen);
    clientListCopy[listLen] = '\0';

    for(cur = clientListCopy; cur != NULL; cur = next) {
        /* Terminate the current entry and remember where the next starts. */
        next = strchr(cur, ',');
        if(next != NULL) {
            *next++ = '\0';
        }
        if(inet_addr(cur) == saPtr->sin_addr.s_addr) {
            result = TRUE;
            break;
        }
    }
    free(clientListCopy);
    return result;
}
882
883\f
884/*
885 *----------------------------------------------------------------------
886 *
887 * AcquireLock --
888 *
889 * On platforms that implement concurrent calls to accept
890 * on a shared listening ipcFd, returns 0. On other platforms,
891 * acquires an exclusive lock across all processes sharing a
892 * listening ipcFd, blocking until the lock has been acquired.
893 *
894 * Results:
895 * 0 for successful call, -1 in case of system error (fatal).
896 *
897 * Side effects:
898 * This process now has the exclusive lock.
899 *
900 *----------------------------------------------------------------------
901 */
/*
 * With USE_LOCKING: block until an exclusive write lock on sock is held.
 * An interrupted attempt is retried unless fail_on_intr is non-zero.
 * Without USE_LOCKING: nothing to do.
 * Returns 0 on success, -1 on failure.
 */
static int AcquireLock(int sock, int fail_on_intr)
{
#ifdef USE_LOCKING
    struct flock wholeFile;

    for (;;) {
        /* l_start 0 with l_len 0 is the range fcntl treats as "to EOF". */
        wholeFile.l_type = F_WRLCK;
        wholeFile.l_start = 0;
        wholeFile.l_whence = SEEK_SET;
        wholeFile.l_len = 0;

        if (fcntl(sock, F_SETLKW, &wholeFile) != -1) {
            return 0;
        }
        if (errno != EINTR || fail_on_intr) {
            return -1;
        }
    }
#else
    return 0;
#endif
}
922\f
923/*
924 *----------------------------------------------------------------------
925 *
926 * ReleaseLock --
927 *
928 * On platforms that implement concurrent calls to accept
929 * on a shared listening ipcFd, does nothing. On other platforms,
930 * releases an exclusive lock acquired by AcquireLock.
931 *
932 * Results:
933 * 0 for successful call, -1 in case of system error (fatal).
934 *
935 * Side effects:
936 * This process no longer holds the lock.
937 *
938 *----------------------------------------------------------------------
939 */
/*
 * With USE_LOCKING: drop the lock taken by AcquireLock on sock, retrying
 * while the attempt is interrupted.
 * Without USE_LOCKING: nothing to do.
 * Returns 0 on success, -1 on failure.
 */
static int ReleaseLock(int sock)
{
#ifdef USE_LOCKING
    struct flock wholeFile;

    for (;;) {
        wholeFile.l_type = F_UNLCK;
        wholeFile.l_start = 0;
        wholeFile.l_whence = SEEK_SET;
        wholeFile.l_len = 0;

        if (fcntl(sock, F_SETLK, &wholeFile) != -1) {
            return 0;
        }
        if (errno != EINTR) {
            return -1;
        }
    }
#else
    return 0;
#endif
}
960
961\f
69f62c0e 962/**********************************************************************
aadcc3c8 963 * Determine if the errno resulting from a failed accept() warrants a
69f62c0e 964 * retry or exit(). Based on Apache's http_main.c accept() handling
965 * and Stevens' Unix Network Programming Vol 1, 2nd Ed, para. 15.6.
966 */
/*
 * Returns 1 when a failed accept() with this errno should simply be
 * retried, 0 when the error should be reported to the caller.
 */
static int is_reasonable_accept_errno (const int error)
{
    int retryable = 0;

    switch (error) {
#ifdef EPROTO
    /* EPROTO on certain older kernels really means ECONNABORTED, so
     * it is ignored for them.  See discussion in new-httpd archives
     * nh.9701, search for EPROTO.  Also see nh.9603, search for EPROTO:
     * there is potentially a bug in Solaris 2.x x<6, and other boxes
     * that implement tcp sockets in userland (i.e. on top of STREAMS).
     * On these systems, EPROTO can actually result in a fatal loop.
     * See PR#981 for example.  It's hard to handle both uses of EPROTO. */
    case EPROTO:
#endif
#ifdef ECONNABORTED
    case ECONNABORTED:
#endif
    /* Linux generates the rest of these; other tcp stacks (i.e. bsd)
     * tend to hide them behind getsockopt() interfaces.  They occur
     * when the net goes sour or the client disconnects after the
     * three-way handshake has been done in the kernel but before
     * userland has picked up the socket. */
#ifdef ECONNRESET
    case ECONNRESET:
#endif
#ifdef ETIMEDOUT
    case ETIMEDOUT:
#endif
#ifdef EHOSTUNREACH
    case EHOSTUNREACH:
#endif
#ifdef ENETUNREACH
    case ENETUNREACH:
#endif
        retryable = 1;
        break;

    default:
        break;
    }

    return retryable;
}
1007
1008/**********************************************************************
aadcc3c8 1009 * This works around a problem on Linux 2.0.x and SCO Unixware (maybe
1010 * others?). When a connect() is made to a Unix Domain socket, but its
1011 * not accept()ed before the web server gets impatient and close()s, an
1012 * accept() results in a valid file descriptor, but no data to read.
1013 * This causes a block on the first read() - which never returns!
1014 *
1015 * Another approach to this is to write() to the socket to provoke a
1016 * SIGPIPE, but this is a pain because of the FastCGI protocol, the fact
1017 * that whatever is written has to be universally ignored by all FastCGI
1018 * web servers, and a SIGPIPE handler has to be installed which returns
1019 * (or SIGPIPE is ignored).
1020 *
1021 * READABLE_UNIX_FD_DROP_DEAD_TIMEVAL = 2,0 by default.
1022 *
1023 * Making it shorter is probably safe, but I'll leave that to you. Making
1024 * it 0,0 doesn't work reliably. The shorter you can reliably make it,
1025 * the faster your application will be able to recover (waiting 2 seconds
1026 * may _cause_ the problem when there is a very high demand). At any rate,
1027 * this is better than perma-blocking.
1028 */
69f62c0e 1029static int is_af_unix_keeper(const int fd)
1030{
1031 struct timeval tval = { READABLE_UNIX_FD_DROP_DEAD_TIMEVAL };
1032 fd_set read_fds;
1033
1034 FD_ZERO(&read_fds);
1035 FD_SET(fd, &read_fds);
aadcc3c8 1036
69f62c0e 1037 return select(fd + 1, &read_fds, NULL, NULL, &tval) >= 0 && FD_ISSET(fd, &read_fds);
1038}
1039
0198fd3c 1040/*
1041 *----------------------------------------------------------------------
1042 *
0b7c9662 1043 * OS_Accept --
0198fd3c 1044 *
1045 * Accepts a new FastCGI connection. This routine knows whether
1046 * we're dealing with TCP based sockets or NT Named Pipes for IPC.
1047 *
1048 * Results:
1049 * -1 if the operation fails, otherwise this is a valid IPC fd.
1050 *
1051 * Side effects:
1052 * New IPC connection is accepted.
1053 *
1054 *----------------------------------------------------------------------
1055 */
int OS_Accept(int listen_sock, int fail_on_intr, const char *webServerAddrs)
{
    int connFd;
    union {
        struct sockaddr_un un;
        struct sockaddr_in in;
    } peer;

    /* Outer loop: one pass per lock acquisition. */
    for (;;) {
        if (AcquireLock(listen_sock, fail_on_intr) != 0) {
            return -1;
        }

        /* Inner loop: accept until a usable connection arrives. */
        for (;;) {
            /* Retry interrupted accepts unless the caller opted out. */
            do {
#ifdef HAVE_SOCKLEN
                socklen_t peerLen = sizeof(peer);
#else
                int peerLen = sizeof(peer);
#endif
                connFd = accept(listen_sock, (struct sockaddr *)&peer,
                                &peerLen);
            } while (connFd < 0 && errno == EINTR && !fail_on_intr);

            if (connFd < 0) {
                if (is_reasonable_accept_errno(errno)) {
                    /* Transient failure: clear it and accept again. */
                    errno = 0;
                    continue;
                }
                {
                    /* Preserve the accept() error across the unlock. */
                    int acceptErrno = errno;
                    ReleaseLock(listen_sock);
                    errno = acceptErrno;
                    return (-1);
                }
            }

            /* Non-TCP peers skip the Nagle and address checks. */
            if (peer.in.sin_family != AF_INET) {
                break;
            }

#ifdef TCP_NODELAY
            {
                /* No replies to outgoing data, so disable Nagle */
                int enable = 1;
                setsockopt(connFd, IPPROTO_TCP, TCP_NODELAY,
                           (char *)&enable, sizeof(enable));
            }
#endif

            /* Check that the client IP address is approved */
            if (ClientAddrOK(&peer.in, webServerAddrs)) {
                break;
            }

            /* Unapproved client: drop it and keep accepting. */
            close(connFd);
        }

        if (ReleaseLock(listen_sock) != 0) {
            return (-1);
        }

        /* Unix-domain connections must also pass the readability probe. */
        if (peer.in.sin_family != AF_UNIX || is_af_unix_keeper(connFd)) {
            break;
        }

        close(connFd);
    }

    return (connFd);
}
1117\f
/*
 *----------------------------------------------------------------------
 *
 * OS_IpcClose
 *
 *	OS IPC routine to close an IPC connection.
 *
 * Results:
 *	Returns the result of OS_Close on ipcFd.
 *
 * Side effects:
 *	IPC connection is closed.
 *
 *----------------------------------------------------------------------
 */
int OS_IpcClose(int ipcFd)
{
    /* On Unix an IPC connection is an ordinary descriptor. */
    int closeStatus = OS_Close(ipcFd);

    return closeStatus;
}
1137
1138\f
1139/*
1140 *----------------------------------------------------------------------
1141 *
1142 * OS_IsFcgi --
1143 *
1144 * Determines whether this process is a FastCGI process or not.
1145 *
1146 * Results:
1147 * Returns 1 if FastCGI, 0 if not.
1148 *
1149 * Side effects:
1150 * None.
1151 *
1152 *----------------------------------------------------------------------
1153 */
0b7c9662 1154int OS_IsFcgi(int sock)
0198fd3c 1155{
b22c3782 1156 union {
1157 struct sockaddr_in in;
1158 struct sockaddr_un un;
1159 } sa;
8eac3e1b 1160#ifdef HAVE_SOCKLEN
ed728efb 1161 socklen_t len = sizeof(sa);
1162#else
b22c3782 1163 int len = sizeof(sa);
ed728efb 1164#endif
0198fd3c 1165
0b7c9662 1166 if (getpeername(sock, (struct sockaddr *)&sa, &len) != 0 && errno == ENOTCONN) {
1167 return TRUE;
1168 }
1169 else {
1170 return FALSE;
1171 }
0198fd3c 1172}
1173\f
1174/*
1175 *----------------------------------------------------------------------
1176 *
1177 * OS_SetFlags --
1178 *
1179 * Sets selected flag bits in an open file descriptor.
1180 *
1181 *----------------------------------------------------------------------
1182 */
void OS_SetFlags(int fd, int flags)
{
    /* Read the current status flags so the existing bits are kept. */
    int current = fcntl(fd, F_GETFL, 0);

    if (current < 0) {
        exit(errno);
    }

    /* Write back the union of the old flags and the requested ones. */
    if (fcntl(fd, F_SETFL, current | flags) < 0) {
        exit(errno);
    }
}