Watt-32 tcp/ip  2.2 dev-rel.10
accept.c
Go to the documentation of this file.
1 
5 /* BSD sockets functionality for Watt-32 TCP/IP
6  *
7  * Copyright (c) 1997-2002 Gisle Vanem <gvanem@yahoo.no>
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  * notice, this list of conditions and the following disclaimer in the
16  * documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  * must display the following acknowledgement:
19  * This product includes software developed by Gisle Vanem
20  * Bergen, Norway.
21  *
22  * THIS SOFTWARE IS PROVIDED BY ME (Gisle Vanem) AND CONTRIBUTORS ``AS IS''
23  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED. IN NO EVENT SHALL I OR CONTRIBUTORS BE LIABLE FOR ANY
26  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
27  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
29  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
31  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Version
34  *
35  * 0.5 : Dec 18, 1997 : G. Vanem - created
36  * 0.6 : Sep 16, 1999 : fixes by Claus Oberste-Brandenburg
37  * 0.7 : Nov 22, 1999 : G. Vanem - simplified the socket 'dup' action.
38  * Simply allocate a new 'remote_addr'.
39  * Poll backlogged listen-queue in accept-loop.
40  * 0.8 : Dec 15, 1999 : Listen-queue filled in _sock_append() and TCB
41  * is cloned when SYN is received in tcp_listen_state().
42  * 0.9 : Mar 09, 2000 : Plugged a memory leak in dup_bind() where 'tcp_sock'
43  * memory from socket() wasn't free'ed. Thanks to
44  * Francisco Pastor <fpastor.etra-id@etra.es> for
45  * finding this.
46  * 0.91: Jun 01, 2000 : Rearranged accept-loop for EWOULDBLOCK with no
47  * listen_queue element (dropped 'continue' construct).
48  * 0.92: Aug 07, 2002 : G. Vanem - added AF_INET6 support
49  */
50 
51 #include "socket.h"
52 
53 #if defined(USE_BSD_API)
54 
55 static int dup_bind (Socket *socket, Socket **clone, int idx);
56 static int alloc_addr (Socket *socket, Socket *clone);
57 static void listen_free(Socket *socket, int idx);
58 
59 int W32_CALL accept (int s, struct sockaddr *addr, int *addrlen)
60 {
61  Socket *socket, *clone = NULL;
62  volatile DWORD timeout;
63  volatile int newsock = -1;
64  volatile int que_idx;
65  volatile int maxconn;
66  volatile BOOL is_ip6;
67 
68  socket = _socklist_find (s);
69 
70  SOCK_PROLOGUE (socket, "\naccept:%d", s);
71 
72  is_ip6 = (socket->so_family == AF_INET6);
73 
74  if (socket->so_type != SOCK_STREAM)
75  {
76  SOCK_DEBUGF ((", EOPNOTSUPP"));
77  SOCK_ERRNO (EOPNOTSUPP);
78  return (-1);
79  }
80 
81  if (!socket->local_addr)
82  {
83  SOCK_DEBUGF ((", not bound"));
84  SOCK_ERRNO (ENOTCONN);
85  return (-1);
86  }
87 
88  if (!(socket->so_options & SO_ACCEPTCONN)) /* listen() not called */
89  {
90  SOCK_DEBUGF ((", not SO_ACCEPTCONN"));
91  SOCK_ERRNO (EINVAL);
92  return (-1);
93  }
94 
95  if (!(socket->so_state & (SS_ISLISTENING | SS_ISCONNECTING)))
96  {
97  SOCK_DEBUGF ((", not listening"));
98  SOCK_ERRNO (ENOTCONN);
99  return (-1);
100  }
101 
102  if (addr && addrlen)
103  {
104  int sa_len = is_ip6 ? sizeof(struct sockaddr_in6) :
105  sizeof(struct sockaddr_in);
106  if (*addrlen < sa_len)
107  {
108  SOCK_DEBUGF ((", EFAULT"));
109  SOCK_ERRNO (EFAULT);
110  return (-1);
111  }
112  VERIFY_RW (addr, sa_len);
113  }
114 
115  /* Get max possible TCBs on listen-queue.
116  * Some (or all) may be NULL until a SYN comes in.
117  */
118  maxconn = socket->backlog;
119  if (maxconn < 1 || maxconn > SOMAXCONN)
120  {
121  SOCK_FATAL (("%s(%d): Illegal socket backlog %d\n",
122  __FILE__, __LINE__, maxconn));
123  SOCK_ERRNO (EINVAL);
124  return (-1);
125  }
126 
127  /* A listening socket should have infinite timeout unless
128  * option SO_RCVTIMEO used. I.e. socket->timeout = 0.
129  * Ref. listen.c.
130  */
131  if (socket->timeout)
132  timeout = set_timeout (1000 * socket->timeout);
133  else timeout = 0UL;
134 
135  if (_sock_sig_setup() < 0)
136  {
137  SOCK_ERRNO (EINTR);
138  goto accept_fail;
139  }
140 
142 
143  /* Loop over all queue-slots and accept first connected TCB
144  */
145  for (que_idx = 0; ; que_idx = (que_idx+1) % maxconn)
146  {
147  _tcp_Socket *sk = socket->listen_queue [que_idx];
148 
149  tcp_tick (NULL);
150 
151  WATT_YIELD();
152 
153  if (_sock_sig_pending())
154  {
155  SOCK_DEBUGF ((", EINTR"));
156  SOCK_ERRNO (EINTR);
157  goto accept_fail;
158  }
159 
160  /* No SYNs received yet. This shouldn't happen if we called 'accept()'
161  * after 'select_s()' said that socket was readable. (At least one
162  * connection on the listen-queue).
163  */
164  if (sk)
165  {
166  /* This could happen if 'accept()' was called too long after connection
167  * was established and then closed by peer. This could also happen if
168  * someone did a portscan on us. I.e. he sent 'SYN', we replied with
169  * 'SYN+ACK' and he never sent an 'ACK'. Thus we timeout in
170  * 'tcp_Retransmitter()' and abort the TCB.
171  *
172  * Queue slot is in any case ready for another 'SYN' to come and be
173  * handled by '_sock_append()'.
174  */
175  if (sk->state >= tcp_StateESTCL && sk->ip_type == 0)
176  {
177  SOCK_DEBUGF ((", aborted TCB (idx %d)", que_idx));
178  listen_free (socket, que_idx);
179  continue;
180  }
181 
185  if (sk->state >= tcp_StateESTAB && sk->state < tcp_StateCLOSED)
186  {
187  SOCK_DEBUGF ((", connected! (idx %d)", que_idx));
188  break;
189  }
190  }
191 
192  /* We've polled all listen-queue slots and none are connected.
193  * Return fail if socket is non-blocking.
194  */
195  if (que_idx == maxconn-1 && (socket->so_state & SS_NBIO))
196  {
197  SOCK_DEBUGF ((", would block"));
198  SOCK_ERRNO (EWOULDBLOCK);
199  goto accept_fail;
200  }
201 
202  if (chk_timeout(timeout))
203  {
204  SOCK_DEBUGF ((", ETIMEDOUT"));
205  SOCK_ERRNO (ETIMEDOUT);
206  goto accept_fail;
207  }
208  }
209 
210  /* We're here only when above 'tcp_established()' succeeded.
211  * Now duplicate 'socket' into a new listening socket 'clone'
212  * with handle 'newsock'.
213  */
214  SOCK_ENTER_SCOPE();
215  newsock = dup_bind (socket, &clone, que_idx);
216  if (newsock < 0)
217  goto accept_fail;
218 
219  if (!alloc_addr(socket, clone))
220  {
221  SOCK_DEL_FD (newsock);
222  goto accept_fail;
223  }
224 
225  /* Clone is connected, but *not* listening/accepting.
226  * Note: other 'so_state' bits from parent is unchanged.
227  * e.g. clone may be non-blocking.
228  */
229  clone->so_state |= SS_ISCONNECTED;
230  clone->so_state &= ~(SS_ISLISTENING | SS_ISCONNECTING);
231  clone->so_options &= ~SO_ACCEPTCONN;
232 
233 #if 1
234  /* Prevent a PUSH on first segment sent.
235  */
236  sock_noflush ((sock_type*)clone->tcp_sock);
237 #endif
238 
239 #if defined(USE_IPV6)
240  if (is_ip6)
241  {
242  struct sockaddr_in6 *ra = (struct sockaddr_in6*)clone->remote_addr;
243 
244  SOCK_DEBUGF (("\nremote %s (%u)",
245  _inet6_ntoa (&ra->sin6_addr), ntohs(ra->sin6_port)));
246  ARGSUSED (ra);
247  }
248  else
249 #endif
250  SOCK_DEBUGF (("\nremote %s (%u)",
251  inet_ntoa (clone->remote_addr->sin_addr),
252  ntohs (clone->remote_addr->sin_port)));
253 
254  if (addr && addrlen)
255  {
256 #if defined(USE_IPV6)
257  if (socket->so_family == AF_INET6)
258  {
259  struct sockaddr_in6 *sa = (struct sockaddr_in6*)addr;
260 
261  memset (sa, 0, sizeof(*sa));
262  memcpy (&sa->sin6_addr, &clone->remote_addr->sin_addr, sizeof(sa->sin6_addr));
263  sa->sin6_family = AF_INET6;
264  sa->sin6_port = clone->remote_addr->sin_port;
265  sa->sin6_flowinfo = sa->sin6_scope_id = 0; /* !! */
266  *addrlen = sizeof(*sa);
267  }
268  else
269 #endif
270  {
271  struct sockaddr_in *sa = (struct sockaddr_in*)addr;
272 
273  sa->sin_family = AF_INET;
274  sa->sin_port = clone->remote_addr->sin_port;
275  sa->sin_addr = clone->remote_addr->sin_addr;
276  memset (sa->sin_zero, 0, sizeof(sa->sin_zero));
277  *addrlen = sizeof(*sa);
278  }
279  }
280 
281  SOCK_LEAVE_SCOPE();
282  _sock_crit_stop();
283  _sock_sig_restore();
284  return (newsock);
285 
286 accept_fail:
287  SOCK_LEAVE_SCOPE();
288  _sock_crit_stop();
289  _sock_sig_restore();
290  return (-1);
291 }
292 
293 
294 /*
295  * Duplicate a SOCK_STREAM 'sock' to '*newconn'. Doesn't set
296  * local/remote addresses. Transfer TCB from listen-queue[idx] of
297  * 'sock' to TCB of 'clone'.
298  */
299 static int dup_bind (Socket *sock, Socket **newconn, int idx)
300 {
301  Socket *clone;
302  int fd = socket (sock->so_family, SOCK_STREAM, IPPROTO_TCP);
303 
304  if (fd < 0)
305  return (fd);
306 
307  clone = _socklist_find (fd); /* cannot fail */
308 
309  /* child gets state from parent
310  */
311  clone->timeout = sock->timeout;
312  clone->close_time = sock->close_time;
313  clone->keepalive = sock->keepalive;
314  clone->ip_tos = sock->ip_tos;
315  clone->ip_ttl = sock->ip_ttl;
316  clone->so_state = sock->so_state;
317  clone->so_options = sock->so_options;
318 
319  /* TCB for clone is from listen-queue[idx]; free tcp_sock from
320  * socket(). Reuse listen-queue slot for another SYN.
321  */
322  free (clone->tcp_sock);
323  clone->tcp_sock = sock->listen_queue[idx];
324  sock->listen_queue [idx] = NULL;
325  sock->syn_timestamp[idx] = 0UL;
326  *newconn = clone;
327  return (fd);
328 }
329 
330 /*
331  * Allocate and fill local/remote addresses for 'clone'.
332  * Take local address from 'socket', and remote address from
333  * TCB of clone.
334  */
335 static int alloc_addr (Socket *socket, Socket *clone)
336 {
337  BOOL is_ip6 = (socket->so_family == AF_INET6);
338  int sa_len = is_ip6 ? sizeof(struct sockaddr_in6) :
339  sizeof(struct sockaddr_in);
340 
341  clone->local_addr = SOCK_CALLOC (sa_len);
342  if (!clone->local_addr)
343  {
344  SOCK_DEBUGF ((", ENOMEM"));
345  SOCK_ERRNO (ENOMEM);
346  return (0);
347  }
348 
349  clone->remote_addr = SOCK_CALLOC (sa_len);
350  if (!clone->remote_addr)
351  {
352  SOCK_DEBUGF ((", ENOMEM"));
353  SOCK_ERRNO (ENOMEM);
354  free (clone->local_addr);
355  clone->local_addr = NULL;
356  return (0);
357  }
358 
359 #if defined(USE_IPV6)
360  if (is_ip6)
361  {
362  struct sockaddr_in6 *la = (struct sockaddr_in6*) clone->local_addr;
363  struct sockaddr_in6 *ra = (struct sockaddr_in6*) clone->remote_addr;
364  struct sockaddr_in6 *sa = (struct sockaddr_in6*) socket->local_addr;
365 
366  la->sin6_family = AF_INET6;
367  la->sin6_port = sa->sin6_port;
368  memcpy (&la->sin6_addr, &sa->sin6_addr, sizeof(la->sin6_addr));
369 
370  ra->sin6_family = AF_INET6;
371  ra->sin6_port = htons (clone->tcp_sock->hisport);
372  memcpy (&ra->sin6_addr, &clone->tcp_sock->his6addr, sizeof(ra->sin6_addr));
373  }
374  else
375 #endif
376  {
377  struct in_addr peer;
378 
379  peer.s_addr = htonl (clone->tcp_sock->hisaddr);
380  clone->local_addr->sin_family = AF_INET;
381  clone->local_addr->sin_port = socket->local_addr->sin_port;
382  clone->local_addr->sin_addr = socket->local_addr->sin_addr;
383 
384  clone->remote_addr->sin_family = AF_INET;
385  clone->remote_addr->sin_port = htons (clone->tcp_sock->hisport);
386  clone->remote_addr->sin_addr = peer;
387  }
388  ARGSUSED (is_ip6);
389  return (1);
390 }
391 
392 /*
393  * Release a listen-queue slot and associated memory.
394  */
395 static void listen_free (Socket *socket, int idx)
396 {
397  _tcp_Socket *tcb = socket->listen_queue [idx];
398 
399  _tcp_unthread (tcb, TRUE);
400  _sock_free_rcv_buf ((sock_type*)tcb); /* free large Rx buffer */
401  free (tcb);
402  socket->listen_queue [idx] = NULL;
403 }
404 
419 {
420  _tcp_Socket *clone;
421  _tcp_Socket *orig = *tcp;
422  Socket *sock = NULL; /* associated socket for 'orig' */
423  int i;
424 
425  /* Lookup BSD-socket for TCB
426  */
427  if (!_bsd_socket_hook ||
428  (sock = (*_bsd_socket_hook)(BSO_FIND_SOCK,orig)) == NULL)
429  {
430  /* This could be a native Wattcp socket. Pass it on in
431  * tcp_listen_state() for further processing.
432  */
433  SOCK_DEBUGF (("\n sock_append: not found!?"));
434  return (1);
435  }
436 
437  SOCK_DEBUGF (("\n sock_append:%d", sock->fd));
438 
439  if (!(sock->so_options & SO_ACCEPTCONN))
440  {
441  SOCK_DEBUGF ((", not SO_ACCEPTCONN"));
442  return (0); /* How could this happen (SYN attack)? */
443  }
444 
445  /* Find the first vacant slot for this clone
446  */
447  for (i = 0; i < sock->backlog; i++)
448  if (!sock->listen_queue[i])
449  break;
450 
453  if (i >= sock->backlog || i >= SOMAXCONN)
454  {
457  SOCK_DEBUGF ((", queue full (idx %d)", i));
458  return (0);
459  }
460 
461  SOCK_DEBUGF ((", idx %d", i));
462 
463  clone = (_tcp_Socket*) SOCK_CALLOC (sizeof(*clone));
464  if (!clone)
465  {
466  SOCK_DEBUGF ((", ENOMEM"));
467  return (0);
468  }
469 
470  /* Link in the semi-connected socket (SYN received, ACK will be sent)
471  */
472  sock->listen_queue[i] = clone;
473  sock->syn_timestamp[i] = set_timeout (0);
474 
475  /* Copy the TCB to clone. Tx buffer of clone must not
476  * be set to parent's Tx buffer.
477  */
478  memcpy (clone, orig, sizeof(*clone));
479  clone->tx_data = &clone->tx_buf[0];
480  clone->tx_datalen = 0;
481  clone->max_tx_data = sizeof (clone->tx_buf) - 1;
482 
483 #if defined(USE_DEBUG) /* !!needs some work */
484  clone->last_acknum[0] = orig->last_acknum[0];
485  clone->last_acknum[1] = orig->last_acknum[1];
486  clone->last_seqnum[0] = orig->last_seqnum[0];
487  clone->last_seqnum[1] = orig->last_seqnum[1];
488  orig->last_acknum[0] = orig->last_acknum[1] = 0L;
489  orig->last_seqnum[0] = orig->last_seqnum[1] = 0L;
490 #endif
491 
492  /* Increase the TCP window (to 16kB by default)
493  */
495 
496  /* Undo what tcp_handler() and tcp_listen_state() did to
497  * this listening socket.
498  */
499  orig->hisport = 0;
500  orig->hisaddr = 0;
501  orig->myaddr = 0;
502 #if defined(USE_IPV6)
503  memset (&orig->my6addr, 0, sizeof(orig->my6addr));
504 #endif
505 
506  orig->send_next = INIT_SEQ(); /* set new ISS */
507  orig->unhappy = FALSE;
508  CLR_PEER_MAC_ADDR (orig);
509 
510  clone->next = _tcp_allsocs;
511  _tcp_allsocs = clone; /* prepend clone to TCB-list */
512  *tcp = clone; /* the new TCB is now the clone */
513  return (1);
514 }
515 
516 /*
517  A handy note from:
518  http://help.netscape.com/kb/corporate/960513-73.html
519 
520 Solaris 2.3, 2.4, and 2.5 have a listen backlog queue for incoming
521 TCP/IP connections; its maximum length is 5 by default. If you leave
522 this set to the default, then you will frequently see connections to
523 your web server time out under moderate loads, even though there are
524 enough idle web server listener processes available to handle the
525 connections and your Solaris system shows no other signs of resource
526 saturation.
527 
528 The listen backlog queue holds connections that are "half-open" (in
529 the process of being opened), as well as connections that have been
530 fully opened but have not yet been accepted by any local processes.
531 This has no effect on the total number of open TCP/IP connections that
532 your Solaris system can deal with at once; it only means that your
533 system can't juggle more than five loose connections at a time, and
534 any other connections that come in while Solaris is busy with five
535 loose connections will be dropped and will time out on the client end.
536 
537 On Solaris 2.3 and 2.4, you can bring the maximum queue length from 5
538 up to 32 by using the "ndd" command, which must be run as root:
539 
540  /usr/sbin/ndd -set /dev/tcp tcp_conn_req_max 32
541 
542  It is theoretically possible to increase this number beyond 32,
543  although this is not recommended. If increasing the maximum length
544  from 5 to 32 solved the problem temporarily for you but your web
545  server's traffic has now increased to a point where the symptoms
546  appear with the maximum queue length set to 32, then you should
547  contact Sun for further help with this.
548 
549  The Netscape servers on any Unix system will request a listen backlog
550  queue length of 128 when they run; the operating system then reduces
551  that to something it can handle.
552 
553  Solaris 2.5 allows a maximum listen backlog queue length as high as
554  1024 (and you can raise it in the same way), but it still defaults to
555  a maximum queue length of 5.
556 
557 */
558 
559 #endif /* USE_BSD_API */
int _sock_append(_tcp_Socket **tcp)
Called from tcp_fsm.c / tcp_listen_state() (via _bsd_socket_hook) to append a new connection to the l...
Definition: accept.c:418
BYTE * tx_data
Tx data buffer (default tx_buf[])
Definition: wattcp.h:682
Definition: socket.h:137
DWORD tcp_recv_win
RWIN for BSD sockets only.
Definition: pctcp.c:135
UINT max_tx_data
Last index for tx_data[].
Definition: wattcp.h:681
Definition: if.h:84
ip6_address his6addr
peer's IPv6 address
Definition: wattcp.h:620
UINT tx_datalen
number of bytes of data to send
Definition: wattcp.h:680
void _sock_free_rcv_buf(sock_type *s)
Free receive buffer associated with udp/tcp sockets.
Definition: socket.c:267
ip6_address my6addr
our IPv6 address
Definition: wattcp.h:619
DWORD W32_CALL set_timeout(DWORD msec)
Return time for when given timeout (msec) expires.
Definition: timer.c:503
Socket * _socklist_find(int s)
Returns a pointer to the Socket structure associated with socket 's'.
Definition: socket.c:1534
int _sock_set_rcv_buf(sock_type *s, size_t len)
Setup a bigger receive buffer, the default in Wattcp is only 2k.
Definition: socket.c:252
BYTE tx_buf[tcp_MaxTxBufSize+1]
data for transmission
Definition: wattcp.h:683
Definition: in.h:146
BYTE unhappy
flag, indicates retransmitting segt's
Definition: wattcp.h:632
_tcp_Socket * _tcp_unthread(_tcp_Socket *ds, BOOL free_tx)
Unthread a socket from the tcp socket list, if it's there.
Definition: pctcp.c:596
int W32_CALL socket(int family, int type, int protocol)
socket().
Definition: socket.c:1794
void _sock_crit_start(void)
Start a critical region.
Definition: socket.c:1296
void _sock_crit_stop(void)
Mark the end of a critical region.
Definition: socket.c:1306
void *MS_CDECL * _bsd_socket_hook(enum BSD_SOCKET_OPS op,...)
This hook is to prevent the BSD-socket API being linked in by default.
void W32_CALL sock_noflush(sock_type *s)
Sets non-flush mode on next TCP write.
Definition: pctcp.c:3088
int W32_CALL accept(int s, struct sockaddr *addr, int *addrlen)
Definition: accept.c:59
const char * _inet6_ntoa(const void *ip)
Convert an IPv6-address 'ip' into a string.
Definition: netaddr.c:401
WORD W32_CALL tcp_tick(sock_type *s)
Must be called periodically by user application (or BSD socket API).
Definition: pctcp.c:1389
_tcp_Socket * _tcp_allsocs
list of tcp-sockets
Definition: pctcp.c:137
DWORD last_seqnum[2]
increments
Definition: wattcp.h:629
DWORD last_acknum[2]
for pcdbug.c; to follow SEQ/ACK
Definition: wattcp.h:628
BOOL W32_CALL chk_timeout(DWORD value)
Check if milli-sec value has expired:
Definition: timer.c:547
DWORD send_next
SEQ we send but not ACK-ed by peer.
Definition: wattcp.h:624
struct tcp_Socket * next
link to next tcp-socket
Definition: wattcp.h:613
UINT state
tcp connection state
Definition: wattcp.h:622