/* minix/net/lwip/tcp.c */
#include <stdlib.h>
#include <assert.h>
#include <minix/sysutil.h>

#include <sys/ioc_net.h>
#include <net/gen/in.h>
#include <net/gen/tcp.h>
#include <net/gen/tcp_io.h>

#include <lwip/tcp.h>
#include <lwip/tcp_impl.h>
#include <lwip/ip_addr.h>

#include <minix/netsock.h>
#include "proto.h"

#define TCP_BUF_SIZE            (32 << 10)

#define sock_alloc_buf(s)       debug_malloc(s)
#define sock_free_buf(x)        debug_free(x)

static int do_tcp_debug;

#if 0
#define debug_tcp_print(str, ...) printf("LWIP %s:%d : " str "\n", \
                __func__, __LINE__, ##__VA_ARGS__)
#else
#define debug_tcp_print(...) debug_print(__VA_ARGS__)
#endif

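/*
 * Data written by userspace is kept in a chain of wbuf buffers until the
 * remote side acknowledges it. For each buffer, 'written' counts bytes that
 * have already been acknowledged, 'unacked' counts bytes handed to lwIP but
 * not yet acknowledged, and 'rem_len' counts bytes not yet passed to
 * tcp_write() at all.
 */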
struct wbuf {
        unsigned int    len;
        unsigned int    written;
        unsigned int    unacked;
        unsigned int    rem_len;
        struct wbuf     * next;
        char            data[];
};

struct wbuf_chain {
        struct wbuf * head;
        struct wbuf * tail;
        struct wbuf * unsent; /* points to the first buffer that contains unsent
                                 data. It may point anywhere between head and
                                 tail */
};

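/*
 * lwIP error callback. Translates the lwIP error code into an errno value,
 * replies to a pending request on the socket (or notifies select), and drops
 * our reference to the pcb, which lwIP has freed or is about to free.
 */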
static void tcp_error_callback(void *arg, err_t err)
{
        int perr;
        struct socket * sock = (struct socket *) arg;

        debug_tcp_print("socket num %ld err %d", get_sock_num(sock), err);

        switch (err) {
        case ERR_RST:
                perr = ECONNREFUSED;
                break;
        case ERR_CLSD:
                perr = EPIPE;
                break;
        case ERR_CONN:
                perr = ENOTCONN;
                break;
        default:
                perr = EIO;
        }

        /* FIXME: what if this is for a write that was already replied to? */
        if (sock->flags & SOCK_FLG_OP_PENDING) {
                send_req_reply(&sock->req, perr);
                sock->flags &= ~SOCK_FLG_OP_PENDING;
        } else if (sock_select_set(sock))
                sock_select_notify(sock);
        /*
         * When the error callback is called, the pcb either does not exist
         * anymore or is going to be deallocated soon after. We must not use
         * the pcb anymore.
         */
        sock->pcb = NULL;
}

static int tcp_fill_new_socket(struct socket * sock, struct tcp_pcb * pcb)
{
        struct wbuf_chain * wc;

        if (!(wc = malloc(sizeof(struct wbuf_chain))))
                return ENOMEM;

        wc->head = wc->tail = wc->unsent = NULL;
        sock->buf = wc;
        sock->buf_size = 0;

        sock->pcb = pcb;
        tcp_arg(pcb, sock);
        tcp_err(pcb, tcp_error_callback);
        tcp_nagle_disable(pcb);

        return OK;
}

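/*
 * open() on a TCP socket: allocate a fresh lwIP pcb and attach an empty
 * write-buffer chain to the socket.
 */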
static int tcp_op_open(struct socket * sock)
{
        struct tcp_pcb * pcb;
        int ret;

        debug_tcp_print("socket num %ld", get_sock_num(sock));

        if (!(pcb = tcp_new()))
                return ENOMEM;
        debug_tcp_print("new tcp pcb %p\n", pcb);

        if ((ret = tcp_fill_new_socket(sock, pcb)) != OK)
                tcp_abandon(pcb, 0);

        return ret;
}

static void tcp_recv_free(__unused void * data)
{
        pbuf_free((struct pbuf *) data);
}

static void tcp_backlog_free(void * data)
{
        tcp_abort((struct tcp_pcb *) data);
}

static void free_wbuf_chain(struct wbuf_chain * wc)
{
        struct wbuf * wb;

        assert(wc != NULL);

        wb = wc->head;
        while (wb) {
                struct wbuf * w = wb;
                debug_tcp_print("freeing wbuf %p", wb);
                wb = wb->next;
                debug_free(w);
        }

        debug_free(wc);
}

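/*
 * close() on a TCP socket: drop queued receive data (or the backlog of not
 * yet accepted connections for a listening socket), detach all callbacks
 * from the pcb and close it, free the write buffers, and mark the socket
 * slot unused.
 */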
static int tcp_op_close(struct socket * sock)
{
        debug_tcp_print("socket num %ld", get_sock_num(sock));

        if (sock->flags & SOCK_FLG_OP_LISTENING)
                sock_dequeue_data_all(sock, tcp_backlog_free);
        else
                sock_dequeue_data_all(sock, tcp_recv_free);
        debug_tcp_print("dequeued RX data");

        if (sock->pcb) {
                int err;

                /* we are not able to handle any callback anymore */
                if (((struct tcp_pcb *)sock->pcb)->state != LISTEN) {
                        tcp_arg((struct tcp_pcb *)sock->pcb, NULL);
                        tcp_err((struct tcp_pcb *)sock->pcb, NULL);
                        tcp_sent((struct tcp_pcb *)sock->pcb, NULL);
                        tcp_recv((struct tcp_pcb *)sock->pcb, NULL);
                }

                err = tcp_close(sock->pcb);
                assert(err == ERR_OK);
                sock->pcb = NULL;
        }
        debug_tcp_print("freed pcb");

        if (sock->buf) {
                free_wbuf_chain((struct wbuf_chain *) sock->buf);
                sock->buf = NULL;
        }
        debug_tcp_print("freed TX data");

        debug_tcp_print("socket unused");

        /* mark it as unused */
        sock->ops = NULL;

        return OK;
}

__unused static void print_tcp_payload(unsigned char * buf, int len)
{
        int i;

        printf("LWIP tcp payload (%d) :\n", len);
        for (i = 0; i < len; i++, buf++) {
                printf("%02x ", buf[0]);
                if (i % 8 == 7)
                        kputc('\n');
        }
        kputc('\n');
}

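/*
 * Copy as much queued receive data as fits into the caller's buffer. Whole
 * pbufs are consumed and freed; a partially copied pbuf has its payload
 * pointer shifted so the next read continues where this one stopped. The
 * copied amount is acknowledged to lwIP with tcp_recved().
 */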
static int read_from_tcp(struct socket * sock, struct sock_req * req)
{
        unsigned int rem_buf, written = 0;
        struct pbuf * p;

        assert(!(sock->flags & SOCK_FLG_OP_LISTENING) && sock->recv_head);

        rem_buf = req->size;

        debug_tcp_print("socket num %ld recv buff sz %d", get_sock_num(sock), rem_buf);

        p = (struct pbuf *)sock->recv_head->data;
        while (rem_buf) {
                int err;

                if (rem_buf >= p->len) {
                        struct pbuf * np;

                        /*
                         * FIXME perhaps copy this to a local buffer and do a
                         * single copy to user then
                         */
#if 0
                        print_tcp_payload(p->payload, p->len);
#endif
                        err = copy_to_user(req->endpt, p->payload, p->len,
                                        req->grant, written);
                        if (err != OK)
                                goto cp_error;
                        sock->recv_data_size -= p->len;

                        debug_tcp_print("whole pbuf copied (%d bytes)", p->len);
                        rem_buf -= p->len;
                        written += p->len;

                        if ((np = p->next)) {
                                pbuf_ref(np);
                                if (pbuf_free(p) != 1)
                                        panic("LWIP : pbuf_free != 1");
                                /*
                                 * Mark where we are going to continue if an
                                 * error occurs
                                 */
                                sock->recv_head->data = np;
                                p = np;
                        } else {
                                sock_dequeue_data(sock);
                                pbuf_free(p);
                                if (sock->recv_head)
                                        p = (struct pbuf *)sock->recv_head->data;
                                else
                                        break;
                        }

                        if (rem_buf == 0)
                                break;
                } else {
                        /*
                         * It must be PBUF_RAM for us to be able to shift the
                         * payload pointer
                         */
                        assert(p->type == PBUF_RAM);

#if 0
                        print_tcp_payload(p->payload, rem_buf);
#endif
                        err = copy_to_user(req->endpt, p->payload, rem_buf,
                                        req->grant, written);
                        if (err != OK)
                                goto cp_error;
                        sock->recv_data_size -= rem_buf;

                        debug_tcp_print("partial pbuf copied (%d bytes)", rem_buf);
                        /*
                         * The whole pbuf hasn't been copied out, we only shift
                         * the payload pointer to remember where to continue
                         * next time
                         */
                        pbuf_header(p, -rem_buf);
                        written += rem_buf;
                        break;
                }
        }

        debug_tcp_print("%d bytes written to userspace", written);
        //printf("%d wr, queue %d\n", written, sock->recv_data_size);
        tcp_recved((struct tcp_pcb *) sock->pcb, written);
        return written;

cp_error:
        if (written) {
                debug_tcp_print("%d bytes written to userspace", written);
                return written;
        } else
                return EFAULT;
}

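/*
 * read() on a TCP socket: deliver queued data immediately if there is any,
 * return EAGAIN for a non-blocking read, or suspend the caller until data
 * arrives or the remote side closes the stream.
 */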
static int tcp_op_read(struct socket * sock, struct sock_req * req, int blk)
{
        debug_tcp_print("socket num %ld", get_sock_num(sock));

        if (!sock->pcb || ((struct tcp_pcb *) sock->pcb)->state !=
                                                        ESTABLISHED) {
                debug_tcp_print("Connection not established\n");
                return ENOTCONN;
        }
        if (sock->recv_head) {
                /* data available, receive immediately */
                int ret = read_from_tcp(sock, req);
                debug_tcp_print("read op finished");
                return ret;
        } else {
                if (sock->flags & SOCK_FLG_CLOSED) {
                        printf("socket %ld already closed!!! call from %d\n",
                                        get_sock_num(sock), req->endpt);
                        do_tcp_debug = 1;
                        return 0;
                }
                if (!blk) {
                        debug_tcp_print("reading would block -> EAGAIN");
                        return EAGAIN;
                }
                /* operation is being processed */
                sock->req = *req;
                debug_tcp_print("no data to read, suspending");
                sock->flags |= SOCK_FLG_OP_PENDING | SOCK_FLG_OP_READING;
                return EDONTREPLY;
        }
}

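/*
 * Append a new write buffer of sz bytes to the socket's wbuf chain and
 * account for it in buf_size. Returns NULL if the allocation fails.
 */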
static struct wbuf * wbuf_add(struct socket * sock, unsigned int sz)
{
        struct wbuf * wbuf;
        struct wbuf_chain * wc = (struct wbuf_chain *)sock->buf;

        assert(wc);

        wbuf = debug_malloc(sizeof(struct wbuf) + sz);
        if (!wbuf)
                return NULL;

        wbuf->len = sz;
        wbuf->written = wbuf->unacked = 0;
        wbuf->next = NULL;

        if (wc->head == NULL)
                wc->head = wc->tail = wbuf;
        else {
                wc->tail->next = wbuf;
                wc->tail = wbuf;
        }

        sock->buf_size += sz;
        debug_tcp_print("buffer %p size %d\n", wbuf, sock->buf_size);

        return wbuf;
}

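/*
 * Release sz acknowledged bytes from the head of the wbuf chain, freeing
 * every buffer that is now fully acknowledged. Returns the new head, or
 * NULL if all buffered data has been acknowledged.
 */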
static struct wbuf * wbuf_ack_sent(struct socket * sock, unsigned int sz)
{
        struct wbuf_chain * wc = (struct wbuf_chain *) sock->buf;
        struct wbuf ** wb;

        wb = &wc->head;
        while (sz && *wb) {
                if ((*wb)->unacked <= sz) {
                        struct wbuf * w;
                        assert((*wb)->rem_len == 0);
                        w = *wb;
                        *wb = w->next;
                        sock->buf_size -= w->len;
                        sz -= w->unacked;
                        debug_tcp_print("whole buffer acked (%d / %d), removed",
                                        w->unacked, w->len);
                        debug_free(w);
                } else {
                        (*wb)->unacked -= sz;
                        (*wb)->written += sz;
                        debug_tcp_print("acked %d / %d bytes", sz, (*wb)->len);
                        sz = 0;
                }
        }

        /* did we write out more than we had? */
        assert(sz == 0);

        if (wc->head == NULL)
                wc->tail = NULL;
        debug_tcp_print("buffer size %d\n", sock->buf_size);

        return wc->head;
}

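/*
 * write() on a TCP socket: copy the user data into a new wbuf. If a write
 * is already in progress the buffer is only enqueued; otherwise as much as
 * lwIP currently accepts is passed to tcp_write() right away. In both cases
 * the call is acknowledged immediately with the number of bytes taken.
 */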
static int tcp_op_write(struct socket * sock, struct sock_req * req,
        __unused int blk)
{
        int ret;
        struct wbuf * wbuf;
        unsigned int snd_buf_len, usr_buf_len;
        u8_t flgs = 0;

        if (!sock->pcb)
                return ENOTCONN;

        usr_buf_len = req->size;
        debug_tcp_print("socket num %ld data size %d",
                        get_sock_num(sock), usr_buf_len);

        /*
         * Let at most one buffer grow beyond TCP_BUF_SIZE. This is to minimize
         * small writes from userspace if only a few bytes were sent before
         */
        if (sock->buf_size >= TCP_BUF_SIZE) {
                /* FIXME do not block for now */
                debug_tcp_print("WARNING : tcp buffers too large, cannot allocate more");
                return ENOMEM;
        }
        /*
         * Never let the allocated buffers grow to more than 2 x TCP_BUF_SIZE
         * and never copy more than the space available
         */
        usr_buf_len = (usr_buf_len > TCP_BUF_SIZE ? TCP_BUF_SIZE : usr_buf_len);
        wbuf = wbuf_add(sock, usr_buf_len);
        if (!wbuf) {
                debug_tcp_print("cannot allocate new buffer of %d bytes", usr_buf_len);
                return ENOMEM;
        }
        debug_tcp_print("new wbuf for %d bytes", wbuf->len);

        if ((ret = copy_from_user(req->endpt, wbuf->data, usr_buf_len,
                                req->grant, 0)) != OK) {
                return ret;
        }
        wbuf->written = 0;
        wbuf->rem_len = usr_buf_len;

        /*
         * If a writing operation is already in progress, we just enqueue the
         * data and quit.
         */
        if (sock->flags & SOCK_FLG_OP_WRITING) {
                struct wbuf_chain * wc = (struct wbuf_chain *)sock->buf;
                /*
                 * We are adding a buffer with unsent data. If we don't have
                 * any other unsent data, set the pointer to this buffer.
                 */
                if (wc->unsent == NULL) {
                        wc->unsent = wbuf;
                        debug_tcp_print("unsent %p remains %d\n", wbuf, wbuf->rem_len);
                }
                debug_tcp_print("returns %d\n", usr_buf_len);
                /*
                 * We cannot accept new operations (write). We set the flag
                 * after sending reply not to revive only. We could deadlock.
                 *
                 * FIXME: this looks like bad logic. We acknowledge the write
                 * operation, so we will never reply to it or cancel it later.
                 */
                if (sock->buf_size >= TCP_BUF_SIZE)
                        sock->flags |= SOCK_FLG_OP_PENDING;
                return usr_buf_len;
        }

        /*
         * Start sending data if the operation is not in progress yet. The
         * current buffer is the only one we have, we cannot send more.
         */

        snd_buf_len = tcp_sndbuf((struct tcp_pcb *)sock->pcb);
        debug_tcp_print("tcp can accept %d bytes", snd_buf_len);

        wbuf->unacked = (snd_buf_len < wbuf->rem_len ? snd_buf_len : wbuf->rem_len);
        wbuf->rem_len -= wbuf->unacked;

        if (wbuf->rem_len) {
                flgs = TCP_WRITE_FLAG_MORE;
                /*
                 * Remember that this buffer has some data which we didn't pass
                 * to tcp yet.
                 */
                ((struct wbuf_chain *)sock->buf)->unsent = wbuf;
                debug_tcp_print("unsent %p remains %d\n", wbuf, wbuf->rem_len);
        }

        ret = tcp_write((struct tcp_pcb *)sock->pcb, wbuf->data,
                                wbuf->unacked, flgs);
        tcp_output((struct tcp_pcb *)sock->pcb);
        debug_tcp_print("%d bytes to tcp", wbuf->unacked);

        if (ret == ERR_OK) {
                /*
                 * Operation is being processed, no need to remember the
                 * message in this case, we are going to reply immediately
                 */
                debug_tcp_print("returns %d\n", usr_buf_len);
                sock->flags |= SOCK_FLG_OP_WRITING;
                /*
                 * FIXME: this looks like bad logic. We acknowledge the write
                 * operation, so we will never reply to it or cancel it later.
                 */
                if (sock->buf_size >= TCP_BUF_SIZE)
                        sock->flags |= SOCK_FLG_OP_PENDING;
                return usr_buf_len;
        } else
                return EIO;
}

static int tcp_set_conf(struct socket * sock, endpoint_t endpt,
        cp_grant_id_t grant)
{
        int err;
        nwio_tcpconf_t tconf;
        struct tcp_pcb * pcb = (struct tcp_pcb *) sock->pcb;

        debug_tcp_print("socket num %ld", get_sock_num(sock));

        assert(pcb);

        err = copy_from_user(endpt, &tconf, sizeof(tconf), grant, 0);

        if (err != OK)
                return err;

        debug_tcp_print("tconf.nwtc_flags = 0x%x", tconf.nwtc_flags);
        debug_tcp_print("tconf.nwtc_remaddr = 0x%x",
                                (unsigned int) tconf.nwtc_remaddr);
        debug_tcp_print("tconf.nwtc_remport = 0x%x", ntohs(tconf.nwtc_remport));
        debug_tcp_print("tconf.nwtc_locaddr = 0x%x",
                                (unsigned int) tconf.nwtc_locaddr);
        debug_tcp_print("tconf.nwtc_locport = 0x%x", ntohs(tconf.nwtc_locport));

        sock->usr_flags = tconf.nwtc_flags;

        if (sock->usr_flags & NWTC_SET_RA)
                pcb->remote_ip.addr = tconf.nwtc_remaddr;
        if (sock->usr_flags & NWTC_SET_RP)
                pcb->remote_port = ntohs(tconf.nwtc_remport);

        if (sock->usr_flags & NWTC_LP_SET) {
                /* FIXME the user library can only bind to ANY anyway */
                if (tcp_bind(pcb, IP_ADDR_ANY, ntohs(tconf.nwtc_locport)) == ERR_USE) {
                        return EADDRINUSE;
                }
        }

        return OK;
}

static int tcp_get_conf(struct socket * sock, endpoint_t endpt,
        cp_grant_id_t grant)
{
        nwio_tcpconf_t tconf;
        struct tcp_pcb * pcb = (struct tcp_pcb *) sock->pcb;

        debug_tcp_print("socket num %ld", get_sock_num(sock));

        assert(pcb);

        tconf.nwtc_locaddr = pcb->local_ip.addr;
        tconf.nwtc_locport = htons(pcb->local_port);
        tconf.nwtc_remaddr = pcb->remote_ip.addr;
        tconf.nwtc_remport = htons(pcb->remote_port);
        tconf.nwtc_flags = sock->usr_flags;

        debug_tcp_print("tconf.nwtc_flags = 0x%x", tconf.nwtc_flags);
        debug_tcp_print("tconf.nwtc_remaddr = 0x%x",
                                (unsigned int) tconf.nwtc_remaddr);
        debug_tcp_print("tconf.nwtc_remport = 0x%x", ntohs(tconf.nwtc_remport));
        debug_tcp_print("tconf.nwtc_locaddr = 0x%x",
                                (unsigned int) tconf.nwtc_locaddr);
        debug_tcp_print("tconf.nwtc_locport = 0x%x", ntohs(tconf.nwtc_locport));

        return copy_to_user(endpt, &tconf, sizeof(tconf), grant, 0);
}

static int enqueue_rcv_data(struct socket * sock, struct pbuf * pbuf)
{
        /* Do not enqueue more data than allowed */
        if (0 && sock->recv_data_size > 4 * TCP_BUF_SIZE)
                return ERR_MEM;

        if (sock_enqueue_data(sock, pbuf, pbuf->tot_len) != OK) {
                debug_tcp_print("data enqueueing failed");
                return ERR_MEM;
        }
        debug_tcp_print("enqueued %d bytes", pbuf->tot_len);
        //printf("enqueued %d bytes, queue %d\n", pbuf->tot_len, sock->recv_data_size);

        return ERR_OK;
}

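/*
 * lwIP receive callback. A NULL pbuf means the remote side closed the
 * stream, which wakes up a pending reader with EOF. Otherwise the pbuf is
 * enqueued and, if a read operation is pending, delivered right away.
 */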
static err_t tcp_recv_callback(void *arg,
                                struct tcp_pcb *tpcb,
                                struct pbuf *pbuf,
                                err_t err)
{
        int ret, enqueued = 0;
        struct socket * sock = (struct socket *) arg;

        debug_tcp_print("socket num %ld", get_sock_num(sock));

        if (sock->pcb == NULL) {
                if (sock_select_set(sock))
                        sock_select_notify(sock);
                return ERR_OK;
        }

        assert((struct tcp_pcb *) sock->pcb == tpcb);

        if (err != ERR_OK)
                return ERR_OK;
        if (!pbuf) {
                debug_tcp_print("tcp stream closed on the remote side");
                // sock->flags |= SOCK_FLG_CLOSED;

                /* wake up the reader and report EOF */
                if (sock->flags & SOCK_FLG_OP_PENDING &&
                                sock->flags & SOCK_FLG_OP_READING) {
                        send_req_reply(&sock->req, 0);
                        sock->flags &= ~(SOCK_FLG_OP_PENDING |
                                        SOCK_FLG_OP_READING);
                }
#if 0
                /* if there are any undelivered data, drop them */
                sock_dequeue_data_all(sock, tcp_recv_free);
                tcp_abandon(tpcb, 0);
                sock->pcb = NULL;
#endif

                return ERR_OK;
        }

        /*
         * FIXME we always enqueue the data first. If the head is empty and a
         * read operation is pending we could try to deliver immediately
         * without enqueueing
         */
        if (enqueue_rcv_data(sock, pbuf) == ERR_OK)
                enqueued = 1;

        /*
         * Deliver data if there is a pending read operation, otherwise notify
         * select if the socket is being monitored
         */
        if (sock->flags & SOCK_FLG_OP_PENDING) {
                if (sock->flags & SOCK_FLG_OP_READING) {
                        ret = read_from_tcp(sock, &sock->req);
                        debug_tcp_print("read op finished");
                        send_req_reply(&sock->req, ret);
                        sock->flags &= ~(SOCK_FLG_OP_PENDING |
                                        SOCK_FLG_OP_READING);
                }
        } else if (!(sock->flags & SOCK_FLG_OP_WRITING) &&
                        sock_select_rw_set(sock))
                sock_select_notify(sock);

        /* perhaps we have delivered some data to the user, try to enqueue again */
        if (!enqueued) {
                return enqueue_rcv_data(sock, pbuf);
        } else
                return ERR_OK;
}

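/*
 * lwIP sent callback. Releases the acknowledged part of the wbuf chain,
 * notifies select once buffer space is available again, and hands more
 * unsent data to lwIP for as long as its send buffer accepts it.
 */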
static err_t tcp_sent_callback(void *arg, struct tcp_pcb *tpcb, u16_t len)
{
        struct socket * sock = (struct socket *) arg;
        struct wbuf * wbuf;
        struct wbuf_chain * wc = (struct wbuf_chain *) sock->buf;
        unsigned int snd_buf_len;
        int ret;

        debug_tcp_print("socket num %ld", get_sock_num(sock));

        /* an error might have happened */
        if (sock->pcb == NULL) {
                if (sock_select_set(sock))
                        sock_select_notify(sock);
                return ERR_OK;
        }

        assert((struct tcp_pcb *)sock->pcb == tpcb);

        /* operation must have been canceled, do not send any other data */
        /*
         * FIXME: this looks like bad logic. We already acknowledged the write
         * operation, so we should not set or check the OP_PENDING flag.
         */
        if (!(sock->flags & SOCK_FLG_OP_PENDING))
                return ERR_OK;

        wbuf = wbuf_ack_sent(sock, len);

        if (wbuf == NULL) {
                debug_tcp_print("all data acked, nothing more to send");
                sock->flags &= ~SOCK_FLG_OP_WRITING;
                if (!(sock->flags & SOCK_FLG_OP_READING))
                        sock->flags &= ~SOCK_FLG_OP_PENDING;
                /* no reviving, we must notify. Write and read possible */
                if (sock_select_rw_set(sock))
                        sock_select_notify(sock);
                return ERR_OK;
        }

        /* we have just freed some space, write will be accepted */
        if (sock->buf_size < TCP_BUF_SIZE && sock_select_rw_set(sock)) {
                if (!(sock->flags & SOCK_FLG_OP_READING)) {
                        sock->flags &= ~SOCK_FLG_OP_PENDING;
                        sock_select_notify(sock);
                }
        }

        /*
         * Check if there is some space for new data, there should be, we just
         * got a confirmation that some data reached the other end of the
         * connection
         */
        snd_buf_len = tcp_sndbuf(tpcb);
        assert(snd_buf_len > 0);
        debug_tcp_print("tcp can accept %d bytes", snd_buf_len);

        if (!wc->unsent) {
                debug_tcp_print("nothing to send");
                return ERR_OK;
        }

        wbuf = wc->unsent;
        while (wbuf) {
                unsigned int towrite;
                u8_t flgs = 0;

                towrite = (snd_buf_len < wbuf->rem_len ?
                                snd_buf_len : wbuf->rem_len);
                wbuf->rem_len -= towrite;
                debug_tcp_print("data to send, sending %d", towrite);

                if (wbuf->rem_len || wbuf->next)
                        flgs = TCP_WRITE_FLAG_MORE;
                ret = tcp_write(tpcb, wbuf->data + wbuf->written + wbuf->unacked,
                                towrite, flgs);
                debug_tcp_print("%d bytes to tcp", towrite);

                /* tcp_output() is called once we return from this callback */

                if (ret != ERR_OK) {
                        debug_print("tcp_write() failed (%d), written %d",
                                        ret, wbuf->written);
                        sock->flags &= ~(SOCK_FLG_OP_PENDING | SOCK_FLG_OP_WRITING);
                        /* no reviving, we must notify. Write and read possible */
                        if (sock_select_rw_set(sock))
                                sock_select_notify(sock);
                        return ERR_OK;
                }

                wbuf->unacked += towrite;
                snd_buf_len -= towrite;
                debug_tcp_print("tcp still accepts %d bytes\n", snd_buf_len);

                if (snd_buf_len) {
                        assert(wbuf->rem_len == 0);
                        wbuf = wbuf->next;
                        wc->unsent = wbuf;
                        if (wbuf)
                                debug_tcp_print("unsent %p remains %d\n",
                                                wbuf, wbuf->rem_len);
                        else {
                                debug_tcp_print("nothing to send");
                        }
                } else
                        break;
        }

        return ERR_OK;
}

static err_t tcp_connected_callback(void *arg,
                                struct tcp_pcb *tpcb,
                                __unused err_t err)
{
        struct socket * sock = (struct socket *) arg;

        debug_tcp_print("socket num %ld err %d", get_sock_num(sock), err);

        if (sock->pcb == NULL) {
                if (sock_select_set(sock))
                        sock_select_notify(sock);
                return ERR_OK;
        }

        assert((struct tcp_pcb *)sock->pcb == tpcb);

        tcp_sent(tpcb, tcp_sent_callback);
        tcp_recv(tpcb, tcp_recv_callback);
        send_req_reply(&sock->req, OK);
        sock->flags &= ~(SOCK_FLG_OP_PENDING | SOCK_FLG_OP_CONNECTING);

        /* revive does the sock_select_notify() for us */

        return ERR_OK;
}

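/*
 * Connect request (NWIOTCPCONN). The remote address and port were set
 * earlier via NWIOSTCPCONF; the operation always suspends and is completed
 * by tcp_connected_callback() or by the error callback.
 */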
static int tcp_op_connect(struct socket * sock, struct sock_req * req)
{
        ip_addr_t remaddr;
        struct tcp_pcb * pcb;
        err_t err;

        debug_tcp_print("socket num %ld", get_sock_num(sock));
        /*
         * Connecting is going to send some packets. Unless an immediate error
         * occurs this operation is going to block
         */
        sock->flags |= SOCK_FLG_OP_PENDING | SOCK_FLG_OP_CONNECTING;

        /* try to connect now */
        pcb = (struct tcp_pcb *) sock->pcb;
        remaddr = pcb->remote_ip;
        sock->req = *req;
        err = tcp_connect(pcb, &remaddr, pcb->remote_port,
                                tcp_connected_callback);
        if (err == ERR_VAL)
                panic("Wrong tcp_connect arguments");
        if (err != ERR_OK)
                panic("Other tcp_connect error %d\n", err);

        return EDONTREPLY;
}

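/*
 * Attach a freshly accepted pcb to the socket slot the user passed in. The
 * pcb created when that socket was opened is abandoned, and the usual
 * callbacks are wired up on the new connection.
 */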
static int tcp_do_accept(struct socket * listen_sock,
                        struct sock_req * req,
                        struct tcp_pcb * newpcb)
{
        struct socket * newsock;
        unsigned int sock_num;
        int ret;

        debug_tcp_print("socket num %ld", get_sock_num(listen_sock));

        if ((ret = copy_from_user(req->endpt, &sock_num, sizeof(sock_num),
                                req->grant, 0)) != OK)
                return EFAULT;
        if (!is_valid_sock_num(sock_num))
                return EBADF;

        newsock = get_sock(sock_num);
        assert(newsock->pcb); /* because of previous open() */

        /* we really want to forget about this socket */
        tcp_err((struct tcp_pcb *)newsock->pcb, NULL);
        tcp_abandon((struct tcp_pcb *)newsock->pcb, 0);

        tcp_arg(newpcb, newsock);
        tcp_err(newpcb, tcp_error_callback);
        tcp_sent(newpcb, tcp_sent_callback);
        tcp_recv(newpcb, tcp_recv_callback);
        tcp_nagle_disable(newpcb);
        tcp_accepted(((struct tcp_pcb *)(listen_sock->pcb)));
        newsock->pcb = newpcb;

        debug_tcp_print("Accepted new connection using socket %d\n", sock_num);

        return OK;
}

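/*
 * lwIP accept callback: complete a pending accept request immediately, or
 * enqueue the new pcb so that a later NWIOTCPACCEPTTO can pick it up.
 */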
static err_t tcp_accept_callback(void *arg, struct tcp_pcb *newpcb, err_t err)
{
        struct socket * sock = (struct socket *) arg;

        debug_tcp_print("socket num %ld", get_sock_num(sock));

        assert(err == ERR_OK && newpcb);
        assert(sock->flags & SOCK_FLG_OP_LISTENING);

        if (sock->flags & SOCK_FLG_OP_PENDING) {
                int ret;

                ret = tcp_do_accept(sock, &sock->req, newpcb);
                send_req_reply(&sock->req, ret);
                sock->flags &= ~SOCK_FLG_OP_PENDING;
                if (ret == OK) {
                        return ERR_OK;
                }
                /* in case of an error fall through */
        }

        /* If we cannot accept right away we enqueue the connection for later */

        debug_tcp_print("Enqueue connection sock %ld pcb %p\n",
                        get_sock_num(sock), newpcb);
        if (sock_enqueue_data(sock, newpcb, 1) != OK) {
                tcp_abort(newpcb);
                return ERR_ABRT;
        }
        if (sock_select_read_set(sock))
                sock_select_notify(sock);

        return ERR_OK;
}

static int tcp_op_listen(struct socket * sock, endpoint_t endpt,
        cp_grant_id_t grant)
{
        int backlog, err;
        struct tcp_pcb * new_pcb;

        debug_tcp_print("socket num %ld", get_sock_num(sock));

        err = copy_from_user(endpt, &backlog, sizeof(backlog), grant, 0);

        if (err != OK)
                return err;

        new_pcb = tcp_listen_with_backlog((struct tcp_pcb *) sock->pcb,
                                                        (u8_t) backlog);
        debug_tcp_print("listening pcb %p", new_pcb);

        if (!new_pcb) {
                debug_tcp_print("Cannot listen on socket %ld", get_sock_num(sock));
                return EIO;
        }

        /* advertise that this socket is willing to accept connections */
        tcp_accept(new_pcb, tcp_accept_callback);
        sock->flags |= SOCK_FLG_OP_LISTENING;

        sock->pcb = new_pcb;
        return OK;
}

static int tcp_op_accept(struct socket * sock, struct sock_req * req)
{
        debug_tcp_print("socket num %ld", get_sock_num(sock));

        if (!(sock->flags & SOCK_FLG_OP_LISTENING)) {
                debug_tcp_print("socket %ld does not listen\n", get_sock_num(sock));
                return EINVAL;
        }

        /* there is a connection ready to be accepted */
        if (sock->recv_head) {
                int ret;
                struct tcp_pcb * pcb;

                pcb = (struct tcp_pcb *) sock->recv_head->data;
                assert(pcb);

                ret = tcp_do_accept(sock, req, pcb);
                if (ret == OK)
                        sock_dequeue_data(sock);
                return ret;
        }

        debug_tcp_print("no ready connection, suspending\n");

        sock->req = *req;

        sock->flags |= SOCK_FLG_OP_PENDING;

        return EDONTREPLY;
}

static int tcp_op_shutdown_tx(struct socket * sock)
{
        err_t err;

        debug_tcp_print("socket num %ld", get_sock_num(sock));

        err = tcp_shutdown((struct tcp_pcb *) sock->pcb, 0, 1);

        switch (err) {
        case ERR_OK:
                return OK;
        case ERR_CONN:
                return ENOTCONN;
        default:
                return EIO;
        }
}

static int tcp_op_get_cookie(struct socket * sock, endpoint_t endpt,
        cp_grant_id_t grant)
{
        tcp_cookie_t cookie;
        unsigned int sock_num;

        assert(sizeof(cookie) >= sizeof(sock));

        sock_num = get_sock_num(sock);
        memcpy(&cookie, &sock_num, sizeof(sock_num));

        return copy_to_user(endpt, &cookie, sizeof(sock), grant, 0);
}

static int tcp_get_opt(struct socket * sock, endpoint_t endpt,
        cp_grant_id_t grant)
{
        nwio_tcpopt_t tcpopt;
#if !defined(NDEBUG)
        struct tcp_pcb * pcb = (struct tcp_pcb *) sock->pcb;
#endif /* !defined(NDEBUG) */

        debug_tcp_print("socket num %ld", get_sock_num(sock));

        assert(pcb);

        /* FIXME : not used by the userspace library */
        tcpopt.nwto_flags = 0;

        return copy_to_user(endpt, &tcpopt, sizeof(tcpopt), grant, 0);
}

static int tcp_set_opt(struct socket * sock, endpoint_t endpt,
        cp_grant_id_t grant)
{
        nwio_tcpopt_t tcpopt;
#if !defined(NDEBUG)
        struct tcp_pcb * pcb = (struct tcp_pcb *) sock->pcb;
#endif /* !defined(NDEBUG) */

        debug_tcp_print("socket num %ld", get_sock_num(sock));

        assert(pcb);

        /* FIXME : The userspace library does not use this */

        return copy_from_user(endpt, &tcpopt, sizeof(tcpopt), grant, 0);
}

static int tcp_op_ioctl(struct socket * sock, struct sock_req * req,
        __unused int blk)
{
        int r;

        if (!sock->pcb)
                return ENOTCONN;

        debug_tcp_print("socket num %ld req %c %ld %ld",
                        get_sock_num(sock),
                        (unsigned char) (req->req >> 8),
                        req->req & 0xff, _MINIX_IOCTL_SIZE(req->req));

        switch (req->req) {
        case NWIOGTCPCONF:
                r = tcp_get_conf(sock, req->endpt, req->grant);
                break;
        case NWIOSTCPCONF:
                r = tcp_set_conf(sock, req->endpt, req->grant);
                break;
        case NWIOTCPCONN:
                r = tcp_op_connect(sock, req);
                break;
        case NWIOTCPLISTENQ:
                r = tcp_op_listen(sock, req->endpt, req->grant);
                break;
        case NWIOGTCPCOOKIE:
                r = tcp_op_get_cookie(sock, req->endpt, req->grant);
                break;
        case NWIOTCPACCEPTTO:
                r = tcp_op_accept(sock, req);
                break;
        case NWIOTCPSHUTDOWN:
                r = tcp_op_shutdown_tx(sock);
                break;
        case NWIOGTCPOPT:
                r = tcp_get_opt(sock, req->endpt, req->grant);
                break;
        case NWIOSTCPOPT:
                r = tcp_set_opt(sock, req->endpt, req->grant);
                break;
        default:
                r = ENOTTY;
        }

        return r;
}

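/*
 * select() on a TCP socket: report which operations would not block right
 * now and, if CDEV_NOTIFY is set, remember which events the caller wants to
 * be notified about later.
 */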
static int tcp_op_select(struct socket * sock, unsigned int sel)
{
        int retsel = 0;

        debug_tcp_print("socket num %ld 0x%x", get_sock_num(sock), sel);

        /* in this case any operation would block, no error */
        if (sock->flags & SOCK_FLG_OP_PENDING) {
                debug_tcp_print("SOCK_FLG_OP_PENDING");
                if (sel & CDEV_NOTIFY) {
                        if (sel & CDEV_OP_RD) {
                                sock->flags |= SOCK_FLG_SEL_READ;
                                debug_tcp_print("monitor read");
                        }
                        if (sel & CDEV_OP_WR) {
                                sock->flags |= SOCK_FLG_SEL_WRITE;
                                debug_tcp_print("monitor write");
                        }
                        if (sel & CDEV_OP_ERR)
                                sock->flags |= SOCK_FLG_SEL_ERROR;
                }
                return 0;
        }

        if (sel & CDEV_OP_RD) {
                /*
                 * If recv_head is not NULL we can either read or accept a
                 * connection which is the same for select()
                 */
                if (sock->pcb) {
                        if (sock->recv_head &&
                                        !(sock->flags & SOCK_FLG_OP_WRITING))
                                retsel |= CDEV_OP_RD;
                        else if (!(sock->flags & SOCK_FLG_OP_LISTENING) &&
                                        ((struct tcp_pcb *) sock->pcb)->state != ESTABLISHED)
                                retsel |= CDEV_OP_RD;
                        else if (sel & CDEV_NOTIFY) {
                                sock->flags |= SOCK_FLG_SEL_READ;
                                debug_tcp_print("monitor read");
                        }
                } else /* not connected, read does not block */
                        retsel |= CDEV_OP_RD;
        }

        if (sel & CDEV_OP_WR) {
                if (sock->pcb) {
                        if (((struct tcp_pcb *) sock->pcb)->state == ESTABLISHED)
                                retsel |= CDEV_OP_WR;
                        else if (sel & CDEV_NOTIFY) {
                                sock->flags |= SOCK_FLG_SEL_WRITE;
                                debug_tcp_print("monitor write");
                        }
                } else /* not connected, write does not block */
                        retsel |= CDEV_OP_WR;
        }

        if (retsel & CDEV_OP_RD) {
                debug_tcp_print("read won't block");
        }
        if (retsel & CDEV_OP_WR) {
                debug_tcp_print("write won't block");
        }

        /* we only monitor if errors will happen in the future */
        if (sel & CDEV_OP_ERR && sel & CDEV_NOTIFY)
                sock->flags |= SOCK_FLG_SEL_ERROR;

        return retsel;
}

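/*
 * Called when the socket state changes to determine which of the monitored
 * select operations have become ready; the corresponding SEL flags are
 * cleared once they are reported.
 */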
static int tcp_op_select_reply(struct socket * sock)
{
        unsigned int sel = 0;

        assert(sock->select_ep != NONE);
        debug_tcp_print("socket num %ld", get_sock_num(sock));

        if (sock->flags & SOCK_FLG_OP_PENDING) {
                debug_tcp_print("WARNING socket still blocking!");
                return EDONTREPLY;
        }

        if (sock->flags & SOCK_FLG_SEL_READ) {
                if (sock->pcb == NULL || (sock->recv_head &&
                                !(sock->flags & SOCK_FLG_OP_WRITING)) ||
                                (!(sock->flags & SOCK_FLG_OP_LISTENING) &&
                                ((struct tcp_pcb *) sock->pcb)->state !=
                                ESTABLISHED)) {
                        sel |= CDEV_OP_RD;
                        debug_tcp_print("read won't block");
                }
        }

        if (sock->flags & SOCK_FLG_SEL_WRITE &&
                        (sock->pcb == NULL ||
                        ((struct tcp_pcb *) sock->pcb)->state ==
                        ESTABLISHED)) {
                sel |= CDEV_OP_WR;
                debug_tcp_print("write won't block");
        }

        if (sel)
                sock->flags &= ~(SOCK_FLG_SEL_WRITE | SOCK_FLG_SEL_READ |
                                SOCK_FLG_SEL_ERROR);

        return sel;
}

struct sock_ops sock_tcp_ops = {
        .open           = tcp_op_open,
        .close          = tcp_op_close,
        .read           = tcp_op_read,
        .write          = tcp_op_write,
        .ioctl          = tcp_op_ioctl,
        .select         = tcp_op_select,
        .select_reply   = tcp_op_select_reply
};