Please wait until the page is fully downloaded and then press the "Expand" button or the blue line numbers.

0061001 /*
0061002 udp.c
0061003 
udp.c implements the udp layer in the network service. When a udp device file (e.g., /dev/udp) is opened, udp_open() is called. When a read, write, or ioctl request is made, udp_read(), udp_write(), or udp_ioctl(), respectively, is called. When the process no longer needs to read or write to the device, udp_close() is called.

Understanding the arrays udp_port_table[] (described in detail on line 61036) and udp_fd_table[] (described in detail on line 61067) is necessary for understanding the udp code. For the following inet.conf file:

eth0 DP8390 0 { default; };
psip1;

two "udp ports", one associated with the ethernet device and one associated with the psip device, will be opened. When a process opens a udp device (e.g., /dev/udp), an element in the udp_fd_table[] is claimed and configured and then used to send and receive udp packets.

Note that "udp ports" as defined by RFC 768 are not the same as the udp ports mentioned above. The udp ports as defined by RFC 768 can range from 0 - 65535 and the udp ports mentioned above range from 0 - 3. It is unfortunate that the "udp ports" mentioned above were so named. To minimize confusion, the udp ports referenced by RFC 768 will be referred to as "common udp ports" whereas the udp ports mentioned above will be either referred to simply as "udp ports" or "physical udp ports".


0061004 Copyright 1995 Philip Homburg
0061005 */
0061006 
0061007 #include "inet.h"
0061008 #include "type.h"
0061009 
0061010 #include "assert.h"
0061011 #include "buf.h"
0061012 #include "clock.h"
0061013 #include "icmp_lib.h"
0061014 #include "io.h"
0061015 #include "ip.h"
0061016 #include "sr.h"
0061017 #include "udp.h"
0061018 
0061019 THIS_FILE
0061020 
0061021 #define UDP_FD_NR              (4*IP_PORT_MAX)
UDP_FD_NR is the number of udp file descriptors:

UDP_FD_NR = 4 * IP_PORT_MAX = 4 * 4 = 16 (IP_PORT_MAX is 4; remember that there can be two ethernet and two psip ports)


0061022 #define UDP_PORT_HASH_NR       16              /* Must be a power of 2 */
0061023 
0061024 typedef struct udp_port                   /* one element of udp_port_table[] per configured interface */
0061025 {
0061026          int up_flags;                    /* combination of the UPF_* flags */
0061027          int up_state;                    /* one of the UPS_* states; UPS_MAIN is the normal operational state */
0061028          int up_ipfd;                     /* ip file descriptor (index into ip_fd_table[]) obtained from ip_open() */
0061029          int up_ipdev;                    /* port number of this udp port and of its ip port; set to the table index by udp_init() */
0061030          acc_t *up_wr_pack;               /* the single outgoing packet (udp header assembled) waiting to go to the ip layer; not a queue */
0061031          ipaddr_t up_ipaddr;              /* ip address of the underlying ip port */
0061032          struct udp_fd *up_next_fd;       /* initialized to udp_fd_table by udp_init() */
0061033          struct udp_fd *up_write_fd;      /* udp file descriptor whose write operation is suspended, if any */
0061034          struct udp_fd *up_port_any;      /* linked list of udp file descriptors searched by udp_ip_arrived() */
0061035          struct udp_fd *up_port_hash[UDP_PORT_HASH_NR];   /* hashed linked lists of udp file descriptors for fast lookup */
0061036 } udp_port_t;
udp_port / udp_port_table[]

For the following example inet.conf file:

eth0 DP8390 0 { default; };
psip1;

there are 2 physical udp ports. Port 0 corresponds to the ethernet device whose corresponding udp device is /dev/udp0 and port 1 corresponds to the psip device whose corresponding udp device is /dev/udp1.

For this configuration file, udp_port_table[] will have two elements, udp_port_table[0] and udp_port_table[1]. udp_init() initializes these two elements to indicate they are unused and then calls udp_main(). udp_main(), in turn, calls ip_open(), which claims an element in ip_fd_table[] (in other words, an "ip file descriptor" is opened). In this way, the udp code and the ip code are interconnected. Note that there is a 1:1 relationship between udp_port_table[] elements and ip_fd_table[] elements. What this means is that a udp device (e.g., /dev/udp0) will use the same ip file descriptor (i.e., the same element of ip_fd_table[]) to handle every request (read, write, and ioctl). On the other hand, there is a one-to-many relationship between udp ports and udp file descriptors.



Each element of udp_port_table[] is of type udp_port_t:

typedef struct udp_port

{
int up_flags;
int up_state;
int up_ipfd;
int up_ipdev;
acc_t *up_wr_pack;
ipaddr_t up_ipaddr;
struct udp_fd *up_next_fd;
struct udp_fd *up_write_fd;
struct udp_fd *up_port_any;
struct udp_fd *up_port_hash[UDP_PORT_HASH_NR];
} udp_port_t;

int up_flags:

#define UPF_EMPTY 0x0
#define UPF_WRITE_IP 0x1
#define UPF_WRITE_SP 0x2
#define UPF_READ_IP 0x4
#define UPF_READ_SP 0x8
#define UPF_SUSPEND 0x10
#define UPF_MORE2WRITE 0x20

Most of these flags are meaningless.

int up_state:

#define UPS_EMPTY 0
#define UPS_SETPROTO 1
#define UPS_GETCONF 2
#define UPS_MAIN 3
#define UPS_ERROR 4

During initialization, udp_main() is called to initialize the udp port. If the underlying ip port has been already configured, the udp port will be successfully initialized and the udp port's state will become UPS_MAIN, its normal operational state.

int up_ipfd:

As described above, ip_open() is called to open an ip file descriptor. up_ipfd is then set to this file descriptor (i.e., the index of the claimed element in ip_fd_table[]). In this way, the udp code and the ip code are linked. For example, immediately after opening the ip file descriptor, the udp code uses the up_ipfd field to configure the newly opened ip file descriptor.

int up_ipdev:

up_ipdev is equal to the port number of both the udp physical port and its associated ip physical port (which will be the same). For example, for the configuration file above, the udp physical port (and the associated ip physical port) corresponding to the first line (the ethernet entry) will have its up_ipdev field equal to 0 and the second line (the psip entry) will have its up_ipdev field equal to 1.

acc_t *up_wr_pack:

After a packet (e.g., a udp packet) has its udp header assembled but before it is passed to the lower layer (i.e., the ip layer), the packet is placed in this field. Note that this field is not a queue and that there will be at most a single packet placed here at any given time.


ipaddr_t up_ipaddr:

During the initialization of the udp port, udp_put_data() is called (indirectly) to set the ip address of the udp port to the ip address of the udp port's underlying ip port (which was set either by RARP or the "ifconfig -h host-IP-address" command).


struct udp_fd *up_next_fd:
struct udp_fd *up_write_fd:

If a write operation is suspended, up_write_fd is the udp file descriptor whose write operation is suspended.

struct udp_fd *up_port_any:
struct udp_fd *up_port_hash[UDP_PORT_HASH_NR]:


up_port_any is a linked list of udp file descriptors, and each element of up_port_hash[] is the head of such a linked list. When a udp packet arrives at the ethernet/psip port, udp_ip_arrived() searches through these linked lists for the corresponding udp file descriptor(s).

UDP_PORT_HASH_NR is #define'd in udp.c as 16. up_port_hash[] is an array that uses a hash to find the corresponding udp file descriptor quickly.


0061037 
0061038 #define UPF_EMPTY       0x0
0061039 #define UPF_WRITE_IP       0x1
0061040 #define UPF_WRITE_SP       0x2
0061041 #define UPF_READ_IP       0x4
0061042 #define UPF_READ_SP       0x8
0061043 #define UPF_SUSPEND       0x10
0061044 #define UPF_MORE2WRITE       0x20
0061045 
0061046 #define UPS_EMPTY       0
0061047 #define UPS_SETPROTO       1
0061048 #define UPS_GETCONF       2
0061049 #define UPS_MAIN       3
0061050 #define UPS_ERROR       4
0061051 
0061052 typedef struct udp_fd                     /* one element of udp_fd_table[]; claimed when a udp device file is opened */
0061053 {
0061054          int uf_flags;                    /* combination of the UFF_* flags; UFF_EMPTY when unused */
0061055          udp_port_t *uf_port;             /* element of udp_port_table[] for the device that was opened */
0061056          ioreq_t uf_ioreq;                /* pending ioctl request (NWIOSUDPOPT or NWIOGUDPOPT) */
0061057          int uf_srfd;                     /* associated slot in sr_fd_table[] */
0061058          nwio_udpopt_t uf_udpopt;         /* options (flags, local/remote port and address) set during configuration */
0061059          get_userdata_t uf_get_userdata;  /* set to sr_get_userdata() */
0061060          put_userdata_t uf_put_userdata;  /* set to sr_put_userdata() */
0061061          acc_t *uf_rdbuf_head;            /* head of the read queue of received packets */
0061062          acc_t *uf_rdbuf_tail;            /* tail of the read queue of received packets */
0061063          size_t uf_rd_count;              /* number of bytes requested by the read request */
0061064          size_t uf_wr_count;              /* number of bytes requested by the write request */
0061065          time_t uf_exp_tim;               /* expiry time after which unread packets in the read queue are discarded */
0061066          struct udp_fd *uf_port_next;     /* next file descriptor in the linked list built by hash_fd() */
0061067 } udp_fd_t;
udp_fd / udp_fd_table[]

udp file descriptors (udp_fd's) make up udp_fd_table[]. When a process opens up a udp device file (e.g., /dev/udp), an element in udp_fd_table[] is set up. Many udp file descriptors are associated with each udp physical port (elements in udp_port_table[]). On the other hand, there is a 1:1 ratio between udp physical ports and ip file descriptors (elements in ip_fd_table[]).



typedef struct udp_fd

{
int uf_flags;
udp_port_t *uf_port;
ioreq_t uf_ioreq;
int uf_srfd;
nwio_udpopt_t uf_udpopt;
get_userdata_t uf_get_userdata;
put_userdata_t uf_put_userdata;
acc_t *uf_rdbuf_head;
acc_t *uf_rdbuf_tail;
size_t uf_rd_count;
size_t uf_wr_count;
time_t uf_exp_tim;
struct udp_fd *uf_port_next;
} udp_fd_t;

typedef struct nwio_udpopt
{
unsigned long nwuo_flags;
udpport_t nwuo_locport;
udpport_t nwuo_remport;
ipaddr_t nwuo_locaddr;
ipaddr_t nwuo_remaddr;
} nwio_udpopt_t;

int uf_flags:

uf_flags is a combination of the following flags:

#define UFF_EMPTY 0x0
#define UFF_INUSE 0x1
#define UFF_IOCTL_IP 0x2
#define UFF_READ_IP 0x4
#define UFF_WRITE_IP 0x8
#define UFF_OPTSET 0x10

uf_flags for all the udp file descriptors is initialized to UFF_EMPTY. When the udp file descriptor is opened, uf_flags is set to UFF_INUSE and will remain so until the udp file descriptor is closed. After the udp file descriptor is opened, udp_ioctl() is called to configure the file descriptor, during which time the UFF_IOCTL_IP flag is set.

If a read request is received and cannot be immediately satisfied, UFF_READ_IP is set. However, all write requests are handled immediately and therefore the UFF_WRITE_IP flag is not important.

udp_port_t *uf_port:

uf_port points to the udp physical port (i.e., the element of udp_port_table[]) that corresponds with this udp file descriptor. If /dev/udp0 was opened, uf_port will be a pointer to udp_port_table[0]. If /dev/udp1 was opened, uf_port will be a pointer to udp_port_table[1].


ioreq_t uf_ioreq:

If an ioctl request is made of the udp file descriptor (for example, a request to configure the file descriptor), uf_ioreq is set to the request. The different values that uf_ioreq can be are NWIOSUDPOPT (Set UDP OPTions) and NWIOGUDPOPT (Get UDP OPTions). The value of uf_ioreq is contained within a message received from the file system.


int uf_srfd:

The udp file descriptor's associated slot in sr_fd_table[].

nwio_udpopt_t uf_udpopt: (see below)
get_userdata_t uf_get_userdata:
put_userdata_t uf_put_userdata:


uf_get_userdata and uf_put_userdata are set to sr_get_userdata() and sr_put_userdata(), respectively. These values were passed in as arguments in sr_open().

acc_t *uf_rdbuf_head:
acc_t *uf_rdbuf_tail:


uf_rdbuf_head is the head and uf_rdbuf_tail is the tail of the udp file descriptor's read queue. Packets destined for the udp file descriptor are passed up from the lower layers (e.g., from the ethernet code to the ip code and finally to this queue). udp_read() eventually reads from this queue.

size_t uf_rd_count:
size_t uf_wr_count:


The number of bytes requested in a read/write request.

uf_rd_count is the number of bytes requested to be read from the read queue (see uf_rdbuf_head/uf_rdbuf_tail above) and uf_wr_count is the number of bytes requested to be written to lower layers (e.g., the ip code).

time_t uf_exp_tim:

When the read queue of a udp file descriptor is empty and a packet is placed in the queue, the timer (uf_exp_tim) is set. If this timer expires before the packet is read, this packet and all packets that have been subsequently added to the read queue are discarded.


struct udp_fd *uf_port_next:

When a udp file descriptor is opened, hash_fd() places the file descriptor in a linked list. uf_port_next links the file descriptor to the next file descriptor in the linked list.


nwio_udpopt_t:
unsigned long nwuo_flags:
udpport_t nwuo_locport:
udpport_t nwuo_remport:
ipaddr_t nwuo_locaddr:
ipaddr_t nwuo_remaddr:



NWUO_RA_SET and NWUO_RP_SET specify that the Remote Address (RA) and the Remote Port (RP) are set for the udp file descriptor and that all packets written to the udp file descriptor are sent to the address/port nwuo_remaddr/nwuo_remport.

If the NWUO_LP_ANY flag is set, the port specified in the outgoing udp header is the port specified in the pseudo udp header that is written to the udp file descriptor along with the payload data. Otherwise, the port is taken from the udp file descriptor's configuration (specifically, the file descriptor's nwuo_locport field).

From ip(4):

"NWUO_RA_SET sets the remote IP address the value of nwuo_remaddr. Only packets from that address will be delivered and all packets will be sent to that address."

"NWUO_RP_SET sets the remote UDP port to the value of nwuo_remport. Only packets with a matching remote port will be delivered and all packets will be sent to that port."

"NWUO_LP_SEL requests the server to pick a port. This port will be in the range from 32768 to 65535 and it will be unique. NWUO_LP_SET sets the local port to the value of the nwuo_locport field. NWUO_LP_ANY does not select a port. Reception of data is therefore not possible but it is possible to send data."

These fields are set during the udp file descriptor's configuration.


0061068 
0061069 #define UFF_EMPTY       0x0
0061070 #define UFF_INUSE       0x1
0061071 #define UFF_IOCTL_IP       0x2
0061072 #define UFF_READ_IP       0x4
0061073 #define UFF_WRITE_IP       0x8
0061074 #define UFF_OPTSET       0x10
0061075 
0061076 FORWARD void read_ip_packets ARGS(( udp_port_t *udp_port ));
0061077 FORWARD void udp_buffree ARGS(( int priority ));
0061078 #ifdef BUF_CONSISTENCY_CHECK
0061079 FORWARD void udp_bufcheck ARGS(( void ));
0061080 #endif
0061081 FORWARD void udp_main ARGS(( udp_port_t *udp_port ));
0061082 FORWARD acc_t *udp_get_data ARGS(( int fd, size_t offset, size_t count,
0061083          int for_ioctl ));
0061084 FORWARD int udp_put_data ARGS(( int fd, size_t offset, acc_t *data,        
0061085          int for_ioctl ));
0061086 FORWARD void udp_restart_write_port ARGS(( udp_port_t *udp_port ));
0061087 FORWARD void udp_ip_arrived ARGS(( int port, acc_t *pack, size_t pack_size ));
0061088 FORWARD void reply_thr_put ARGS(( udp_fd_t *udp_fd, int reply,
0061089          int for_ioctl ));
0061090 FORWARD void reply_thr_get ARGS(( udp_fd_t *udp_fd, int reply,
0061091          int for_ioctl ));
0061092 FORWARD int udp_setopt ARGS(( udp_fd_t *udp_fd ));
0061093 FORWARD udpport_t find_unused_port ARGS(( int fd ));
0061094 FORWARD int is_unused_port ARGS(( Udpport_t port ));
0061095 FORWARD int udp_packet2user ARGS(( udp_fd_t *udp_fd ));
0061096 FORWARD void restart_write_fd ARGS(( udp_fd_t *udp_fd ));
0061097 FORWARD u16_t pack_oneCsum ARGS(( acc_t *pack ));
0061098 FORWARD void udp_rd_enqueue ARGS(( udp_fd_t *udp_fd, acc_t *pack,
0061099                                                  time_t exp_tim ));
0061100 FORWARD void hash_fd ARGS(( udp_fd_t *udp_fd ));
0061101 FORWARD void unhash_fd ARGS(( udp_fd_t *udp_fd ));
0061102 
0061103 PRIVATE udp_port_t *udp_port_table;
0061104 PRIVATE udp_fd_t udp_fd_table[UDP_FD_NR];
UDP_FD_NR is #define'd on line 61021 as 16. udp_fd_table[] is described above.


0061105 
0061106 PUBLIC void udp_prep()          /* allocate udp_port_table[]; takes no arguments and returns nothing */
udp_prep()

udp_prep() allocates memory for udp_port_table[], whose length is equal to the number of interfaces configured. For the following inet.conf file:

eth0 DP8390 0 { default; };
psip1;

there will be 2 interfaces, one for the ethernet port and one for the psip port.

udp_prep() is called a single time from inet.c.


0061107 {
0061108          udp_port_table= alloc(ip_conf_nr * sizeof(udp_port_table[0]));   /* room for ip_conf_nr udp_port_t elements, one per configured interface */
alloc()

On the Minix system, the memory for a process is divided in the following manner:



Instructions (e.g., MOV AX, BX) are stored in the text segment, initialized global variables are stored in the data segment, and uninitialized global variables are stored in the bss. Dynamically allocated memory is allocated from the heap (generally using malloc()), and automatic variables (among other things) are allocated from the stack.

The "break" is the boundary between the (data + bss + the space previously allocated from the heap) and the unallocated space from the heap. alloc(size_t size) increases the size of the (data + bss + already allocated space from the heap) (by calling sbrk()) and returns a pointer to the newly claimed area. If size, alloc()'s only parameter, is a multiple of 4, size bytes are claimed. If size is not a multiple of 4, the value is rounded up to the next multiple of 4 and this space is claimed.


0061109 }
0061110 
0061111 PUBLIC void udp_init()
udp_init()

Like udp_prep(), udp_init() is called only once. udp_init() initializes udp_port_table[] and calls sr_add_minor() and udp_main() to complete the initialization of the udp layer.


0061112 {
0061113          udp_fd_t *udp_fd;
0061114          udp_port_t *udp_port;
0061115          struct ip_conf *icp;
0061116          int i, j;
0061117 
0061118          assert (BUF_S >= sizeof(struct nwio_ipopt));
0061119          assert (BUF_S >= sizeof(struct nwio_ipconf));
0061120          assert (BUF_S >= sizeof(struct nwio_udpopt));
0061121          assert (BUF_S >= sizeof(struct udp_io_hdr));
0061122          assert (UDP_HDR_SIZE == sizeof(udp_hdr_t));
0061123          assert (UDP_IO_HDR_SIZE == sizeof(udp_io_hdr_t));
0061124 
0061125 #if ZERO
The following loop initializes udp_fd_table[], which has UDP_FD_NR elements (UDP_FD_NR = 16).

"UFF" stands for "Udp File descriptor Flag".

Code between "#if ZERO" and its matching "#endif" is not compiled and therefore not executed. udp_fd_table[] is a statically allocated array (see line 61104), not memory obtained from alloc(), so it is zero-initialized when the program is loaded. Since NULL and UFF_EMPTY are both equal to 0, this block does not need to be executed.


0061126          for (i= 0, udp_fd= udp_fd_table; i<UDP_FD_NR; i++, udp_fd++)
0061127          {
0061128                   udp_fd->uf_flags= UFF_EMPTY;
0061129                   udp_fd->uf_rdbuf_head= NULL;
0061130          }
0061131 #endif
0061132 
0061133 #ifndef BUF_CONSISTENCY_CHECK
0061134          bf_logon(udp_buffree);
bf_logon()

bf_logon() is used by eth_init(), psip_init(), ip_init(), icmp_init(), tcp_init(), and udp_init() to register their functions for freeing buffers. For example, eth_init() calls bf_logon() with an argument of eth_buffree().

After bf_logon() is finished, freereq[] is configured as follows:

freereq[0]=eth_buffree
freereq[1]=psip_buffree
freereq[2]=ip_buffree
freereq[3]=icmp_buffree
freereq[4]=tcp_buffree
freereq[5]=udp_buffree



0061135 #else
0061136          bf_logon(udp_buffree, udp_bufcheck);
0061137 #endif
0061138 
0061139          for (i= 0, udp_port= udp_port_table, icp= ip_conf;
0061140                   i<ip_conf_nr; i++, udp_port++, icp++)
The following loop initializes the elements of udp_port_table[]. For the following inet.conf file:

eth0 DP8390 0 { default; };
psip1;

there will be two elements initialized, one for the ethernet port and one for the psip port.

An understanding of udp_port_table[] is necessary.


udp_port / udp_port_table[]


For the following example inet.conf file:

eth0 DP8390 0 { default; };
psip1;

there are 2 physical udp ports. Port 0 corresponds to the ethernet device whose corresponding udp device is /dev/udp0 and port 1 corresponds to the psip device whose corresponding udp device is /dev/udp1.

For this configuration file, udp_port_table[] will have two elements, udp_port_table[0] and udp_port_table[1]. udp_init() initializes these two elements to indicate they are unused and then calls udp_main(). udp_main(), in turn, calls ip_open(), which claims an element in ip_fd_table[] (in other words, an "ip file descriptor" is opened). In this way, the udp code and the ip code are interconnected. Note that there is a 1:1 relationship between udp_port_table[] elements and ip_fd_table[] elements. What this means is that a udp device (e.g., /dev/udp0) will use the same ip file descriptor (i.e., the same element of ip_fd_table[]) to handle every request (read, write, and ioctl). On the other hand, there is a one-to-many relationship between udp ports and udp file descriptors.



Each element of udp_port_table[] is of type udp_port_t:

typedef struct udp_port

{
int up_flags;
int up_state;
int up_ipfd;
int up_ipdev;
acc_t *up_wr_pack;
ipaddr_t up_ipaddr;
struct udp_fd *up_next_fd;
struct udp_fd *up_write_fd;
struct udp_fd *up_port_any;
struct udp_fd *up_port_hash[UDP_PORT_HASH_NR];
} udp_port_t;

int up_flags:

#define UPF_EMPTY 0x0
#define UPF_WRITE_IP 0x1
#define UPF_WRITE_SP 0x2
#define UPF_READ_IP 0x4
#define UPF_READ_SP 0x8
#define UPF_SUSPEND 0x10
#define UPF_MORE2WRITE 0x20

Most of these flags are meaningless.

int up_state:

#define UPS_EMPTY 0
#define UPS_SETPROTO 1
#define UPS_GETCONF 2
#define UPS_MAIN 3
#define UPS_ERROR 4

During initialization, udp_main() is called to initialize the udp port. If the underlying ip port has been already configured, the udp port will be successfully initialized and the udp port's state will become UPS_MAIN, its normal operational state.

int up_ipfd:

As described above, ip_open() is called to open an ip file descriptor. up_ipfd is then set to this file descriptor (i.e., the index of the claimed element in ip_fd_table[]). In this way, the udp code and the ip code are linked. For example, immediately after opening the ip file descriptor, the udp code uses the up_ipfd field to configure the newly opened ip file descriptor.

int up_ipdev:

up_ipdev is equal to the port number of both the udp physical port and its associated ip physical port (which will be the same). For example, for the configuration file above, the udp physical port (and the associated ip physical port) corresponding to the first line (the ethernet entry) will have its up_ipdev field equal to 0 and the second line (the psip entry) will have its up_ipdev field equal to 1.

acc_t *up_wr_pack:

After a packet (e.g., a udp packet) has its udp header assembled but before it is passed to the lower layer (i.e., the ip layer), the packet is placed in this field. Note that this field is not a queue and that there will be at most a single packet placed here at any given time.


ipaddr_t up_ipaddr:

During the initialization of the udp port, udp_put_data() is called (indirectly) to set the ip address of the udp port to the ip address of the udp port's underlying ip port (which was set either by RARP or the "ifconfig -h host-IP-address" command).


struct udp_fd *up_next_fd:
struct udp_fd *up_write_fd:

If a write operation is suspended, up_write_fd is the udp file descriptor whose write operation is suspended.

struct udp_fd *up_port_any:
struct udp_fd *up_port_hash[UDP_PORT_HASH_NR]:


up_port_any is a linked list of udp file descriptors, and each element of up_port_hash[] is the head of such a linked list. When a udp packet arrives at the ethernet/psip port, udp_ip_arrived() searches through these linked lists for the corresponding udp file descriptor(s).

UDP_PORT_HASH_NR is #define'd in udp.c as 16. up_port_hash[] is an array that uses a hash to find the corresponding udp file descriptor quickly.


0061141          {
0061142                   udp_port->up_ipdev= i;
0061143 
0061144 #if ZERO
0061145                   udp_port->up_flags= UPF_EMPTY;
0061146                   udp_port->up_state= UPS_EMPTY;
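Note that this assignment lies between "#if ZERO" and "#endif" and is therefore not compiled. In any case, the UPS_EMPTY state is short-lived: up_state will be set to UPS_SETPROTO on line 61173.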


0061147 #endif
0061148                   udp_port->up_next_fd= udp_fd_table;
0061149 #if ZERO
0061150                   udp_port->up_write_fd= NULL;
0061151                   udp_port->up_port_any= NULL;
0061152                   for (j= 0; j<UDP_PORT_HASH_NR; j++)
0061153                            udp_port->up_port_hash[j]= NULL;
0061154 #endif
0061155 
0061156                   sr_add_minor(if2minor(icp->ic_ifno, UDP_DEV_OFF),
0061157                            i, udp_open, udp_close, udp_read,
0061158                            udp_write, udp_ioctl, udp_cancel);
sr_fd / sr_fd_table[] / sr_add_minor()

One of the most important data arrays in the network service is sr_fd_table[], an array of 64 struct sr_fd's. Each sr_fd element in sr_fd_table[] corresponds to either a device or an opened file descriptor to a device (i.e., a "channel"):

typedef struct sr_fd

{
int srf_flags;
int srf_fd;
int srf_port;
sr_open_t srf_open;
sr_close_t srf_close;
sr_write_t srf_write;
sr_read_t srf_read;
sr_ioctl_t srf_ioctl;
sr_cancel_t srf_cancel;
mq_t *srf_ioctl_q, *srf_ioctl_q_tail;
mq_t *srf_read_q, *srf_read_q_tail;
mq_t *srf_write_q, *srf_write_q_tail;
} sr_fd_t;
For each device (e.g., /dev/udp0), an element in sr_fd_table[] is configured by sr_add_minor(). For example, for the following inet.conf file:

eth0 DP8390 0 { default; };
psip1;

an element (i.e., a struct sr_fd) is configured for each of the following devices:

/dev/eth0 sr_fd_table[1]
/dev/ip0 sr_fd_table[2]
/dev/tcp0 sr_fd_table[3]
/dev/udp0 sr_fd_table[4]

/dev/psip1 sr_fd_table[17]
/dev/ip1 sr_fd_table[18]
/dev/tcp1 sr_fd_table[19]
/dev/udp1 sr_fd_table[20]




sr_add_minor() is called in the initialization routines for the various protocols: mnx_eth.c (osdep_eth_init()), psip.c (psip_enable()), ip.c (ip_init()), tcp.c (tcp_init()), and udp.c (udp_init()).



When a device file (e.g., /dev/udp0) is opened by a process, the element that corresponds to the device is copied to an element that is currently unoccupied (see sr_open()). In this way, a "channel" is opened. Using this technique, a channel can be opened, closed, and manipulated without affecting the elements of the descriptors initially set by sr_add_minor().


int srf_flags:

srf_flags is a combination of the following:

#define SFF_FREE 0x00
#define SFF_MINOR 0x01
#define SFF_INUSE 0x02
#define SFF_BUSY 0x3C
#define SFF_IOCTL_IP 0x04
#define SFF_READ_IP 0x08
#define SFF_WRITE_IP 0x10
#define SFF_PENDING_REQ 0x30
#define SFF_SUSPENDED 0x1C0
#define SFF_IOCTL_SUSP 0x40
#define SFF_READ_SUSP 0x80
#define SFF_WRITE_SUSP 0x100

srf_flags is initialized to SFF_FREE for each element in sr_fd_table[]. If the channel corresponds to a device file, srf_flags is set to SFF_INUSE | SFF_MINOR. If the channel does not correspond to a device file, srf_flags is set simply to SFF_INUSE.

When a request comes in for a read, write, or ioctl operation and the network service is not already processing another request for the same operation, srf_flags is set to SFF_READ_IP, SFF_WRITE_IP, or SFF_IOCTL_IP. However, if an operation is attempted but the underlying protocol is still processing a previous request of the same nature (e.g., udp_write()), the appropriate flag (SFF_IOCTL_SUSP, SFF_READ_SUSP, or SFF_WRITE_SUSP) in srf_flags is set.


int srf_fd, srf_port:

srf_fd and srf_port are both set by sr_add_minor(). For the channels in sr_fd_table[] that correspond to the device files (e.g., /dev/udp0), srf_fd is set to the minor device number of the device. For example, if /dev/udp0 is added to sr_fd_table[] and the interface number of the device file is 0 (see comments for ip_conf[]), then the minor device number is:

if2minor(ifno, dev) = ((0)*16 + UDP_DEV) = 0 + 4 = 4

For the channels in sr_fd_table[] that do not correspond to a device file, srf_fd is the file descriptor for the appropriate protocol. For example, if the file system requests that a udp channel be opened, srf_open is dereferenced and udp_open() is called. udp_open() opens a udp file descriptor and returns the index of the corresponding element in udp_fd_table[]. srf_fd is set to the index of this element.

Later, when the file system requests a read or a write on the open channel, srf_fd is passed into the protocol-specific read or write function (e.g., udp_read()), allowing the protocol-specific function to locate the appropriate file descriptor (e.g., udp file descriptor).

srf_port is more straight-forward. srf_port is the index in the protocol's port table. For example, if a system has two udp device files (/dev/udp0 and /dev/udp1), udp_port_table[] will have two entries, 0 and 1. Therefore, srf_port for the entry in sr_fd_table[] that corresponds to /dev/udp0 will be 0 and srf_port for the entry that corresponds to /dev/udp1 will be 1.


sr_open_t srf_open:
sr_close_t srf_close:
sr_write_t srf_write:
sr_read_t srf_read:
sr_ioctl_t srf_ioctl:
sr_cancel_t srf_cancel:


The fields above are all protocol-specific functions and are all set by sr_add_minor(). For example, when sr_add_minor() is called by udp_init(), srf_open, srf_close, srf_write, srf_read, srf_ioctl, and srf_cancel are set to the pointers of the functions udp_open(), udp_close(), udp_write(), udp_read(), udp_ioctl(), and udp_cancel(). Later, when the file system makes a request to the network service, these functions will be called. For example, if the file system requests that data is written to a channel, srf_write is dereferenced and, if the channel is a udp channel, udp_write() is called.

mq_t *srf_ioctl_q, *srf_ioctl_q_tail:
mq_t *srf_read_q, *srf_read_q_tail:
mq_t *srf_write_q, *srf_write_q_tail:


The fields above are linked lists of ioctl, read, and write messages waiting to be processed. When a message requesting an ioctl, read, or write operation is received, the message is placed at the end of the corresponding linked list (unless no earlier message of the same type is still waiting to be processed).


After the initialization of the network service, sr_rec() is called upon receipt of messages from the file system in the endless loop within main(). sr_rec() then calls a function to handle the specific request. For open requests, sr_rec() calls sr_open(); for read, write, and ioctl requests, sr_rec() calls sr_rwio(); for close requests, sr_rec() calls sr_close(); for cancel requests, sr_rec() calls sr_cancel().


0061159 
0061160                   udp_main(udp_port);
udp_main()

udp_main(udp_port) is called in two scenarios:

1) The udp physical port udp_port, udp_main()'s only parameter, is being initialized. During the initialization, udp_main() calls ip_open() to open up an ip file descriptor before calling ip_ioctl() twice to configure the newly opened ip file descriptor (the first call) and to get the ip address of the underlying ip port (the second call).

2) After this initial call to udp_main(), all subsequent calls will finish up the udp port's initialization (if necessary). After this initialization is complete, udp_main() attempts to configure (by calling udp_ioctl()) any udp file descriptors whose configuration was previously suspended and then finally calls read_ip_packets() to process any ip packets waiting to be delivered to udp_port.


0061161          }
0061162 }
0061163 
0061164 PRIVATE void udp_main(udp_port)
0061165 udp_port_t *udp_port;
udp_main()

udp_main(udp_port) is called in two scenarios:

1) The udp physical port udp_port, udp_main()'s only parameter, is being initialized. During the initialization, udp_main() calls ip_open() to open up an ip file descriptor before calling ip_ioctl() twice to configure the newly opened ip file descriptor (the first call) and to get the ip address of the underlying ip port (the second call).

2) After this initial call to udp_main(), all subsequent calls will finish up the udp port's initialization (if necessary). After this initialization is complete, udp_main() attempts to configure (by calling udp_ioctl()) any udp file descriptors whose configuration was previously suspended and then finally calls read_ip_packets() to process any ip packets waiting to be delivered to udp_port.


0061166 {
0061167          udp_fd_t *udp_fd;
0061168          int result, i;
0061169 
0061170          switch (udp_port->up_state)
0061171          {
0061172          case UPS_EMPTY:
It is important to understand that lines 61172 - 61208 initialize the udp physical port udp_port, udp_main()'s only parameter. If the initialization is suspended for whatever reason, subsequent calls to udp_main() will continue where the code was previously suspended. In normal operation, the udp physical port's state is UPS_MAIN.


0061173                   udp_port->up_state= UPS_SETPROTO;
0061174 
0061175                   udp_port->up_ipfd= ip_open(udp_port->up_ipdev,
0061176                            udp_port->up_ipdev, udp_get_data, udp_put_data,
0061177                            udp_ip_arrived);
The up_ipfd field of a udp physical port is the udp port's associated ip file descriptor.

Note that the first and second arguments are the same. The first argument is the ip port number that corresponds to this udp port number and the second argument is the udp port number. udp_port_table[0] will always correspond to ip_port_table[0], udp_port_table[1] will always correspond to ip_port_table[1], and so on. These port numbers will always be the same. So, for example, if udp_port_table[0] was being configured, the first and second arguments would be zero.


ip_open()


ip_open() finds an available ip file descriptor in ip_fd_table[], sets a few of the ip file descriptor's fields, and then returns the index of the ip file descriptor within ip_fd_table[]. ip_open() is called by higher-level code (e.g., udp_main()) and the returned ip file descriptor is then associated with a higher-level port (e.g., udp port).



Note that there will only be a few ip file descriptors open at any given time. There will be an ip file descriptor opened for each interface for each client (udp, tcp, and icmp) and there will be one ip file descriptor opened each time the /dev/ip file is opened directly (as opposed to when, for example, the /dev/udp file is opened).


0061178                   if (udp_port->up_ipfd < 0)
If this point in the code is reached by udp_init() calling udp_main(), ip_open() is unlikely to fail (i.e., to return a negative value because no ip file descriptor could be acquired). This would only occur if there were more than 32 protocols above ip (there are 32 ip file descriptors; in other words ip_fd_table[] has 32 elements).


0061179                   {
0061180                            udp_port->up_state= UPS_ERROR;
0061181                            DBLOCK(1, printf("%s, %d: unable to open ip port\n",
0061182                                     __FILE__, __LINE__));
0061183                            return;
0061184                   }
0061185 
0061186                   result= ip_ioctl(udp_port->up_ipfd, NWIOSIPOPT);
ip_ioctl()

ip_ioctl(fd, req) performs one of several tasks on the ip file descriptor whose index within ip_fd_table[] is fd, the first parameter. The task performed depends on req, the second parameter.

NWIOSIPOPT: Set the options (the if_ipopt field of the ip file descriptor) on the ip file descriptor. For example, during the initialization of a physical udp port, ip_ioctl() is called with req equal to NWIOSIPOPT.

An example of an ip option (i.e., ip flag) is the NWIO_EN_BROAD flag. This flag is set if the ip file descriptor accepts broadcast packets. The options desired are obtained from the user process. For example, if a udp port opened up the ip file descriptor, udp_get_data() is (indirectly) called to obtain the configuration data.

NWIOGIPOPT: Send the ip file descriptor's options to the user process requesting the information. The information is sent in a struct of type nwio_ipopt_t.

NWIOSIPCONF: Configure the ip port (for example, the ip address can be configured) that corresponds to the ip file descriptor fd. The fields are obtained from the user process. For a detailed description of the different settings, click here.

NWIOGIPCONF: Send the ip address/subnet information (i.e., send a nwio_ipconf_t struct) to the next higher layer. For example, if the next higher layer is udp, ip_ioctl() calls (indirectly) udp_put_data(), which sets the ip address for the udp port (i.e., sets the up_ipaddr field of the corresponding element in udp_port_table[]).

NWIOGIPIROUTE, NWIOSIPIROUTE, NWIOGIPOROUTE, NWIODIPIROUTE, NWIOSIPOROUTE: It is possible to influence the route taken by a packet. These ioctl requests alter the input and output routing tables.


0061187                   if (result == NW_SUSPEND)
0061188                            udp_port->up_flags |= UPF_SUSPEND;
ip_ioctl() cannot return NW_SUSPEND when called with NWIOSIPOPT as the second parameter.


0061189                   if (result<0)
0061190                   {
0061191                            return;
0061192                   }
0061193                   if (udp_port->up_state != UPS_GETCONF)
ip_ioctl() calls udp_get_data() (indirectly), which sets up_state to UPS_GETCONF (if there aren't any problems). If up_state is indeed set to UPS_GETCONF, the code falls through.


0061194                            return;
0061195                   /* drops through */
0061196          case UPS_GETCONF:
0061197                   udp_port->up_flags &= ~UPF_SUSPEND;
0061198 
0061199                   result= ip_ioctl(udp_port->up_ipfd, NWIOGIPCONF);
ip_ioctl()

ip_ioctl(fd, req) performs one of several tasks on the ip file descriptor whose index within ip_fd_table[] is fd, the first parameter. The task performed depends on req, the second parameter.

NWIOSIPOPT: Set the options (the if_ipopt field of the ip file descriptor) on the ip file descriptor. For example, during the initialization of a physical udp port, ip_ioctl() is called with req equal to NWIOSIPOPT.

An example of an ip option (i.e., ip flag) is the NWIO_EN_BROAD flag. This flag is set if the ip file descriptor accepts broadcast packets. The options desired are obtained from the user process. For example, if a udp port opened up the ip file descriptor, udp_get_data() is (indirectly) called to obtain the configuration data.

NWIOGIPOPT: Send the ip file descriptor's options to the user process requesting the information. The information is sent in a struct of type nwio_ipopt_t.

NWIOSIPCONF: Configure the ip port (for example, the ip address can be configured) that corresponds to the ip file descriptor fd. The fields are obtained from the user process. For a detailed description of the different settings, click here.

NWIOGIPCONF: Send the ip address/subnet information (i.e., send a nwio_ipconf_t struct) to the next higher layer. For example, if the next higher layer is udp, ip_ioctl() calls (indirectly) udp_put_data(), which sets the ip address for the udp port (i.e., sets the up_ipaddr field of the corresponding element in udp_port_table[]).

NWIOGIPIROUTE, NWIOSIPIROUTE, NWIOGIPOROUTE, NWIODIPIROUTE, NWIOSIPOROUTE: It is possible to influence the route taken by a packet. These ioctl requests alter the input and output routing tables.


0061200                   if (result == NW_SUSPEND)
0061201                            udp_port->up_flags |= UPF_SUSPEND;
If the ip port does not already have an assigned ip address, ip_ioctl() returns NW_SUSPEND.


0061202                   if (result<0)
0061203                   {
0061204                            return;
0061205                   }
0061206                   if (udp_port->up_state != UPS_MAIN)
The second time ip_ioctl() (indirectly) calls udp_put_data(), udp_put_data() sets up_state to UPS_MAIN (if the first call to ip_ioctl() was successful).


0061207                            return;
0061208                   /* drops through */
0061209          case UPS_MAIN:
If the code has reached this point, the initialization of the udp physical port is complete and the port is now in its normal operational state (UPS_MAIN). Note that a udp port cannot be read from or written to until its state is UPS_MAIN.

The code below attempts to configure the udp file descriptors whose previous configuration attempts were suspended and then calls read_ip_packets() to process any packets waiting to be delivered to udp file descriptors associated with this udp port.


0061210                   udp_port->up_flags &= ~UPF_SUSPEND;
0061211 
0061212                   for (i= 0, udp_fd= udp_fd_table; i<UDP_FD_NR; i++, udp_fd++)
If there are any open udp file descriptors associated with the udp physical port udp_port, udp_main()'s only parameter, that are waiting for ioctl, call udp_ioctl() for the udp file descriptor. Obviously, if udp_main() is called as part of the initialization, no udp file descriptor is waiting for ioctl since no udp file descriptor has yet been opened.


0061213                   {
0061214                            if (!(udp_fd->uf_flags & UFF_INUSE))
0061215                                     continue;
0061216                            if (udp_fd->uf_port != udp_port)
0061217                                     continue;
0061218                            if (udp_fd->uf_flags & UFF_IOCTL_IP)
0061219                                     udp_ioctl(i, udp_fd->uf_ioreq);
udp_ioctl()

udp_ioctl() handles the following ioctl requests for udp file descriptors:


NWIOSUDPOPT: Set UDP OPTions. udp_ioctl() calls udp_setopt(), which gets data (specifically, a nwio_udpopt_t struct) from the user process and sets the options accordingly. Before a udp file descriptor can be used, the file descriptor must be configured by udp_ioctl().


NWIOGUDPOPT: Get UDP OPTions. Sends a nwio_udpopt_t struct to the process that opened the udp file descriptor which contains the configuration data for the udp file descriptor.


udp_ioctl() is called (indirectly) by sr_rwio().


0061220                   }
0061221                   read_ip_packets(udp_port);
read_ip_packets() / udp

read_ip_packets() is called only a single time, during the initialization of the udp code.

read_ip_packets() reads all packets (by repeatedly calling ip_read()) in the udp port's associated ip file descriptor's read queue until all of the packets have either been delivered to the processes that issued the read requests (provided that the packets have not expired) or placed in the udp file descriptor's read queue.

read_ip_packets() is also called by udp_get_data(). However, this line of code is never executed (see comment for line 61391).


0061222                   return;
0061223 #if !CRAMPED
0061224          default:
0061225                   DBLOCK(1, printf("udp_port_table[%d].up_state= %d\n",
0061226                            udp_port->up_ipdev, udp_port->up_state));
0061227                   ip_panic(( "unknown state" ));
0061228 #endif
0061229          }
0061230 }
0061231 
0061232 int udp_open (port, srfd, get_userdata, put_userdata, put_pkt)
0061233 int port;
0061234 int srfd;
0061235 get_userdata_t get_userdata;
0061236 put_userdata_t put_userdata;
0061237 put_pkt_t put_pkt;
udp_open()

udp_open(port, srfd, get_userdata, put_userdata, put_pkt), called from sr_open(), finds the first unused element in udp_fd_table[], configures this udp file descriptor with the arguments passed in, and returns the index of the (newly configured) element within udp_fd_table[].

The parameters are explained below:

When a process opens one of the udp devices (e.g., /dev/udp), sr_open() claims an unused element in sr_fd_table[] and copies the element of sr_fd_table[] that corresponds to the device to this unused element. This element number is then passed in as the second argument (srfd). Later, when the process wishes to read or write or perform ioctl on this open file descriptor, it includes this sr file descriptor in the message that it sends the file system.

For the following example inet.conf file:

eth0 DP8390 0 { default; };
psip1;

there are 2 ports. Port 0 corresponds to the ethernet device and port 1 corresponds to the psip port.

get_userdata is a pointer to the function that gets the data from the process. This function will always be sr_get_userdata().

put_userdata is a pointer to the function that copies data into the user process. This function will always be sr_put_userdata().

The put_pkt parameter is not used in this function.


0061238 {
0061239          int i;
0061240          udp_fd_t *udp_fd;
0061241 
0061242          for (i= 0; i<UDP_FD_NR && (udp_fd_table[i].uf_flags & UFF_INUSE);
0061243                   i++);
Find the first unused entry in udp_fd_table[]. The index of this element is eventually returned by udp_open().


0061244 
0061245          if (i>= UDP_FD_NR)
If the 16 UDP file descriptors are all unavailable, return EAGAIN.

EAGAIN stands for "resource temporarily unavailable." EAGAIN is #define'd in include/errno.h.


0061246          {
0061247                   DBLOCK(1, printf("out of fds\n"));
0061248                   return EAGAIN;
0061249          }
0061250 
0061251          udp_fd= &udp_fd_table[i];
0061252 
0061253          udp_fd->uf_flags= UFF_INUSE;
Mark the element as being used.


0061254          udp_fd->uf_port= &udp_port_table[port];
Link the newly claimed udp file descriptor to its associated udp port and its associated sr file descriptor.

Recall the relationship between the various arrays:






0061255          udp_fd->uf_srfd= srfd;
0061256          udp_fd->uf_udpopt.nwuo_flags= UDP_DEF_OPT;
Clear all the udp file descriptor's flags (UDP_DEF_OPT is #define'd as NWUO_NOFLAGS; NWUO_NOFLAGS is #define'd as 0x0000L).

Before the udp file descriptor may be read from or written to, the udp file descriptor must be configured using the NWIOSUDPOPT (NetWork IO Set UDP OPTions) ioctl call. A NWIOSUDPOPT ioctl call causes udp_setopt() (line 61473) to be called. udp_setopt() sets many of the uf_udpopt fields.


0061257          udp_fd->uf_get_userdata= get_userdata;
0061258          udp_fd->uf_put_userdata= put_userdata;
The uf_get_userdata and uf_put_userdata fields are set to pointers to sr_get_userdata() and sr_put_userdata(), respectively. These values were passed in as arguments in sr_add_minor().


sr_get_userdata()


sr_get_userdata() is the counterpart to sr_put_userdata() and does one of two things:

1) Copies data from a user process to a buffer (to be more specific, a chain of accessors) within the network service (this process). This can be either ioctl data (in which case, for_ioctl is TRUE) or data. For example, udp_setopt() (indirectly) calls sr_get_userdata() to get configuration data. Also, restart_write_fd() (indirectly) calls sr_get_userdata() before passing data onto the ip code.

2) Sends a REVIVE message to the file system (FS). For example, if an illegal option is selected while configuring a udp file descriptor, reply_thr_get() is called, which then (indirectly) calls sr_get_userdata(), passing in EBADMODE for the parameter count. restart_write_fd() also (indirectly) calls sr_get_userdata() to send a REVIVE message back to the FS indicating the number of bytes read after copying the data from the user process.

sr_get_userdata() is often called twice in close succession. The first time to attempt to copy the data from the user process and then the second time to send a message to the FS indicating whether the copy operation was successful and, if it was successful, the number of bytes copied.

In my opinion, like sr_put_userdata(), this function should have been made into two functions. As it is, it is too confusing.


0061259          assert(udp_fd->uf_rdbuf_head == NULL);
0061260          udp_fd->uf_port_next= NULL;
0061261 
0061262          return i;
Return the index of the newly claimed udp_fd_table[] element.


0061263 
0061264 }
0061265 
0061266 PRIVATE acc_t *udp_get_data (port, offset, count, for_ioctl)
0061267 int port;
0061268 size_t offset;
0061269 size_t count;
0061270 int for_ioctl;
udp_get_data()

udp_get_data() is called only indirectly by the ip code. udp_get_data() has a number of uses, which makes it a somewhat difficult function to understand.

During the initialization of a udp port (when the state of the udp port is UPS_SETPROTO), udp_get_data() is called twice. The first time udp_get_data() is called, the ip code tries to initialize the udp port's underlying ip file descriptor. udp_get_data() is called the second time (through reply_thr_get()) to change the state of the udp port to UPS_GETCONF.

After initialization (when the state of the udp port is UPS_MAIN), udp_get_data() is called to send either packets or configuration data to the underlying layer (i.e., the ip layer). For a write to a udp file descriptor, a packet is placed in the udp file descriptor's associated udp port's write field and udp_get_data() is then called by ip_write() to move the data to the ip layer. If ip_ioctl() is called to configure a udp port, ip_ioctl() calls udp_get_data() to get configuration data for the port (including, for example, the ip address of the port).

After a write operation is called, udp_get_data() is called a second time to clear some of the udp port's fields in preparation for the next write.


0061271 {
0061272          udp_port_t *udp_port;
0061273          udp_fd_t *udp_fd;
0061274          int result;
0061275 
0061276          udp_port= &udp_port_table[port];
Find the udp port whose index within udp_port_table[] is port, the first parameter of udp_get_data().


0061277 
0061278          switch(udp_port->up_state)
0061279          {
If the state of the port is UPS_SETPROTO, get configuration data for the underlying ip file descriptor. If the state is UPS_MAIN (the operational state), get packets.


0061280          case UPS_SETPROTO:
0061281 assert (for_ioctl);
0061282                   if (!count)
During initialization, as described above, ip_ioctl() calls udp_get_data() (indirectly through reply_thr_get()) to change the udp port's state to UPS_GETCONF.


0061283                   {
0061284                            result= (int)offset;
0061285                            if (result<0)
0061286                            {
0061287                                     udp_port->up_state= UPS_ERROR;
0061288                                     break;
0061289                            }
0061290                            udp_port->up_state= UPS_GETCONF;
0061291                            if (udp_port->up_flags & UPF_SUSPEND)
0061292                                     udp_main(udp_port);
If the initialization of the udp port was previously not able to finish, this is a chance for the initialization to finally finish. Also, handle unprocessed udp ioctl requests (if there are any) and handle unprocessed ip packets (again, if there are any).


udp_main()


udp_main(udp_port) is called in two scenarios:

1) The udp physical port udp_port, udp_main()'s only parameter, is being initialized. During the initialization, udp_main() calls ip_open() to open up an ip file descriptor before calling ip_ioctl() twice to configure the newly opened ip file descriptor (the first call) and to get the ip address of the underlying ip port (the second call).

2) After this initial call to udp_main(), all subsequent calls will finish up the udp port's initialization (if necessary). After this initialization is complete, udp_main() attempts to configure (by calling udp_ioctl()) any udp file descriptors whose configuration was previously suspended and then finally calls read_ip_packets() to process any ip packets waiting to be delivered to udp_port.


0061293                            return NULL;
0061294                   }
0061295                   else
During initialization (as described above), ip_ioctl() (indirectly) calls udp_get_data() to get configuration data appropriate for a udp port's underlying ip file descriptor (in the form of an nwio_ipopt_t struct).


0061296                   {
0061297                            struct nwio_ipopt *ipopt;
nwio_ipopt

The nwio_ipopt struct is used to pass ip option values from a higher-layer level (e.g., icmp, udp) to the ip layer during the configuration of an ip file descriptor. Note that an ip file descriptor cannot be used until it is configured.

typedef struct nwio_ipopt

{
u32_t nwio_flags;
ipaddr_t nwio_rem;
ip_hdropt_t nwio_hdropt;
u8_t nwio_tos;
u8_t nwio_ttl;
u8_t nwio_df;
ipproto_t nwio_proto;
} nwio_ipopt_t;
u32_t nwio_flags:

nwio_flags will be a combination of the flags below. Most of the flags within a set are exclusionary. For example, NWIO_REMSPEC and NWIO_REMANY can't both be set.

Note that "EN" stands for "ENable" and "DI" stands for "DIsable".

#define NWIO_EXCL 0x00000001l
#define NWIO_SHARED 0x00000002l
#define NWIO_COPY 0x00000003l

From ip(4):

"The options covered by NWIO_ACC_MASK control the number of channels that can use one IP protocol. If NWIO_EXCL is specified then only that channel can use a certain IP protocol. If NWIO_SHARED then multiple channels that all have to specify NWIO_SHARED can use the same IP protocol, but incoming packets will be delivered to at most one channel. NWIO_COPY does not impose any restrictions. Every channel gets a copy of an incoming packet."

Note that, for whatever reason, NWIO_EXCL behaves exactly as NWIO_COPY. Every channel receives a copy of an incoming packet.

The access flags are important during the read of an ip file descriptor.

#define NWIO_EN_LOC 0x00000010l
#define NWIO_DI_LOC 0x00100000l
#define NWIO_EN_BROAD 0x00000020l
#define NWIO_DI_BROAD 0x00200000l

NWIO_EN_LOC specifies that this file descriptor can receive packets destined for this machine and NWIO_EN_BROAD specifies that this file descriptor can receive broadcast packets.

#define NWIO_REMSPEC 0x00000100l
#define NWIO_REMANY 0x01000000l

If the NWIO_REMANY flag is set, this file descriptor can send packets to any destination. If, on the other hand, the NWIO_REMSPEC flag is set, this file descriptor can only communicate with a single host. This host is specified by nwio_rem (see below).

#define NWIO_PROTOSPEC 0x00000200l
#define NWIO_PROTOANY 0x02000000l

If NWIO_PROTOANY is set, the ip file descriptor will accept packets of any protocol type. However, if NWIO_PROTOSPEC is set, only packets with a protocol type of nwio_proto (see below) are accepted.

#define NWIO_HDR_O_SPEC 0x00000400l
#define NWIO_HDR_O_ANY 0x04000000l

If the NWIO_HDR_O_SPEC flag in nwio_flags is set, nwio_hdropt (see below) must be set. If this is the case, the extra header information for all outgoing packets will be taken from nwio_hdropt, nwio_tos, nwio_ttl, and nwio_df (see below).

#define NWIO_RWDATONLY 0x00001000l
#define NWIO_RWDATALL 0x10000000l

NWIO_RWDATONLY is a little tricky. If the NWIO_RWDATONLY flag is set, the header is omitted when passing the packet down to the ip code and the NWIO_EN_LOC, NWIO_DI_BROAD, NWIO_REMSPEC, NWIO_PROTOSPEC and NWIO_HDR_O_SPEC flags must all be set (and NWIO_REMANY and NWIO_PROTOANY cannot be set). In other words, this file descriptor can only send the data to one destination using one protocol. With NWIO_RWDATALL, by contrast, the header accompanies the data, which is why the udp code (see below) can combine NWIO_RWDATALL with NWIO_REMANY and NWIO_HDR_O_ANY.

During the configuration of an ip file descriptor being opened by the udp code, ip_ioctl() calls udp_get_data() (indirectly) to get configuration information. udp_get_data() returns the following configuration information:

NWIO_COPY | NWIO_EN_LOC | NWIO_EN_BROAD | NWIO_REMANY | NWIO_PROTOSPEC | NWIO_HDR_O_ANY | NWIO_RWDATALL

ipaddr_t nwio_rem:

If the NWIO_REMSPEC flag in nwio_flags is set (see above), nwio_rem is the ip address of the destination host.


ip_hdropt_t nwio_hdropt:

The ip header length is flexible to allow for extra options. For example, in addition to the normal fields (e.g., destination ip address), an ip header may specify the route it wishes to take or request that the route be recorded.

that it wishes to record a route or to ip_chk_hdropt().


u8_t nwio_tos:

"tos" stands for "Type Of Service". nwio_tos is initialized to 0 but can be changed by ip_ioctl().


u8_t nwio_ttl:

"ttl" stands for "Time To Live", which is the number of hops that a packet can take before being dropped by a router. nwio_ttl is initialized to 255 but can be changed by ip_ioctl().


u8_t nwio_df:

nwio_df specifies whether fragmentation is allowed or not. nwio_df is initialized to FALSE but, again, can be changed by ip_ioctl().


ipproto_t nwio_proto:

nwio_proto can take one of the values below. Obviously, if the udp code opens up an ip file descriptor, nwio_proto will be IPPROTO_UDP. The same is true for icmp and tcp.

#define IPPROTO_ICMP 1
#define IPPROTO_TCP 6
#define IPPROTO_UDP 17

This field is used in conjunction with the NWIO_PROTOSPEC flag in nwio_flags. If this flag is set, nwio_proto must be set.






0061298                            acc_t *acc;
0061299 
0061300 assert (!offset);
0061301 assert (count == sizeof(*ipopt));
0061302 
0061303                            acc= bf_memreq(sizeof(*ipopt));
bf_memreq()

After the buffers have been initialized, accessors[] looks like the following:



bf_memreq() allocates accessors to the caller. For example, if 1514 bytes of buffer space are requested immediately after the network process starts and each buffer is 512 bytes (the default), then accessors[] will look like the following:



Note that three elements of accessors[] have been removed from buf512_freelist and that the head of the chain of the 3 accessors is returned by bf_memreq(). Also note that the acc_linkC and buf_linkC fields have been set to one and acc_length and acc_offset have been set to their appropriate values.

So what happens if there are not enough buffers on the buf512_freelist to satisfy a request? On lines 2280-2290 of buf.c, functions that free buffers for the specific clients (e.g., eth_buffree()) are called until there are enough buffers on buf512_freelist.

For a complete description of the network service's buffer management, click here.


0061304                            ipopt= (struct nwio_ipopt *)ptr2acc_data(acc);
ptr2acc_data()

The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.


0061305                            ipopt->nwio_flags= NWIO_COPY | NWIO_EN_LOC |
0061306                                     NWIO_EN_BROAD | NWIO_REMANY | NWIO_PROTOSPEC |
0061307                                     NWIO_HDR_O_ANY | NWIO_RWDATALL;
0061308                            ipopt->nwio_proto= IPPROTO_UDP;
Choose values for nwio_proto and nwio_flags that make sense for an ip file descriptor supporting the udp layer.


0061309                            return acc;
0061310                   }
0061311          case UPS_MAIN:
0061312 assert (!for_ioctl);
0061313 assert (udp_port->up_flags & UPF_WRITE_IP);
0061314                   if (!count)
Remove the first packet in the udp port's write queue and send the file system (FS) a message indicating success. offset, udp_get_data()'s second parameter, contains the message to be sent to the FS.

After a (successful or unsuccessful) write operation, ip_write.c's error_reply() is called with a value of 0 (zero) for count in order to get ready for the next write.

ip_ioctl.c's reply_thr_get() (indirectly) calls udp_get_data() to send the results of operations within ip_ioctl().


0061315                   {
0061316                            result= (int)offset;
0061317 assert (udp_port->up_wr_pack);
0061318                            bf_afree(udp_port->up_wr_pack);
Free the packet, whether the write operation was successful or unsuccessful.


bf_afree()


After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the chain will not be freed in its entirety. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0061319                            udp_port->up_wr_pack= 0;
After a packet (e.g., a udp packet) has its udp header assembled but before it is passed to the lower layer (i.e., the ip layer), the packet is placed in this field. Since the packet has either been processed or rejected by the ip layer, the field may be cleared.


0061320                            if (udp_port->up_flags & UPF_WRITE_SP)
It is VERY important to note that UPF_WRITE_SP is never set. In previous versions of the network service, ip_write() (line 62324) could return NW_SUSPEND, which would set the UPF_WRITE_SP flag for the udp port. However, ip_write() currently only returns NW_OK. Therefore, the code on lines 61322 - 61335 is never executed and, for this reason, these lines will not be documented.


0061321                            {
0061322                                     if (udp_port->up_write_fd)
0061323                                     {
0061324                                              udp_fd= udp_port->up_write_fd;
0061325                                              udp_port->up_write_fd= NULL;
0061326                                              udp_fd->uf_flags &= ~UFF_WRITE_IP;
0061327                                              reply_thr_get(udp_fd, result, FALSE);
0061328                                     }
0061329                                     udp_port->up_flags &= ~(UPF_WRITE_SP |
0061330                                              UPF_WRITE_IP);
0061331                                     if (udp_port->up_flags & UPF_MORE2WRITE)
0061332                                     {
0061333                                              udp_restart_write_port(udp_port);
0061334                                     }
0061335                            }
0061336                            else
0061337                                     udp_port->up_flags &= ~UPF_WRITE_IP;
UPF_WRITE_IP was set preceding the call to ip_write() on line 62323. Since the operation (whether successful or unsuccessful) is over, the flag is cleared.


0061338                   }
0061339                   else
If called from ip_write() on line 43040, this else clause is executed. udp_get_data() passes the packet from the udp port's write queue that was placed there by restart_write_fd().


0061340                   {
0061341                            return bf_cut (udp_port->up_wr_pack, offset, count);
bf_cut()

If a section of a linked list needs to be duplicated, bf_cut(data, offset, length) is called. For example, if a section of length 50 starting at an offset of 75 of the linked list below needs to be duplicated, bf_cut(data, 75, 50) is called:



Note that the original linked list remains unchanged and that acc_linkC for all the accessors in the new linked list is one.

If length (the third parameter) is zero, bf_cut() simply duplicates the first accessor in the linked list but sets acc_length=0 and acc_next=null. In other words, it creates a linked list consisting of a single accessor whose acc_length is 0.

bf_cut() is used in a number of scenarios, including cutting a received ethernet packet to size.

For a full description of the network service's buffer management, click here.



0061342                   }
0061343                   break;
0061344          default:
0061345 #if !CRAMPED
0061346                   printf("udp_get_data(%d, 0x%x, 0x%x) called but up_state= 0x%x\n",
0061347                            port, offset, count, udp_port->up_state);
0061348 #endif
0061349                   break;
0061350          }
0061351          return NULL;
0061352 }
0061353 
0061354 PRIVATE int udp_put_data (fd, offset, data, for_ioctl)
0061355 int fd;
0061356 size_t offset;
0061357 acc_t *data;
0061358 int for_ioctl;
udp_put_data()

During the initialization of the udp layer, the udp code calls ip_ioctl() twice. The second time, the udp code calls ip_ioctl() with an argument of NWIOGIPCONF. ip_ioctl() then (indirectly) calls udp_put_data() twice. The first call to udp_put_data() is to get the underlying ip file descriptor's ip address (to which it sets the udp port's up_ipaddr field). The second call to udp_put_data() simply sets the udp port's state to UPS_MAIN.

After the initialization (when the state of the udp port is UPS_MAIN), udp_put_data() is called only by ip_read() to handle the (unlikely) case that the underlying ip file descriptor was not already configured.


0061359 {
0061360          udp_port_t *udp_port;
0061361          int result;
0061362 
0061363          udp_port= &udp_port_table[fd];
0061364 
0061365          switch (udp_port->up_state)
0061366          {
0061367          case UPS_GETCONF:
During the initialization of the udp layer, the udp code calls ip_ioctl() twice. The second time, the udp code calls ip_ioctl() with an argument of NWIOGIPCONF. ip_ioctl() then (indirectly) calls udp_put_data() twice. The first call to udp_put_data() is to get the underlying ip file descriptor's ip address (to which it sets the udp port's up_ipaddr field). The second call to udp_put_data() simply sets the udp port's state to UPS_MAIN.


0061368                   if (!data)
If the call to ip_ioctl() during the udp layer's initialization failed (i.e., result is negative), the state of the udp port is set to UPS_ERROR. If the call was successful, the state of the udp port is simply set to UPS_MAIN. If, in addition, the call had been suspended (i.e., UPF_SUSPEND is set) because the ip address of the ip file descriptor's underlying ip port was not yet set, udp_main() is called to finish the initialization.


0061369                   {
0061370                            result= (int)offset;
0061371                            if (result<0)
0061372                            {
0061373                                     udp_port->up_state= UPS_ERROR;
0061374                                     return NW_OK;
0061375                            }
0061376                            udp_port->up_state= UPS_MAIN;
0061377                            if (udp_port->up_flags & UPF_SUSPEND)
0061378                                     udp_main(udp_port);
udp_main()

udp_main(udp_port) is called in two scenarios:

1) The udp physical port udp_port, udp_main()'s only parameter, is being initialized. During the initialization, udp_main() calls ip_open() to open up an ip file descriptor before calling ip_ioctl() twice to configure the newly opened ip file descriptor (the first call) and to get the ip address of the underlying ip port (the second call).

2) After this initial call to udp_main(), all subsequent calls will finish up the udp port's initialization (if necessary). After this initialization is complete, udp_main() attempts to configure (by calling udp_ioctl()) any udp file descriptors whose configuration was previously suspended and then finally calls read_ip_packets() to process any ip packets waiting to be delivered to udp_port.


0061379                   }
0061380                   else
During the initialization of the udp layer, the udp code calls ip_ioctl() with an argument of NWIOGIPCONF. ip_ioctl() then (indirectly) calls udp_put_data() with a non-null value for data. The udp code makes this call to ip_ioctl() in order to get the underlying ip file descriptor's ip address (to which it sets the udp port's up_ipaddr field).


0061381                   {
0061382                            struct nwio_ipconf *ipconf;
nwio_ipconf_t

Type nwio_ipconf_t contains the ip address information of an ip port (i.e., an element in ip_port_table[]):

typedef struct nwio_ipconf

{
u32_t nwic_flags;
ipaddr_t nwic_ipaddr;
ipaddr_t nwic_netmask;
} nwio_ipconf_t;
nwic_flags: reflects whether the ip address and the subnet mask have been set. The different flags are as follows:

#define NWIC_NOFLAGS 0x0
#define NWIC_FLAGS 0x3
#define NWIC_IPADDR_SET 0x1
#define NWIC_NETMASK_SET 0x2

nwic_ipaddr: the ip address (e.g., 192.168.5.5)

nwic_netmask: the subnet mask (e.g., 255.255.255.0)


0061383 
0061384                            data= bf_packIffLess(data, sizeof(*ipconf));
bf_packIffLess()

If the data in a linked list of accessors is less than min_len (the second parameter), bf_packIffLess(pack, min_len) packs the data by calling bf_pack().

bf_packIffLess() is often called to ensure that a packet's header is in a single contiguous buffer so that the individual fields of the header can be easily accessed.

For a detailed description of the network service's buffer management, click here.


0061385                            ipconf= (struct nwio_ipconf *)ptr2acc_data(data);
ptr2acc_data()

The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.


0061386 assert (ipconf->nwic_flags & NWIC_IPADDR_SET);
0061387                            udp_port->up_ipaddr= ipconf->nwic_ipaddr;
0061388                            bf_afree(data);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the chain will not be freed in its entirety. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0061389                   }
0061390                   break;
0061391          case UPS_MAIN:
In the UPS_MAIN state, udp_put_data() is called only from ip_read() and only if IFF_OPTSET has not been set (i.e., the ip file descriptor has not been configured). Under normal circumstances, this flag will be set during the network initialization. If this flag has not been set, the network service terminates (note the assert(0) on line 61392).


0061392                   assert(0);
0061393 
0061394                   assert (udp_port->up_flags & UPF_READ_IP);
0061395                   if (!data)
0061396                   {
0061397                            result= (int)offset;
0061398                            compare (result, >=, 0);
compare()

compare is #define'd in inet/generic/assert.h:

#define compare(a,t,b) (!((a) t (b)) ? bad_compare(this_file, __LINE__, \
(a), #a " " #t " " #b, (b)) : (void) 0)

and bad_compare() is defined in inet/inet.c.

If the relationship between the 3 arguments in compare() does not hold, some debugging output is emitted and then Minix is terminated.

For example, if compare(result, >=, 0) is called and result (the first argument) is -1, Minix will be terminated.


0061399                            if (udp_port->up_flags & UPF_READ_SP)
0061400                            {
0061401                                     udp_port->up_flags &= ~(UPF_READ_SP|
0061402                                              UPF_READ_IP);
0061403                                     read_ip_packets(udp_port);
read_ip_packets() / udp

read_ip_packets() is called only a single time, during the initialization of the udp code.

read_ip_packets() reads all packets (by repeatedly calling ip_read()) in the udp port's associated ip file descriptor's read queue until all of the packets have either been delivered to the processes that issued the read requests (provided that the packets have not expired) or placed in the udp file descriptor's read queue.

read_ip_packets() is also called by udp_put_data(). However, this line of code is never executed (see comment for line 61391).


0061404                            }
0061405                            else
0061406                                     udp_port->up_flags &= ~UPF_READ_IP;
0061407                   }
0061408                   else
udp_put_data() will not be called with a non-zero value for data if the state of the udp port is UPS_MAIN. Since if_put_pkt will be set to udp_ip_arrived() for an ip file descriptor opened by the udp code, ip_read() (indirectly) calls udp_ip_arrived() instead of udp_put_data().


0061409                   {
0061410 assert (!offset);       /* This isn't a valid assertion but ip sends only
0061411                             * whole datagrams up */
0061412                            udp_ip_arrived(fd, data, bf_bufsize(data));
udp_ip_arrived()

udp_ip_arrived() is called either (indirectly) from the ip code or from udp_put_data() and is one of the last functions called for a read request from a user process. udp_ip_arrived() does some checks (e.g., a checksum check) and figures out the destination udp file descriptor before calling udp_packet2user() to deliver the packet to the process that requested the read.


0061413                   }
0061414                   break;
0061415 #if !CRAMPED
0061416          default:
0061417                   ip_panic((
0061418                   "udp_put_data(%d, 0x%x, 0x%x) called but up_state= 0x%x\n",
0061419                                              fd, offset, data, udp_port->up_state ));
0061420 #endif
0061421          }
0061422          return NW_OK;
0061423 }
0061424 
0061425 int udp_ioctl (fd, req)
0061426 int fd;
0061427 ioreq_t req;
udp_ioctl()

udp_ioctl() handles the following ioctl requests for udp file descriptors:


NWIOSUDPOPT: Set UDP OPTions. udp_ioctl() calls udp_setopt(), which gets data (specifically, a nwio_udpopt_t struct) from the user process and sets the options accordingly. Before a udp file descriptor can be used, the file descriptor must be configured by udp_ioctl().


NWIOGUDPOPT: Get UDP OPTions. Sends a nwio_udpopt_t struct to the process that opened the udp file descriptor which contains the configuration data for the udp file descriptor.


udp_ioctl() is called (indirectly) by sr_rwio().


0061428 {
0061429          udp_fd_t *udp_fd;
0061430          udp_port_t *udp_port;
0061431          nwio_udpopt_t *udp_opt;
0061432          acc_t *opt_acc;
0061433          int result;
0061434 
0061435          udp_fd= &udp_fd_table[fd];
Find the udp file descriptor with an index of fd (the first parameter of udp_ioctl()) within udp_fd_table[].


0061436 
0061437 assert (udp_fd->uf_flags & UFF_INUSE);
0061438 
0061439          udp_port= udp_fd->uf_port;
Find the udp port that corresponds to the udp file descriptor.


0061440          udp_fd->uf_flags |= UFF_IOCTL_IP;
Indicate that an ioctl operation is in progress and specify which operation (NWIOSUDPOPT or NWIOGUDPOPT) has been requested (next line).


0061441          udp_fd->uf_ioreq= req;
0061442 
0061443          if (udp_port->up_state != UPS_MAIN)
0061444                   return NW_SUSPEND;
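During the initialization of the network service, all udp ports are initialized. Near the end of this initialization, the state of the udp port is set to UPS_MAIN. If the udp port has not yet reached this state, the ioctl request cannot be handled yet and is suspended (NW_SUSPEND is returned); udp_main() retries the request by calling udp_ioctl() again once the udp port's initialization is complete.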


0061445 
0061446          switch(req)
0061447          {
0061448          case NWIOSUDPOPT:
NWIOSUDPOPT stands for "NetWork IO Set UDP OPTions." This operation attempts to set the options for a udp file descriptor.

Before a udp file descriptor may be used (e.g., before the udp file descriptor may be read), its options must be set.


0061449                   result= udp_setopt(udp_fd);
udp_setopt()

udp_setopt() is called by only a single function, udp_ioctl().

Before a udp file descriptor can be used, the file descriptor must be configured by udp_setopt(). udp_setopt() gets the configuration data from the process that opened the file descriptor, verifies that the configuration data is valid, sets the uf_udpopt field of the udp file descriptor to reflect the new configuration, and then marks the file descriptor as configured (i.e., sets the UFF_OPTSET flag in uf_flags).


0061450                   break;
0061451          case NWIOGUDPOPT:
NWIOGUDPOPT stands for "NetWork IO Get UDP OPTions." This operation returns the options (in a nwio_udpopt_t struct) of a udp file descriptor to the user process that opened it.

The steps involved are:

1) Acquire a buffer (lines 61452-61454).
2) Copy the options and the ip address of the port to the buffer (lines 61456-61457).
3) Send the buffer to the user process (lines 61458-61459).


From ip(4):

"The NWIOGUDPOPT ioctl returns the current options that result from the
default options and the options set with NWIOSUDPOPT. When NWUO_LP_SEL
or NWUO_LP_SET is selected, the local port is returned in nwuo_locport.
When NWUO_RP_SET is selected, the remote port is returned in nwuo_remport.
The local address is always returned in nwuo_locaddr, and when NWUO_RA_SET is selected, the remote address is returned in nwuo_remaddr."


nwio_udpopt_t


struct nwio_udpopt_t contains configuration information for a udp file descriptor. The uf_udpopt field of a udp file descriptor has type nwio_udpopt_t.

typedef struct nwio_udpopt 

{
unsigned long nwuo_flags;
udpport_t nwuo_locport;
udpport_t nwuo_remport;
ipaddr_t nwuo_locaddr;
ipaddr_t nwuo_remaddr;
} nwio_udpopt_t;
nwuo_flags:

#define NWUO_NOFLAGS 0x0000L
#define NWUO_ACC_MASK 0x0003L
#define NWUO_EXCL 0x00000001L
#define NWUO_SHARED 0x00000002L
#define NWUO_COPY 0x00000003L

File descriptors that share the same port must have the same access permissions (i.e., their NWUO_EXCL, NWUO_SHARED, and NWUO_COPY flags are the same).

There does appear to be an error in the code, however. If the NWUO_EXCL (exclusive access) flag is set, only one udp file descriptor should have access to the common udp port (0-65535). Unfortunately, this is not the case. Also, NWUO_COPY has no significance and is never used in the code.

#define NWUO_LOCPORT_MASK 0x000CL
#define NWUO_LP_SEL 0x00000004L
#define NWUO_LP_SET 0x00000008L
#define NWUO_LP_ANY 0x0000000CL

One of the flag sets that cannot be disabled. There are three different choices:

NWUO_LP_SEL: The network service chooses the port. This port will be in the 49152-65535 range and will be unique. Note that this is inconsistent with the ip(4) documentation (which says that the port will be in the 32768-65535 range).

NWUO_LP_SET: The local port is specified in the nwuo_locport field of the nwio_udpopt_t struct (see below).

NWUO_LP_ANY: The file descriptor accepts packets destined for any port.


#define NWUO_LOCADDR_MASK 0x0010L
#define NWUO_EN_LOC 0x00000010L
#define NWUO_DI_LOC 0x00100000L

If enabled, accept packets destined for local address specified by the nwuo_locaddr field of the nwio_udpopt_t struct (see below).

#define NWUO_BROAD_MASK 0x0020L
#define NWUO_EN_BROAD 0x00000020L
#define NWUO_DI_BROAD 0x00200000L

If enabled, accept broadcast packets. If disabled, reject broadcast packets.


#define NWUO_REMPORT_MASK 0x0100L
#define NWUO_RP_SET 0x00000100L
#define NWUO_RP_ANY 0x01000000L
#define NWUO_REMADDR_MASK 0x0200L
#define NWUO_RA_SET 0x00000200L
#define NWUO_RA_ANY 0x02000000L

A udp file descriptor can be configured to only send packets to a specific udp port and ip address. If NWUO_RP_SET and NWUO_RA_SET are enabled, a remote port (nwuo_remport) and a remote address (nwuo_remaddr) must be specified. If NWUO_RP_ANY and/or NWUO_RA_ANY are/is set, the destination may be any udp port and/or any ip address. See below.

#define NWUO_RW_MASK 0x1000L
#define NWUO_RWDATONLY 0x00001000L
#define NWUO_RWDATALL 0x10000000L

If NWUO_RWDATONLY is set, only the data is sent to the ip layer. If NWUO_RWDATALL is set, the source and destination ip addresses, the source and destination udp ports, and other information (e.g., ip options) are sent (in addition to the data) to the ip layer.


#define NWUO_IPOPT_MASK 0x2000L
#define NWUO_EN_IPOPT 0x00002000L
#define NWUO_DI_IPOPT 0x20000000L

If NWUO_EN_IPOPT (ENable IP OPTions) is set, ip options are sent to the ip layer and received from the ip layer. If NWUO_DI_IPOPT (DIsable IP OPTions) is set, ip options are not sent to the ip layer and stripped from the packets received from the ip layer.



If there are any illegal combinations of flags (e.g., NWUO_RWDATONLY and an inappropriate ..._ANY flag are both set), the network service sends the user process an EBADMODE message.

EBADMODE is #define'd in /include/errno.h:

#define EBADMODE (_SIGN 53) /* badmode in ioctl */


nwuo_locport, nwuo_locaddr: The local udp port and address of the udp file descriptor.

nwuo_remport, nwuo_remaddr: (optional) The remote udp port and address of the remote system. If only a single destination is desired, these two fields are set.


0061452                   opt_acc= bf_memreq(sizeof(*udp_opt));
bf_memreq()

After the buffers have been initialized, accessors[] looks like the following:



bf_memreq() allocates accessors to the caller. For example, if 1514 bytes of buffer space are requested immediately after the network process starts and each buffer is 512 bytes (the default), then accessors[] will look like the following:



Note that three elements of accessors[] have been removed from buf512_freelist and that the head of the chain of the 3 accessors is returned by bf_memreq(). Also note that the acc_linkC and buf_linkC fields have been set to one and acc_length and acc_offset have been set to their appropriate values.

So what happens if there are not enough buffers on the buf512_freelist to satisfy a request? On lines 2280-2290 of buf.c, functions that free buffers for the specific clients (e.g., eth_buffree()) are called until there are enough buffers on buf512_freelist.

For a complete description of the network service's buffer management, click here.


0061453 assert (opt_acc->acc_length == sizeof(*udp_opt));
0061454                   udp_opt= (nwio_udpopt_t *)ptr2acc_data(opt_acc);
ptr2acc_data()

The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.


0061455 
0061456                   *udp_opt= udp_fd->uf_udpopt;
0061457                   udp_opt->nwuo_locaddr= udp_fd->uf_port->up_ipaddr;
0061458                   result= (*udp_fd->uf_put_userdata)(udp_fd->uf_srfd, 0, opt_acc,
0061459                            TRUE);
The uf_get_userdata and uf_put_userdata fields are set to pointers to sr_get_userdata() and sr_put_userdata(), respectively. These values were passed in as arguments in sr_add_minor().

Recall that each udp file descriptor has a corresponding sr file descriptor. The uf_srfd field of the udp file descriptor contains this sr file descriptor.


0061460                   if (result == NW_OK)
0061461                            reply_thr_put(udp_fd, NW_OK, TRUE);
reply_thr_get() / reply_thr_put() / udp

reply_thr_get() calls (indirectly) sr_get_userdata(), which reports to the file system (FS) whether a previous operation requested by a process was successful. reply, reply_thr_get()'s second parameter, indicates whether the previous operation was successful.

After sending the message to the FS, sr_get_userdata() processes the messages in the write or ioctl queue.

reply_thr_put() does nearly the same thing as reply_thr_get(). However, instead of reporting whether write and ioctl operations were successful, reply_thr_put() reports on read and ioctl operations.


0061462                   break;
0061463          default:
Only NWIOSUDPOPT and NWIOGUDPOPT requests are valid requests. All other requests are rejected.


0061464                   reply_thr_get(udp_fd, EBADIOCTL, TRUE);
reply_thr_get() / reply_thr_put() / udp

reply_thr_get() calls (indirectly) sr_get_userdata(), which reports to the file system (FS) whether a previous operation requested by a process was successful. reply, reply_thr_get()'s second parameter, indicates whether the previous operation was successful.

After sending the message to the FS, sr_get_userdata() processes the messages in the write or ioctl queue.

reply_thr_put() does nearly the same thing as reply_thr_get(). However, instead of reporting whether write and ioctl operations were successful, reply_thr_put() reports on read and ioctl operations.


0061465                   result= NW_OK;
0061466                   break;
0061467          }
0061468          if (result != NW_SUSPEND)
0061469                   udp_fd->uf_flags &= ~UFF_IOCTL_IP;
If the operation wasn't suspended, clear the UFF_IOCTL_IP flag. In other words, the ioctl request has been handled.


0061470          return result;
0061471 }
0061472 
0061473 PRIVATE int udp_setopt(udp_fd)
0061474 udp_fd_t *udp_fd;
udp_setopt()

udp_setopt() is called by only a single function, udp_ioctl().

Before a udp file descriptor can be used, the file descriptor must be configured by udp_setopt(). udp_setopt() gets the configuration data from the process that opened the file descriptor, verifies that the configuration data is valid, sets the uf_udpopt field of the udp file descriptor to reflect the new configuration, and then marks the file descriptor as configured (i.e., sets the UFF_OPTSET flag in uf_flags).


0061475 {
0061476          udp_fd_t *fd_ptr;
0061477          nwio_udpopt_t oldopt, newopt;
0061478          acc_t *data;
0061479          int result;
0061480          udpport_t port;
0061481          unsigned int new_en_flags, new_di_flags, old_en_flags, old_di_flags,
0061482                   all_flags, flags;
0061483          unsigned long new_flags;
0061484          int i;
0061485 
0061486          data= (*udp_fd->uf_get_userdata)(udp_fd->uf_srfd, 0,
0061487                   sizeof(nwio_udpopt_t), TRUE);
Get the configuration data from the process that opened the udp file descriptor. uf_get_userdata was set to sr_get_userdata() on line 61257.


sr_get_userdata()


sr_get_userdata() is the counterpart to sr_put_userdata() and does one of two things:

1) Copies data from a user process to a buffer (to be more specific, a chain of accessors) within the network service (this process). This can be either ioctl data (in which case, for_ioctl is TRUE) or data. For example, udp_setopt() (indirectly) calls sr_get_userdata() to get configuration data. Also, restart_write_fd() (indirectly) calls sr_get_userdata() before passing data onto the ip code.

2) Sends a REVIVE message to the file system (FS). For example, if an illegal option is selected while configuring a udp file descriptor, reply_thr_get() is called, which then (indirectly) calls sr_get_userdata(), passing in EBADMODE for the parameter count. restart_write_fd() also (indirectly) calls sr_get_userdata() to send a REVIVE message back to the FS indicating the number of bytes read after copying the data from the user process.

sr_get_userdata() is often called twice in close succession. The first time to attempt to copy the data from the user process and then the second time to send a message to the FS indicating whether the copy operation was successful and, if it was successful, the number of bytes copied.

In my opinion, like sr_put_userdata(), this function should have been made into two functions. As it is, it is too confusing.


nwio_udpopt_t


struct nwio_udpopt_t contains configuration information for a udp file descriptor. The uf_udpopt field of a udp file descriptor has type nwio_udpopt_t.

typedef struct nwio_udpopt 

{
unsigned long nwuo_flags;
udpport_t nwuo_locport;
udpport_t nwuo_remport;
ipaddr_t nwuo_locaddr;
ipaddr_t nwuo_remaddr;
} nwio_udpopt_t;
nwuo_flags:

#define NWUO_NOFLAGS 0x0000L
#define NWUO_ACC_MASK 0x0003L
#define NWUO_EXCL 0x00000001L
#define NWUO_SHARED 0x00000002L
#define NWUO_COPY 0x00000003L

File descriptors that share the same port must have the same access permissions (i.e., their NWUO_EXCL, NWUO_SHARED, and NWUO_COPY flags are the same).

There does appear to be an error in the code, however. If the NWUO_EXCL (exclusive access) flag is set, only one udp file descriptor should have access to the common udp port (0-65535). Unfortunately, this is not the case. Also, NWUO_COPY has no significance and is never used in the code.

#define NWUO_LOCPORT_MASK 0x000CL
#define NWUO_LP_SEL 0x00000004L
#define NWUO_LP_SET 0x00000008L
#define NWUO_LP_ANY 0x0000000CL

One of the flag sets that cannot be disabled. There are three different choices:

NWUO_LP_SEL: The network service chooses the port. This port will be in the 49152-65535 range and will be unique. Note that this is inconsistent with the ip(4) documentation (which says that the port will be in the 32768-65535 range).

NWUO_LP_SET: The local port is specified in the nwuo_locport field of the nwio_udpopt_t struct (see below).

NWUO_LP_ANY: The file descriptor accepts packets destined for any port.


#define NWUO_LOCADDR_MASK 0x0010L
#define NWUO_EN_LOC 0x00000010L
#define NWUO_DI_LOC 0x00100000L

If enabled, accept packets destined for local address specified by the nwuo_locaddr field of the nwio_udpopt_t struct (see below).

#define NWUO_BROAD_MASK 0x0020L
#define NWUO_EN_BROAD 0x00000020L
#define NWUO_DI_BROAD 0x00200000L

If enabled, accept broadcast packets. If disabled, reject broadcast packets.


#define NWUO_REMPORT_MASK 0x0100L
#define NWUO_RP_SET 0x00000100L
#define NWUO_RP_ANY 0x01000000L
#define NWUO_REMADDR_MASK 0x0200L
#define NWUO_RA_SET 0x00000200L
#define NWUO_RA_ANY 0x02000000L

A udp file descriptor can be configured to only send packets to a specific udp port and ip address. If NWUO_RP_SET and NWUO_RA_SET are enabled, a remote port (nwuo_remport) and a remote address (nwuo_remaddr) must be specified. If NWUO_RP_ANY and/or NWUO_RA_ANY are/is set, the destination may be any udp port and/or any ip address. See below.

#define NWUO_RW_MASK 0x1000L
#define NWUO_RWDATONLY 0x00001000L
#define NWUO_RWDATALL 0x10000000L

If NWUO_RWDATONLY is set, only the data is sent to the ip layer. If NWUO_RWDATALL is set, the source and destination ip addresses, the source and destination udp ports, and other information (e.g., ip options) are sent (in addition to the data) to the ip layer.


#define NWUO_IPOPT_MASK 0x2000L
#define NWUO_EN_IPOPT 0x00002000L
#define NWUO_DI_IPOPT 0x20000000L

If NWUO_EN_IPOPT (ENable IP OPTions) is set, ip options are sent to the ip layer and received from the ip layer. If NWUO_DI_IPOPT (DIsable IP OPTions) is set, ip options are not sent to the ip layer and stripped from the packets received from the ip layer.



If there are any illegal combinations of flags (e.g., NWUO_RWDATONLY and an inappropriate ..._ANY flag are both set), the network service sends the user process an EBADMODE message.

EBADMODE is #define'd in /include/errno.h:

#define EBADMODE (_SIGN 53) /* badmode in ioctl */


nwuo_locport, nwuo_locaddr: The local udp port and address of the udp file descriptor.

nwuo_remport, nwuo_remaddr: (optional) The remote udp port and address of the remote system. If only a single destination is desired, these two fields are set.


0061488 
0061489          if (!data)
0061490                   return EFAULT;
0061491 
0061492          data= bf_packIffLess(data, sizeof(nwio_udpopt_t));
0061493 assert (data->acc_length == sizeof(nwio_udpopt_t));
0061494 
0061495          newopt= *(nwio_udpopt_t *)ptr2acc_data(data);
0061496          bf_afree(data);
0061497          oldopt= udp_fd->uf_udpopt;
0061498 
Below is an example of a mask (NWUO_LOCADDR_MASK) and the associated enable flag (NWUO_EN_LOC) and the associated disable flag (NWUO_DI_LOC).



Get the requested (new) enable and disable flags and the old enable and disable flags. The flags for the udp file descriptor will be a product of the requested flags and the old flags.


0061499          old_en_flags= oldopt.nwuo_flags & 0xffff;
0061500          old_di_flags= (oldopt.nwuo_flags >> 16) & 0xffff;
0061501 
0061502          new_en_flags= newopt.nwuo_flags & 0xffff;
0061503          new_di_flags= (newopt.nwuo_flags >> 16) & 0xffff;
0061504 
0061505          if (new_en_flags & new_di_flags)
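It is not allowed to both enable and disable the same flag. Each enable flag occupies the same bit position in the lower 16 bits of nwuo_flags as its corresponding disable flag occupies in the upper 16 bits (e.g., NWUO_EN_LOC is 0x00000010L and NWUO_DI_LOC is 0x00100000L), so any bit that is set in both new_en_flags and new_di_flags indicates a conflict.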


0061506          {
0061507                   DBLOCK(1, printf("returning EBADMODE\n"));
0061508 
0061509                   reply_thr_get(udp_fd, EBADMODE, TRUE);
0061510                   return NW_OK;
0061511          }
0061512 
0061513          /* NWUO_ACC_MASK */
0061514          if (new_di_flags & NWUO_ACC_MASK)
0061515          {
0061516                   DBLOCK(1, printf("returning EBADMODE\n"));
0061517 
0061518                   reply_thr_get(udp_fd, EBADMODE, TRUE);
0061519                   return NW_OK;
0061520                   /* access modes can't be disabled */
0061521          }
0061522 
0061523          if (!(new_en_flags & NWUO_ACC_MASK))
0061524                   new_en_flags |= (old_en_flags & NWUO_ACC_MASK);
If the new access enable flags are not set, use the old flags.


0061525 
0061526          /* NWUO_LOCPORT_MASK */
0061527          if (new_di_flags & NWUO_LOCPORT_MASK)
0061528          {
0061529                   DBLOCK(1, printf("returning EBADMODE\n"));
0061530 
0061531                   reply_thr_get(udp_fd, EBADMODE, TRUE);
0061532                   return NW_OK;
0061533                   /* the loc ports can't be disabled */
0061534          }
0061535          if (!(new_en_flags & NWUO_LOCPORT_MASK))
If a new port is not specified for the udp file descriptor, use the old one.


0061536          {
0061537                   new_en_flags |= (old_en_flags & NWUO_LOCPORT_MASK);
0061538                   newopt.nwuo_locport= oldopt.nwuo_locport;
0061539          }
0061540          else if ((new_en_flags & NWUO_LOCPORT_MASK) == NWUO_LP_SEL)
0061541          {
0061542                   newopt.nwuo_locport= find_unused_port(udp_fd-udp_fd_table);
0061543          }
0061544          else if ((new_en_flags & NWUO_LOCPORT_MASK) == NWUO_LP_SET)
0061545          {
0061546                   if (!newopt.nwuo_locport)
0061547                   {
0061548                            DBLOCK(1, printf("returning EBADMODE\n"));
0061549 
0061550                            reply_thr_get(udp_fd, EBADMODE, TRUE);
0061551                            return NW_OK;
0061552                   }
0061553          }
0061554 
0061555          /* NWUO_LOCADDR_MASK */
0061556          if (!((new_en_flags | new_di_flags) & NWUO_LOCADDR_MASK))
0061557          {
0061558                   new_en_flags |= (old_en_flags & NWUO_LOCADDR_MASK);
0061559                   new_di_flags |= (old_di_flags & NWUO_LOCADDR_MASK);
0061560          }
0061561 
0061562          /* NWUO_BROAD_MASK */
0061563          if (!((new_en_flags | new_di_flags) & NWUO_BROAD_MASK))
0061564          {
0061565                   new_en_flags |= (old_en_flags & NWUO_BROAD_MASK);
0061566                   new_di_flags |= (old_di_flags & NWUO_BROAD_MASK);
0061567          }
0061568 
0061569          /* NWUO_REMPORT_MASK */
0061570          if (!((new_en_flags | new_di_flags) & NWUO_REMPORT_MASK))
0061571          {
0061572                   new_en_flags |= (old_en_flags & NWUO_REMPORT_MASK);
0061573                   new_di_flags |= (old_di_flags & NWUO_REMPORT_MASK);
0061574                   newopt.nwuo_remport= oldopt.nwuo_remport;
0061575          }
0061576          
0061577          /* NWUO_REMADDR_MASK */
0061578          if (!((new_en_flags | new_di_flags) & NWUO_REMADDR_MASK))
0061579          {
0061580                   new_en_flags |= (old_en_flags & NWUO_REMADDR_MASK);
0061581                   new_di_flags |= (old_di_flags & NWUO_REMADDR_MASK);
0061582                   newopt.nwuo_remaddr= oldopt.nwuo_remaddr;
0061583          }
0061584 
0061585          /* NWUO_RW_MASK */
0061586          if (!((new_en_flags | new_di_flags) & NWUO_RW_MASK))
0061587          {
0061588                   new_en_flags |= (old_en_flags & NWUO_RW_MASK);
0061589                   new_di_flags |= (old_di_flags & NWUO_RW_MASK);
0061590          }
0061591 
0061592          /* NWUO_IPOPT_MASK */
0061593          if (!((new_en_flags | new_di_flags) & NWUO_IPOPT_MASK))
0061594          {
0061595                   new_en_flags |= (old_en_flags & NWUO_IPOPT_MASK);
0061596                   new_di_flags |= (old_di_flags & NWUO_IPOPT_MASK);
0061597          }
0061598 
0061599          new_flags= ((unsigned long)new_di_flags << 16) | new_en_flags;
This block checks for inconsistent udp options.

If only the data is to be sent to the ip layer, the ip address and udp port need to be known (in other words, there cannot be ..._ANY flags set). In addition, if only the data is to be sent to the ip layer, no ip options may be passed back and forth.


0061600          if ((new_flags & NWUO_RWDATONLY) &&
0061601                   ((new_flags & NWUO_LOCPORT_MASK) == NWUO_LP_ANY ||
0061602                   (new_flags & (NWUO_RP_ANY|NWUO_RA_ANY|NWUO_EN_IPOPT))))
0061603          {
0061604                   DBLOCK(1, printf("returning EBADMODE\n"));
0061605 
0061606                   reply_thr_get(udp_fd, EBADMODE, TRUE);
0061607                   return NW_OK;
0061608          }
0061609 
0061610          /* Check the access modes */
0061611          if ((new_flags & NWUO_LOCPORT_MASK) == NWUO_LP_SEL ||
0061612                   (new_flags & NWUO_LOCPORT_MASK) == NWUO_LP_SET)
0061613          {
0061614                   for (i= 0, fd_ptr= udp_fd_table; i<UDP_FD_NR; i++, fd_ptr++)
Go through all of the udp file descriptors and compare each with the file descriptor currently being configured (i.e., udp_fd). If any of the file descriptors share the same port but do not have the same access permissions (i.e., their NWUO_EXCL, NWUO_SHARED, and NWUO_COPY flags are not the same), send the user process that is configuring the udp file descriptor a message indicating an error.

There does appear to be an error in the code, however. If the NWUO_EXCL (exclusive access) flag is set, only one udp file descriptor should have access to the port. Unfortunately, this is not reflected in the code.


0061615                   {
0061616                            if (fd_ptr == udp_fd)
0061617                                     continue;
This is the udp file descriptor that is currently being configured. Skip over it.


0061618                            if (!(fd_ptr->uf_flags & UFF_INUSE))
0061619                                     continue;
0061620                            if (fd_ptr->uf_port != udp_fd->uf_port)
0061621                                     continue;
0061622                            flags= fd_ptr->uf_udpopt.nwuo_flags;
0061623                            if ((flags & NWUO_LOCPORT_MASK) != NWUO_LP_SEL &&
0061624                                     (flags & NWUO_LOCPORT_MASK) != NWUO_LP_SET)
0061625                                     continue;
0061626                            if (fd_ptr->uf_udpopt.nwuo_locport !=
0061627                                     newopt.nwuo_locport)
0061628                            {
0061629                                     continue;
0061630                            }
0061631                            if ((flags & NWUO_ACC_MASK) !=
0061632                                     (new_flags & NWUO_ACC_MASK))
0061633                            {
0061634                                     DBLOCK(1, printf(
0061635                            "address inuse: new fd= %d, old_fd= %d, port= %u\n",
0061636                                              udp_fd-udp_fd_table,
0061637                                              fd_ptr-udp_fd_table,
0061638                                              newopt.nwuo_locport));
0061639 
0061640                                     reply_thr_get(udp_fd, EADDRINUSE, TRUE);
0061641                                     return NW_OK;
0061642                            }
0061643                   }
0061644          }
0061645 
0061646          if (udp_fd->uf_flags & UFF_OPTSET)
0061647                   unhash_fd(udp_fd);
hash_fd() / unhash_fd() / udp

Hash tables enable quick lookups. The hash table used by the udp code enables a udp file descriptor to be found quickly using its common udp port number (0-65535).

hash_fd(udp_fd) either places the udp file descriptor udp_fd, hash_fd()'s only parameter, at the head of the up_port_any linked list of the file descriptor's udp port (if the file descriptor is not associated with a specific udp port) or the function calculates the hash of the udp file descriptor based on its udp port number and places it at the head of the linked list for that hash value (e.g., up_port_hash[10]).

unhash_fd(udp_fd) removes the udp file descriptor udp_fd, unhash_fd()'s only parameter, from the linked list where hash_fd() inserted it.

Note that the port used for the hash is not the physical udp port.


0061648 
0061649          newopt.nwuo_flags= new_flags;
0061650          udp_fd->uf_udpopt= newopt;
0061651 
This is the last check before declaring a udp file descriptor's options as valid (i.e., before setting the UFF_OPTSET flag in the uf_flags field of the udp file descriptor).

Previously (on line 61505), new_en_flags (enable flags) and new_di_flags (disable flags) were checked to verify that a process was not attempting to both enable and disable any one flag. Here, the code verifies that at least one flag within each set (i.e., within each "mask") has been specified, either as an enable flag or as a disable flag.

As soon as the UFF_OPTSET flag is set, the udp file descriptor may be used (i.e., read from/written to).


0061652          all_flags= new_en_flags | new_di_flags;
0061653          if ((all_flags & NWUO_ACC_MASK) && (all_flags & NWUO_LOCPORT_MASK) &&
0061654                   (all_flags & NWUO_LOCADDR_MASK) &&
0061655                   (all_flags & NWUO_BROAD_MASK) &&
0061656                   (all_flags & NWUO_REMPORT_MASK) &&
0061657                   (all_flags & NWUO_REMADDR_MASK) &&
0061658                   (all_flags & NWUO_RW_MASK) &&
0061659                   (all_flags & NWUO_IPOPT_MASK))
0061660                   udp_fd->uf_flags |= UFF_OPTSET;
The options are acceptable. Set the UFF_OPTSET flag.


0061661          else
0061662          {
0061663                   udp_fd->uf_flags &= ~UFF_OPTSET;
The options are not acceptable. Turn off the UFF_OPTSET flag.


0061664          }
0061665 
0061666          if (udp_fd->uf_flags & UFF_OPTSET)
0061667                   hash_fd(udp_fd);
hash_fd() / unhash_fd() / udp

Hash tables enable quick lookups. The hash table used by the udp code enables a udp file descriptor to be found quickly using its common udp port number (0-65535).

hash_fd(udp_fd) either places the udp file descriptor udp_fd, hash_fd()'s only parameter, at the head of the up_port_any linked list of the file descriptor's udp port (if the file descriptor is not associated with a specific udp port) or the function calculates the hash of the udp file descriptor based on its udp port number and places it at the head of the linked list for that hash value (e.g., up_port_hash[10]).

unhash_fd(udp_fd) removes the udp file descriptor udp_fd, unhash_fd()'s only parameter, from the linked list where hash_fd() inserted it.

Note that the port used for the hash is not the physical udp port.


0061668 
0061669          reply_thr_get(udp_fd, NW_OK, TRUE);
Inform the file system that the operation was successful. Note, however, that this doesn't necessarily mean that the UFF_OPTSET flag was set and that the udp file descriptor is usable (see line 61663).


reply_thr_get() / reply_thr_put() / udp


reply_thr_get() calls (indirectly) sr_get_userdata(), which reports to the file system (FS) whether a previous operation requested by a process was successful. reply, reply_thr_get()'s second parameter, indicates whether the previous operation was successful.

After sending the message to the FS, sr_get_userdata() processes the messages in the write or ioctl queue.

reply_thr_put() does nearly the same thing as reply_thr_get(). However, instead of reporting whether write and ioctl operations were successful, reply_thr_put() reports on read and ioctl operations.


0061670          return NW_OK;
0061671 }
0061672 
0061673 PRIVATE udpport_t find_unused_port(fd)
0061674 int fd;
find_unused_port()

find_unused_port() attempts to claim an unused port number. The first port number that is tried is equal to the index number of the udp file descriptor plus 0xC000 (49152). If this port is not available, find_unused_port() searches upward, starting at 0xC000 + UDP_FD_NR (49168), until an unclaimed port number is found. So for the sixth udp file descriptor (i.e., udp_fd_table[5]), find_unused_port() first tries port 49157 (49152 + 5), then tries 49168, 49169, and so on until an available port is found. Note that the loop stops before 0xFFFF, so the last port tried is 65534.

find_unused_port() is called only a single time: while setting a udp file descriptor's options with udp_setopt() (more specifically, if a user process attempts to set the NWUO_LP_SEL (Local Port SELect) flag).


0061675 {
0061676          udpport_t port, nw_port;
0061677 
0061678          nw_port= htons(0xC000+fd);
htons() / ntohs() / htonl() / ntohl()

From htons(3):

"htons() converts a 16-bit quantity from host byte order to network byte order."

Different CPU architectures group multiple bytes differently. For example, on a "little-endian" machine (an example of which is the Intel CPU), the value 0x1234 is stored in memory as 0x3412. However, on a "big-endian" machine, the value 0x1234 is stored in memory as 0x1234.

It is important that values in a header are sent across a network in a consistent manner independent of the architecture of the sending or receiving system. For this reason, a standard was chosen. The standard chosen was big-endian although it could have just as well been little-endian.

htons() is defined in /include/net/hton.h, as:
#define htons(x) (_tmp=(x), ((_tmp>>8) & 0xff) | ((_tmp<<8) & 0xff00))

ntohs() converts a 16-bit quantity from network byte order to host byte order, the reverse of htons().

htonl() and ntohl() are identical to htons() and ntohs() except that they convert 32-bit quantities instead of 16-bit quantities.

Processes generally supply header information when sending packets. The data in these fields is converted to the network format (i.e., big-endian) by the process before the process copies the data to the network service.


0061679          if (is_unused_port(nw_port))
is_unused_port()

is_unused_port() searches through all of the udp file descriptors (i.e., searches through udp_fd_table[]) for a given common udp port (0-65535). If the port is not found, is_unused_port() returns TRUE.


0061680                   return nw_port;
0061681 
0061682          for (port= 0xC000+UDP_FD_NR; port < 0xFFFF; port++)
0061683          {
0061684                   nw_port= htons(port);
htons() / ntohs() / htonl() / ntohl()

From htons(3):

"htons() converts a 16-bit quantity from host byte order to network byte order."

Different CPU architectures group multiple bytes differently. For example, on a "little-endian" machine (an example of which is the Intel CPU), the value 0x1234 is stored in memory as 0x3412. However, on a "big-endian" machine, the value 0x1234 is stored in memory as 0x1234.

It is important that values in a header are sent across a network in a consistent manner independent of the architecture of the sending or receiving system. For this reason, a standard was chosen. The standard chosen was big-endian although it could have just as well been little-endian.

htons() is defined in /include/net/hton.h, as:
#define htons(x) (_tmp=(x), ((_tmp>>8) & 0xff) | ((_tmp<<8) & 0xff00))

ntohs() converts a 16-bit quantity from network byte order to host byte order, the reverse of htons().

htonl() and ntohl() are identical to htons() and ntohs() except that they convert 32-bit quantities instead of 16-bit quantities.

Processes generally supply header information when sending packets. The data in these fields is converted to the network format (i.e., big-endian) by the process before the process copies the data to the network service.


0061685                   if (is_unused_port(nw_port))
is_unused_port()

is_unused_port() searches through all of the udp file descriptors (i.e., searches through udp_fd_table[]) for a given common udp port (0-65535). If the port is not found, is_unused_port() returns TRUE.


0061686                            return nw_port;
0061687          }
0061688 #if !CRAMPED
0061689          ip_panic(( "unable to find unused port (shouldn't occur)" ));
0061690          return 0;
0061691 #endif
0061692 }
0061693 
0061694 /*
0061695 reply_thr_put
0061696 */
0061697 
0061698 PRIVATE void reply_thr_put(udp_fd, reply, for_ioctl)
0061699 udp_fd_t *udp_fd;
0061700 int reply;
0061701 int for_ioctl;
reply_thr_get() / reply_thr_put() / udp

reply_thr_get() calls (indirectly) sr_get_userdata(), which reports to the file system (FS) whether a previous operation requested by a process was successful. reply, reply_thr_get()'s second parameter, indicates whether the previous operation was successful.

After sending the message to the FS, sr_get_userdata() processes the messages in the write or ioctl queue.

reply_thr_put() does nearly the same thing as reply_thr_get(). However, instead of reporting whether write and ioctl operations were successful, reply_thr_put() reports on read and ioctl operations.


0061702 {
0061703          int result;
0061704 
0061705          result= (*udp_fd->uf_put_userdata)(udp_fd->uf_srfd, reply,
0061706                   (acc_t *)0, for_ioctl);
The uf_put_userdata field of the udp file descriptor was set by udp_open() (see line 61258).


0061707          assert(result == NW_OK);
0061708 }
0061709 
0061710 /*
0061711 reply_thr_get
0061712 */
0061713 
0061714 PRIVATE void reply_thr_get(udp_fd, reply, for_ioctl)
0061715 udp_fd_t *udp_fd;
0061716 int reply;
0061717 int for_ioctl;
reply_thr_get() / reply_thr_put() / udp

reply_thr_get() calls (indirectly) sr_get_userdata(), which reports to the file system (FS) whether a previous operation requested by a process was successful. reply, reply_thr_get()'s second parameter, indicates whether the previous operation was successful.

After sending the message to the FS, sr_get_userdata() processes the messages in the write or ioctl queue.

reply_thr_put() does nearly the same thing as reply_thr_get(). However, instead of reporting whether write and ioctl operations were successful, reply_thr_put() reports on read and ioctl operations.


0061718 {
0061719          acc_t *result;
0061720          result= (*udp_fd->uf_get_userdata)(udp_fd->uf_srfd, reply,
0061721                   (size_t)0, for_ioctl);
The uf_get_userdata field of the udp file descriptor was set by udp_open() (see line 61257).


0061722          assert (!result);
0061723 }
0061724 
0061725 PRIVATE int is_unused_port(port)
is_unused_port()

is_unused_port() searches through all of the udp file descriptors (i.e., searches through udp_fd_table[]) for a given common udp port (0-65535). If the port is not found, is_unused_port() returns TRUE.


0061726 udpport_t port;
0061727 {
0061728          int i;
0061729          udp_fd_t *udp_fd;
0061730 
0061731          for (i= 0, udp_fd= udp_fd_table; i<UDP_FD_NR; i++,
0061732                   udp_fd++)
0061733          {
0061734                   if (!(udp_fd->uf_flags & UFF_OPTSET))
0061735                            continue;
If the udp file descriptor has not been configured (i.e., UFF_OPTSET is not set), its port number is not important. If the udp file descriptor is used again, it will need to acquire a new port number.


0061736                   if (udp_fd->uf_udpopt.nwuo_locport == port)
0061737                            return FALSE;
0061738          }
0061739          return TRUE;
0061740 }
0061741 
0061742 PRIVATE void read_ip_packets(udp_port)
0061743 udp_port_t *udp_port;
read_ip_packets() / udp

read_ip_packets() is called only a single time, during the initialization of the udp code.

read_ip_packets() reads all packets (by repeatedly calling ip_read()) in the udp port's associated ip file descriptor's read queue until all of the packets have either been delivered to the processes that issued the read requests (provided that the packets have not expired) or placed in the udp file descriptor's read queue.

read_ip_packets() is also called by udp_get_data(). However, this line of code is never executed (see comment for line 61391).


0061744 {
0061745          int result;
0061746 
0061747          do
0061748          {
0061749                   udp_port->up_flags |= UPF_READ_IP;
The UPF_READ_IP flag is meaningless and is essentially always set. At the end of this do...while() block, this flag is cleared. However, the while conditional will always be true (see comment for line 61758) and so the code will always loop back to this line where the flag is set again. The UPF_READ_IP flag is also cleared in udp_put_data(). However, this code will never be executed (see the comment for line 61391).


0061750                   result= ip_read(udp_port->up_ipfd, UDP_MAX_DATAGRAM);
UDP_MAX_DATAGRAM is the maximum size for a udp datagram and is #define'd in inet/generic/udp.h:

#define UDP_MAX_DATAGRAM 40000 /* 8192 */


ip_read()


If there are unexpired packets in the ip file descriptor fd's read queue, ip_read(fd, count) passes count (ip_read()'s second parameter) bytes off to the next-higher layer by calling packet2user(). If the packets in the file descriptor's read queue have expired, ip_read() discards the packets.

ip_read() is (indirectly) called by sr_rwio() when a process reads an ip device file (e.g., /dev/ip).

In the udp code, ip_read() is called by read_ip_packets() during the initialization of the udp code. Normally, ip_read() is not called by the udp code after the initialization.


0061751                   if (result == NW_SUSPEND)
0061752                   {
0061753                            udp_port->up_flags |= UPF_READ_SP;
0061754                            return;
0061755                   }
0061756 assert(result == NW_OK);
0061757                   udp_port->up_flags &= ~UPF_READ_IP;
0061758          } while(!(udp_port->up_flags & UPF_READ_IP));
This conditional (!(udp_port->up_flags & UPF_READ_IP)) is never false. It is a little unclear why this was done rather than simply using "while (1)". One possible explanation is that this was done to give more time to the network replies.


0061759 }
0061760 
0061761 
0061762 PUBLIC int udp_read (fd, count)
udp_read()

If there is a packet in the udp file descriptor's read queue and the packet has not expired, udp_read() sends the packet to the process that requested the read. If the packet cannot be delivered (either because the packet has expired or it hasn't arrived yet), udp_read() suspends the read operation (i.e., returns NW_SUSPEND to sr_rwio() and sets the UFF_READ_IP flag). If the packet has expired, all packets in the read queue of the udp file descriptor are discarded.


0061763 int fd;
0061764 size_t count;
0061765 {
0061766          udp_fd_t *udp_fd;
0061767          acc_t *tmp_acc, *next_acc;
0061768 
Find the udp file descriptor that has an index of fd (udp_read()'s first parameter) within udp_fd_table[]. Also, verify that the file descriptor has been configured by udp_ioctl() (line 61425). Note that after udp_ioctl() configures the udp file descriptor, it marks the file descriptor as configured (i.e., it sets the UFF_OPTSET flag).


0061769          udp_fd= &udp_fd_table[fd];
0061770          if (!(udp_fd->uf_flags & UFF_OPTSET))
0061771          {
0061772                   reply_thr_put(udp_fd, EBADMODE, FALSE);
0061773                   return NW_OK;
0061774          }
0061775 
0061776          udp_fd->uf_rd_count= count;
count, udp_read()'s second parameter, is the number of bytes requested to be read.


0061777 
0061778          if (udp_fd->uf_rdbuf_head)
uf_rdbuf_head is the queue of packets received by the udp file descriptor.


0061779          {
0061780                   if (get_time() <= udp_fd->uf_exp_tim)
When the read queue of a udp file descriptor is empty and a packet is placed in the queue, the timer (uf_exp_tim) is set. If this timer expires before the packet is read, this packet and all packets that have been subsequently placed in the queue are discarded.

If the packet has not expired (the normal case), udp_packet2user() is called.


get_time()


get_time() returns the number of clock ticks since reboot.

Several of the clients (eth, arp, ip, tcp, and udp) use get_time() to determine an appropriate timeout value for a given operation. For example, the arp code calls get_time() to determine an appropriate amount of time to wait for a response from an arp request before giving up.


0061781                            return udp_packet2user (udp_fd);
udp_packet2user()

udp_packet2user(udp_fd) delivers the first packet in the read queue (if it has not expired) of the udp file descriptor udp_fd, udp_packet2user's only parameter, to the process that opened the file descriptor and then sends a message to the file system specifying whether the operation was successful.

If the NWUO_RWDATONLY flag of the file descriptor is set, the packet is delivered without the pseudo udp header.


0061782                   tmp_acc= udp_fd->uf_rdbuf_head;
If this point in the code has been reached, the packet has expired. Remove all the packets in the read queue.


0061783                   while (tmp_acc)
0061784                   {
0061785                            next_acc= tmp_acc->acc_ext_link;
The acc_ext_link field of an accessor links multiple packets in a queue (in this case, the read queue).

For a full description of the network service's buffer management, click here.


0061786                            bf_afree(tmp_acc);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the entire chain will not be freed. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0061787                            tmp_acc= next_acc;
0061788                   }
0061789                   udp_fd->uf_rdbuf_head= NULL;
0061790          }
By setting the UFF_READ_IP flag in uf_flags and returning NW_SUSPEND, udp_read() indicates that the udp file descriptor is waiting for a packet from the lower layers (i.e., the ip code).


0061791          udp_fd->uf_flags |= UFF_READ_IP;
The UFF_READ_IP flag (unlike the UPF_READ_IP flag) is a very important flag. If the UFF_READ_IP flag is set, a read request was received for the udp file descriptor but the request could not be satisfied. Eventually, when a packet for the udp file descriptor arrives (and udp_ip_arrived() is called), the UFF_READ_IP flag will indicate that the process that opened the file descriptor wants to read this packet.

udp_packet2user() clears the UFF_READ_IP flag after copying the packet to the process that requested the read.


0061792          return NW_SUSPEND;
0061793 }
0061794 
0061795 PRIVATE int udp_packet2user (udp_fd)
0061796 udp_fd_t *udp_fd;
udp_packet2user()

udp_packet2user(udp_fd) delivers the first packet in the read queue (if it has not expired) of the udp file descriptor udp_fd, udp_packet2user's only parameter, to the process that opened the file descriptor and then sends a message to the file system specifying whether the operation was successful.

If the NWUO_RWDATONLY flag of the file descriptor is set, the packet is delivered without the pseudo udp header.


0061797 {
0061798          acc_t *pack, *tmp_pack;
0061799          udp_io_hdr_t *hdr;
0061800          int result, hdr_len;
0061801          size_t size, transf_size;
0061802 
0061803          pack= udp_fd->uf_rdbuf_head;
The read queue of the udp file descriptor is as follows:



Each rectangle in the figure is an accessor with an associated buffer. The accessors within a packet are linked by the acc_next field (red lines) and the packets are linked by the acc_ext_link field (green lines). This figure shows three packets linked together.

The first packet in the queue (the packet that will soon be delivered to the process that requested the read) is removed from the queue.


0061804          udp_fd->uf_rdbuf_head= pack->acc_ext_link;
0061805 
0061806          size= bf_bufsize (pack);
bf_bufsize()

bf_bufsize() returns the total buffer size of a linked list of accessors (i.e., the sum of acc_length for the accessors in a linked list).

For a detailed description of the network service's buffer management, click here.


0061807 
0061808          if (udp_fd->uf_udpopt.nwuo_flags & NWUO_RWDATONLY)
Strip the pseudo udp header, including ip options (if they exist), from the packet if the NWUO_RWDATONLY flag is set.

From ip(4):

"With NWUO_RWDATONLY only the data part of a UDP packet is sent to the server and only the data part is received from the server".


0061809          {
0061810 
0061811                   pack= bf_packIffLess (pack, UDP_IO_HDR_SIZE);
UDP_IO_HDR_SIZE is the size of the pseudo udp header and is #define'd in /include/net/gen/udp.h:

#define UDP_IO_HDR_SIZE 16


bf_packIffLess()


If the data in a linked list of accessors is less than min_len (the second parameter), bf_packIffLess(pack, min_len) packs the data by calling bf_pack().

bf_packIffLess() is often called to ensure that a packet's header is in a single contiguous buffer so that the individual fields of the header can be easily accessed.

For a detailed description of the network service's buffer management, click here.


0061812                   assert (pack->acc_length >= UDP_IO_HDR_SIZE);
0061813 
0061814                   hdr= (udp_io_hdr_t *)ptr2acc_data(pack);
ptr2acc_data()

The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.

udp_io_hdr

The "pseudo" udp header (not the standard udp header) is of type udp_io_hdr_t. udp_io_hdr_t is declared in include/net/gen/udp_hdr.h:

typedef struct udp_io_hdr

{
ipaddr_t uih_src_addr;
ipaddr_t uih_dst_addr;
udpport_t uih_src_port;
udpport_t uih_dst_port;
u16_t uih_ip_opt_len;
u16_t uih_data_len;
} udp_io_hdr_t;
uih_src_addr, uih_dst_addr, uih_src_port, uih_dst_port: Source and destination ip addresses and ports

uih_ip_opt_len: length of the ip options (zero if none exist)

uih_data_len: length of the data

If a udp file descriptor is configured appropriately, a process writing data to the udp file descriptor must prepend a pseudo udp header to the data, thereby specifying the values (given above) in the outgoing udp and ip headers. A udp pseudo header is also prepended to an otherwise header-less packet being copied to the process that requested a read.


0061815 #if CONF_UDP_IO_NW_BYTE_ORDER
0061816                   hdr_len= UDP_IO_HDR_SIZE+NTOHS(hdr->uih_ip_opt_len);
0061817 #else
0061818                   hdr_len= UDP_IO_HDR_SIZE+hdr->uih_ip_opt_len;
0061819 #endif
0061820 
0061821                   assert (size>= hdr_len);
Remove the pseudo udp header from the packet.


0061822                   size -= hdr_len;
0061823                   tmp_pack= bf_cut(pack, hdr_len, size);
bf_cut()

If a section of a linked list needs to be duplicated, bf_cut(data, offset, length) is called. For example, if a section of length 50 starting at an offset of 75 of the linked list below needs to be duplicated, bf_cut(data, 75, 50) is called:



Note that the original linked list remains unchanged and that acc_linkC for all the accessors in the new linked list is one.

If length (the third parameter) is zero, bf_cut() simply duplicates the first accessor in the linked list but sets acc_length=0 and acc_next=null. In other words, it creates a linked list consisting of a single accessor whose acc_length is 0.

bf_cut() is used in a number of scenarios, including cutting a received ethernet packet to size.

For a full description of the network service's buffer management, click here.



0061824                   bf_afree(pack);
The pseudo udp header is no longer needed.


bf_afree()


After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the entire chain will not be freed. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0061825                   pack= tmp_pack;
0061826          }
0061827 
0061828          if (size>udp_fd->uf_rd_count)
If this conditional is true, the process that requested the read did not request the entire packet. Give the process what it requested but report an error to the file system (see line 61843).


0061829          {
0061830                   tmp_pack= bf_cut (pack, 0, udp_fd->uf_rd_count);
bf_cut()

If a section of a linked list needs to be duplicated, bf_cut(data, offset, length) is called. For example, if a section of length 50 starting at an offset of 75 of the linked list below needs to be duplicated, bf_cut(data, 75, 50) is called:



Note that the original linked list remains unchanged and that acc_linkC for all the accessors in the new linked list is one.

If length (the third parameter) is zero, bf_cut() simply duplicates the first accessor in the linked list but sets acc_length=0 and acc_next=null. In other words, it creates a linked list consisting of a single accessor whose acc_length is 0.

bf_cut() is used in a number of scenarios, including cutting a received ethernet packet to size.

For a full description of the network service's buffer management, click here.



0061831                   bf_afree(pack);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the entire chain will not be freed. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0061832                   pack= tmp_pack;
0061833                   transf_size= udp_fd->uf_rd_count;
0061834          }
0061835          else
0061836                   transf_size= size;
0061837 
0061838          result= (*udp_fd->uf_put_userdata)(udp_fd->uf_srfd,
0061839                   (size_t)0, pack, FALSE);
The uf_put_userdata field is set to sr_put_userdata() by sr_add_minor(). This call to sr_put_userdata() copies the packet to the process that requested the read.


sr_put_userdata()


sr_put_userdata(fd, offset, data, for_ioctl) is the counterpart to sr_get_userdata() and (like sr_get_userdata()) does one of two things:

1) Copies data from a buffer (to be more specific, a chain of accessors) within the network service (this process) to a buffer within the user process. This can be either ioctl data (in which case, for_ioctl is TRUE) or read/write data (for_ioctl is FALSE). For example, udp_ioctl() (indirectly) calls sr_put_userdata() to give configuration data to a user process. Also, udp_packet2user() (indirectly) calls sr_put_userdata() to pass data to the user process.

2) Sends a message to the FS. For example, if a read is attempted on a udp file descriptor before the file descriptor is configured, reply_thr_put() is called, which then (indirectly) calls sr_put_userdata(), passing in EBADMODE as the second parameter (offset).

In my opinion, like sr_get_userdata(), this should have been made into two functions. As it is, it is too confusing.


0061840 
0061841          if (result >= 0)
0061842                   if (size > transf_size)
0061843                            result= EPACKSIZE;
The process did not request the entire packet. Report it to the file system.

EPACKSIZE is #define'd in /include/errno.h:

#define EPACKSIZE (_SIGN 50) /* invalid packet size for some protocol */


0061844                   else
0061845                            result= transf_size;
0061846 
0061847          udp_fd->uf_flags &= ~UFF_READ_IP;
The read operation, whether successful or unsuccessful, is finished. Therefore, clear the udp file descriptor's read flag (UFF_READ_IP).


0061848          result= (*udp_fd->uf_put_userdata)(udp_fd->uf_srfd, result,
0061849                            (acc_t *)0, FALSE);
Again, the uf_put_userdata field was set to sr_put_userdata() by sr_add_minor(). This call to sr_put_userdata() sends a message to the file system, indicating whether the previous copy (see line 61839) was successful and, if it was successful, reports the number of bytes transferred.


sr_put_userdata()


sr_put_userdata(fd, offset, data, for_ioctl) is the counterpart to sr_get_userdata() and (like sr_get_userdata()) does one of two things:

1) Copies data from a buffer (to be more specific, a chain of accessors) within the network service (this process) to a buffer within the user process. This can be either ioctl data (in which case, for_ioctl is TRUE) or read/write data (for_ioctl is FALSE). For example, udp_ioctl() (indirectly) calls sr_put_userdata() to give configuration data to a user process. Also, udp_packet2user() (indirectly) calls sr_put_userdata() to pass data to the user process.

2) Sends a message to the FS. For example, if a read is attempted on a udp file descriptor before the file descriptor is configured, reply_thr_put() is called, which then (indirectly) calls sr_put_userdata(), passing in EBADMODE as the second parameter (offset).

In my opinion, like sr_get_userdata(), this should have been made into two functions. As it is, it is too confusing.


0061850 assert (result == 0);
0061851 
0061852          return result;
0061853 }
0061854 
0061855 PRIVATE void udp_ip_arrived(port, pack, pack_size)
0061856 int port;
0061857 acc_t *pack;
0061858 size_t pack_size;
udp_ip_arrived()

udp_ip_arrived() is called either (indirectly) from the ip code or from udp_put_data() and is one of the last functions called for a read request from a user process. udp_ip_arrived() does some checks (e.g., a checksum check) and figures out the destination udp file descriptor before calling udp_packet2user() to deliver the packet to the process that requested the read.


0061859 {
0061860          udp_port_t *udp_port;
0061861          udp_fd_t *udp_fd, *share_fd;
0061862          acc_t *ip_hdr_acc, *udp_acc, *ipopt_pack, *no_ipopt_pack, *tmp_acc;
0061863          ip_hdr_t *ip_hdr;
0061864          udp_hdr_t *udp_hdr;
0061865          udp_io_hdr_t *udp_io_hdr;
0061866          size_t ip_hdr_size, udp_size, data_size, opt_size;
0061867          ipaddr_t src_addr, dst_addr;
0061868          udpport_t src_port, dst_port;
0061869          u8_t u16[2];
0061870          u16_t chksum;
0061871          unsigned long dst_type, flags;
0061872          time_t exp_tim;
0061873          int i, delivered, hash;
0061874 
0061875          udp_port= &udp_port_table[port];
Find the relevant udp port in udp_port_table[].


0061876 
The code lines below (lines 61877-61900) separate the ip header from the rest of the packet.


0061877          ip_hdr_acc= bf_cut(pack, 0, IP_MIN_HDR_SIZE);
ip_hdr_acc now points to a linked list of accessors that holds the ip header.

The minimum size of an ip header (i.e., an ip header without options) is #define'd in /include/net/gen/in.h:

#define IP_MIN_HDR_SIZE 20



bf_cut()


If a section of a linked list needs to be duplicated, bf_cut(data, offset, length) is called. For example, if a section of length 50 starting at an offset of 75 of the linked list below needs to be duplicated, bf_cut(data, 75, 50) is called:



Note that the original linked list remains unchanged and that acc_linkC for all the accessors in the new linked list is one.

If length (the third parameter) is zero, bf_cut() simply duplicates the first accessor in the linked list but sets acc_length=0 and acc_next=null. In other words, it creates a linked list consisting of a single accessor whose acc_length is 0.

bf_cut() is used in a number of scenarios, including cutting a received ethernet packet to size.

For a full description of the network service's buffer management, click here.



0061878          ip_hdr_acc= bf_packIffLess(ip_hdr_acc, IP_MIN_HDR_SIZE);
bf_packIffLess()

If the data in a linked list of accessors is less than min_len (the second parameter), bf_packIffLess(pack, min_len) packs the data by calling bf_pack().

bf_packIffLess() is often called to ensure that a packet's header is in a single contiguous buffer so that the individual fields of the header can be easily accessed.

For a detailed description of the network service's buffer management, click here.


0061879          ip_hdr= (ip_hdr_t *)ptr2acc_data(ip_hdr_acc);
ptr2acc_data()

The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.


0061880          ip_hdr_size= (ip_hdr->ih_vers_ihl & IH_IHL_MASK) << 2;
The header length is stored in the lower 4 bits of the single-byte ih_vers_ihl field (the upper 4 bits hold the version number). Therefore, it is necessary to mask out the unwanted bits.

IH_IHL_MASK is #define'd as:

#define IH_IHL_MASK 0xf

Since the lower 4 bits of ih_vers_ihl hold the header length (including the options) divided by 4, shifting left by 2 gives the actual length in bytes.


ip_hdr_t


struct ip_hdr_t is the structure of an ip header. "ih" (e.g., ih_src, ih_dst) stands for "Ip Header".

ip_hdr_t is declared in /include/net/gen/ip_hdr.h:

typedef struct ip_hdr

{
u8_t ih_vers_ihl, ih_tos;
u16_t ih_length, ih_id, ih_flags_fragoff;
u8_t ih_ttl, ih_proto;
u16_t ih_hdr_chk;
ipaddr_t ih_src, ih_dst;
} ip_hdr_t;

ih_vers_ihl: The lower 4 bits hold the length of the header plus options (if there are any) divided by 4 (i.e., the actual length in bytes is 4 times as great as the value stored in these 4 bits). An example of an option is a router list that a packet should follow to its destination.

The upper four bits is the version number (e.g., IPv4).


ih_tos: tos stands for "Type Of Service" and is the priority of the ip packet. A value of zero is the lowest priority. Both UDP and TCP have a default TOS of zero.

#define TCP_DEF_TOS 0
#define UDP_TOS 0


ih_length: The length of the entire ip packet, including the ip header.


ih_id: The value of ih_id for the first packet sent out is determined by ip_init() and is equal to the number of clock ticks since reboot (i.e., the value returned by get_time) and is incremented for each packet sent out. This value is used to combine fragments at the receiving end if fragmentation has occurred.


ih_flags_fragoff: ih_flags_fragoff is a combination of flags and a (possible) fragmentation offset ("fragoff").

If the packet should not be fragmented, ih_flags_fragoff is set to IH_DONT_FRAG. If there are additional fragments (e.g., the 3rd fragment of 4 fragments), ih_flags_fragoff is set to IH_MORE_FRAGS.

If the packet is indeed just a fragment of a packet, this value indicates the starting byte position (in 8 byte increments) of the fragment's data within the original ip packet's data. So for example, if an ip packet of data size 2000 bytes (not including the ip header) is broken up into two fragments of 1496 and 504 bytes, the first fragment would have a fragmentation offset of 0 bytes and the second fragment would have a fragmentation offset of 1496 bytes; the offset stored in the second fragment's ih_flags_fragoff is therefore 187 (1496 / 8 = 187).


ih_ttl: "Time to live" for the packet. As a packet is routed to the destination, each router decrements the packet's ttl. When the ttl reaches 0, the router discards the packet and sends an "icmp time exceeded" packet to the source. The ttl is designed to prevent packets that can't reach their destination from indefinitely bouncing around between routers. UDP's default TTL is 30:

#define UDP_TTL 30

Note that the Minix code also uses this value as a timeout value (in seconds). This code was written before the ttl field was redefined to be strictly a hop count. The original IP RFC defines the ttl field as the time to live in seconds.


ih_proto: The protocol of the ip packet. For example, if the packet is a udp packet, ih_proto will be 17. If the packet is a tcp packet, ih_proto will be 6.


ih_hdr_chk: Checksum for the header.


ih_src, ih_dst: Source and destination ip address of the ip packet.


IP HEADER (as given by RFC 791)


0 1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|Version| IHL |Type of Service| Total Length |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Identification |Flags| Fragment Offset |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Time to Live | Protocol | Header Checksum |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Source Address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Destination Address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Options | Padding |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+



0061881          if (ip_hdr_size != IP_MIN_HDR_SIZE)
Make sure that the ip header is in one contiguous buffer. This may not be the case if the ip header has options.


0061882          {
0061883                   bf_afree(ip_hdr_acc);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the entire chain will not be freed. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0061884                   ip_hdr_acc= bf_cut(pack, 0, ip_hdr_size);
bf_cut()

If a section of a linked list needs to be duplicated, bf_cut(data, offset, length) is called. For example, if a section of length 50 starting at an offset of 75 of the linked list below needs to be duplicated, bf_cut(data, 75, 50) is called:



Note that the original linked list remains unchanged and that acc_linkC for all the accessors in the new linked list is one.

If length (the third parameter) is zero, bf_cut() simply duplicates the first accessor in the linked list but sets acc_length=0 and acc_next=null. In other words, it creates a linked list consisting of a single accessor whose acc_length is 0.

bf_cut() is used in a number of scenarios, including cutting a received ethernet packet to size.

For a full description of the network service's buffer management, click here.



0061885                   ip_hdr_acc= bf_packIffLess(ip_hdr_acc, ip_hdr_size);
bf_packIffLess()

If the data in a linked list of accessors is less than min_len (the second parameter), bf_packIffLess(pack, min_len) packs the data by calling bf_pack().

bf_packIffLess() is often called to ensure that a packet's header is in a single contiguous buffer so that the individual fields of the header can be easily accessed.

For a detailed description of the network service's buffer management, click here.


0061886                   ip_hdr= (ip_hdr_t *)ptr2acc_data(ip_hdr_acc);
ptr2acc_data()

The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.


0061887          }
0061888 
On line 61879 (or alternatively on line 61886), ip_hdr was set to point to the ip header data held by ip_hdr_acc, a duplicate of the header made by bf_cut(). Therefore, the ip header at the beginning of pack can now be discarded.


0061889          udp_acc= bf_delhead(pack, ip_hdr_size);
bf_delhead()

If only the beginning of a linked list needs to be freed, bf_delhead() is called. If acc_linkC and buf_linkC are one for all of the relevant accessors and their associated buffers in the linked list, the operation is straightforward:



bf_delhead() is often called to remove the header (e.g., ip header) from a packet.

For a detailed description of the network service's buffer management, click here.


0061890          pack= NULL;
0061891 
0061892          pack_size -= ip_hdr_size;
0061893          if (pack_size < UDP_HDR_SIZE)
There's something very wrong if the data remaining after the ip header was stripped isn't even as large as a udp header.


0061894          {
0061895                   DBLOCK(1, printf("packet too small\n"));
0061896 
0061897                   bf_afree(ip_hdr_acc);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the entire chain will not be freed. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0061898                   bf_afree(udp_acc);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the entire chain will not be freed. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0061899                   return;
0061900          }
0061901 
0061902          udp_acc= bf_packIffLess(udp_acc, UDP_HDR_SIZE);
bf_packIffLess()

If the data in a linked list of accessors is less than min_len (the second parameter), bf_packIffLess(pack, min_len) packs the data by calling bf_pack().

bf_packIffLess() is often called to ensure that a packet's header is in a single contiguous buffer so that the individual fields of the header can be easily accessed.

For a detailed description of the network service's buffer management, click here.


0061903          udp_hdr= (udp_hdr_t *)ptr2acc_data(udp_acc);
ptr2acc_data()

The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.


0061904          udp_size= ntohs(udp_hdr->uh_length);
htons() / ntohs() / htonl() / ntohl()

From htons(3):

"htons() converts a 16-bit quantity from host byte order to network byte order."

Different CPU architectures group multiple bytes differently. For example, on a "little-endian" machine (an example of which is the Intel CPU), the value 0x1234 is stored in memory as 0x3412. However, on a "big-endian" machine, the value 0x1234 is stored in memory as 0x1234.

It is important that values in a header are sent across a network in a consistent manner independent of the architecture of the sending or receiving system. For this reason, a standard was chosen. The standard chosen was big-endian although it could have just as well been little-endian.

htons() is defined in /include/net/hton.h, as:
#define htons(x) (_tmp=(x), ((_tmp>>8) & 0xff) | ((_tmp<<8) & 0xff00))

ntohs() converts a 16-bit quantity from network byte order to host byte order, the reverse of htons().

htonl() and ntohl() are identical to htons() and ntohs() except that they convert 32-bit quantities instead of 16-bit quantities.

Processes generally supply header information when sending packets. The data in these fields is converted to the network format (i.e., big-endian) by the process before the process copies the data to the network service.


0061905          if (udp_size > pack_size)
If the data in the buffers is not as large as the udp header claims (i.e., as large as the value in uh_length), there's a problem.


0061906          {
0061907                   DBLOCK(1, printf("packet too large\n"));
0061908 
0061909                   bf_afree(ip_hdr_acc);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the entire chain will not be freed. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0061910                   bf_afree(udp_acc);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the entire chain will not be freed. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0061911                   return;
0061912          }
0061913 
0061914          src_addr= ip_hdr->ih_src;
0061915          dst_addr= ip_hdr->ih_dst;
The ip header (obviously) contains the source and destination ip addresses.


0061916 
0061917          if (udp_hdr->uh_chksum)
The checksum for a udp packet is optional. If the checksum exists, compare the checksum field with the checksum calculated from the packet.

The checksum is calculated with the following data:



For a detailed description of the checksum algorithm, click here.


0061918          {
0061919                   u16[0]= 0;
0061920                   u16[1]= ip_hdr->ih_proto;
0061921                   chksum= pack_oneCsum(udp_acc);
pack_oneCsum()

pack_oneCsum() computes the checksum of a udp packet. It accomplishes this by computing the checksum of each of the packet's buffers (by calling oneC_sum()).

Note that a checksum is used to determine if errors occurred during the transmission of data.

pack_oneCsum() is very similar to icmp_pack_oneCsum(). The two functions probably should have been consolidated into one.


0061922                   chksum= oneC_sum(chksum, (u16_t *)&src_addr, sizeof(ipaddr_t));
oneC_sum()

A checksum is used to determine if errors occurred during the transmission of data. The checksum algorithm used by oneC_sum() (which is also the Internet standard) is described by RFC 1071.

Essentially, the algorithm goes through the data and adds all of the 16-bit words together (using one's complement addition). The high 16 bits of the resulting 32 bit value are then added to the low 16 bits (again, using one's complement addition). The checksum field is then set to the one's complement of this 16 bit sum. (Recall that the one's complement of 0xF0F0 is 0x0F0F.) Since adding any 16 bit number to its 16 bit one's complement gives 0xFFFF, the sum over the packet (without the checksum field) added to the checksum field will equal 0xFFFF (provided the packet was not corrupted after the checksum field was calculated). For example, the sum over the data covered by a udp checksum (including the checksum field) will equal 0xFFFF if the packet was not corrupted in delivery.


From RFC 1071:

In outline, the Internet checksum algorithm is fairly simple:

(1) Adjacent octets to be checksummed are paired to form 16-bit
integers, and the 1's complement sum of these 16-bit integers is
formed.

(2) To generate a checksum, the checksum field itself is cleared,
the 16-bit 1's complement sum is computed over the octets
concerned, and the 1's complement of this sum is placed in the
checksum field.

(3) To check a checksum, the 1's complement sum is computed over the
same set of octets, including the checksum field. If the result
is all 1 bits (-0 in 1's complement arithmetic), the check
succeeds.

Below is a "C" code algorithm that describes the process above. This algorithm is also from RFC 1071. Note that count is the running count of all the bytes in the data and checksum is the return value.



{
/* Compute Internet Checksum for "count" bytes
* beginning at location "addr".
*/
register long sum = 0;

while( count > 1 ) {
/* This is the inner loop */
sum += * (unsigned short) addr++;
count -= 2;
}

/* Add left-over byte, if any */
if( count > 0 )
sum += * (unsigned char *) addr;

/* Fold 32-bit sum to 16 bits */
while (sum>>16)
sum = (sum & 0xffff) + (sum >> 16);

checksum = ~sum;
}



0061923                   chksum= oneC_sum(chksum, (u16_t *)&dst_addr, sizeof(ipaddr_t));
oneC_sum()

A checksum is used to determine if errors occurred during the transmission of data. The checksum algorithm used by oneC_sum() (which is also the Internet standard) is described by RFC 1071.

Essentially, the algorithm goes through the data and adds all of the 16-bit words together (using one's complement addition). The high 16 bits of the resulting 32 bit value are then added to the low 16 bits (again, using one's complement addition). The checksum field is then set to the one's complement of this 16 bit sum. (Recall that the one's complement of 0xF0F0 is 0x0F0F.) Since adding any 16 bit number to its 16 bit one's complement gives 0xFFFF, the sum over the packet (without the checksum field) added to the checksum field will equal 0xFFFF (provided the packet was not corrupted after the checksum field was calculated). For example, the sum over the data covered by a udp checksum (including the checksum field) will equal 0xFFFF if the packet was not corrupted in delivery.


From RFC 1071:

In outline, the Internet checksum algorithm is fairly simple:

(1) Adjacent octets to be checksummed are paired to form 16-bit
integers, and the 1's complement sum of these 16-bit integers is
formed.

(2) To generate a checksum, the checksum field itself is cleared,
the 16-bit 1's complement sum is computed over the octets
concerned, and the 1's complement of this sum is placed in the
checksum field.

(3) To check a checksum, the 1's complement sum is computed over the
same set of octets, including the checksum field. If the result
is all 1 bits (-0 in 1's complement arithmetic), the check
succeeds.

Below is a "C" code algorithm that describes the process above. This algorithm is also from RFC 1071. Note that count is the running count of all the bytes in the data and checksum is the return value.



{
/* Compute Internet Checksum for "count" bytes
* beginning at location "addr".
*/
register long sum = 0;

while( count > 1 ) {
/* This is the inner loop */
sum += * (unsigned short) addr++;
count -= 2;
}

/* Add left-over byte, if any */
if( count > 0 )
sum += * (unsigned char *) addr;

/* Fold 32-bit sum to 16 bits */
while (sum>>16)
sum = (sum & 0xffff) + (sum >> 16);

checksum = ~sum;
}



0061924                   chksum= oneC_sum(chksum, (u16_t *)u16, sizeof(u16));
oneC_sum()

A checksum is used to determine if errors occurred during the transmission of data. The checksum algorithm used by oneC_sum() (which is also the Internet standard) is described by RFC 1071.

Essentially, the algorithm goes through the data and adds all of the 16-bit words (pairs of adjacent bytes) together (using one's complement addition). The high 16 bits of the resulting 32 bit value are then added to the low 16 bits (again, using one's complement addition). The checksum field is then set to the one's complement of this 16 bit sum. (Recall that the one's complement of 0xF0F0 is 0x0F0F.) Since adding any 16 bit number to its 16 bit one's complement yields 0xFFFF, the one's complement sum of the packet (without the checksum field) added to the checksum field will equal 0xFFFF (provided the packet was not corrupted after the checksum field was calculated). For example, the one's complement sum computed over a udp header (including the checksum field) will equal 0xFFFF if the packet was not corrupted in delivery.


From RFC 1071:

In outline, the Internet checksum algorithm is fairly simple:

(1) Adjacent octets to be checksummed are paired to form 16-bit
integers, and the 1's complement sum of these 16-bit integers is
formed.

(2) To generate a checksum, the checksum field itself is cleared,
the 16-bit 1's complement sum is computed over the octets
concerned, and the 1's complement of this sum is placed in the
checksum field.

(3) To check a checksum, the 1's complement sum is computed over the
same set of octets, including the checksum field. If the result
is all 1 bits (-0 in 1's complement arithmetic), the check
succeeds.

Below is a "C" code algorithm that describes the process above. This algorithm is also from RFC 1071. Note that count is the running count of all the bytes in the data and checksum is the return value.



{
/* Compute Internet Checksum for "count" bytes
* beginning at location "addr".
*/
register long sum = 0;

while( count > 1 ) {
/* This is the inner loop */
sum += * (unsigned short) addr++;
count -= 2;
}

/* Add left-over byte, if any */
if( count > 0 )
sum += * (unsigned char *) addr;

/* Fold 32-bit sum to 16 bits */
while (sum>>16)
sum = (sum & 0xffff) + (sum >> 16);

checksum = ~sum;
}



0061925                   chksum= oneC_sum(chksum, (u16_t *)&udp_hdr->uh_length,
0061926                            sizeof(udp_hdr->uh_length));
oneC_sum()

A checksum is used to determine if errors occurred during the transmission of data. The checksum algorithm used by oneC_sum() (which is also the Internet standard) is described by RFC 1071.

Essentially, the algorithm goes through the data and adds all of the 16-bit words (pairs of adjacent bytes) together (using one's complement addition). The high 16 bits of the resulting 32 bit value are then added to the low 16 bits (again, using one's complement addition). The checksum field is then set to the one's complement of this 16 bit sum. (Recall that the one's complement of 0xF0F0 is 0x0F0F.) Since adding any 16 bit number to its 16 bit one's complement yields 0xFFFF, the one's complement sum of the packet (without the checksum field) added to the checksum field will equal 0xFFFF (provided the packet was not corrupted after the checksum field was calculated). For example, the one's complement sum computed over a udp header (including the checksum field) will equal 0xFFFF if the packet was not corrupted in delivery.


From RFC 1071:

In outline, the Internet checksum algorithm is fairly simple:

(1) Adjacent octets to be checksummed are paired to form 16-bit
integers, and the 1's complement sum of these 16-bit integers is
formed.

(2) To generate a checksum, the checksum field itself is cleared,
the 16-bit 1's complement sum is computed over the octets
concerned, and the 1's complement of this sum is placed in the
checksum field.

(3) To check a checksum, the 1's complement sum is computed over the
same set of octets, including the checksum field. If the result
is all 1 bits (-0 in 1's complement arithmetic), the check
succeeds.

Below is a "C" code algorithm that describes the process above. This algorithm is also from RFC 1071. Note that count is the running count of all the bytes in the data and checksum is the return value.



{
/* Compute Internet Checksum for "count" bytes
* beginning at location "addr".
*/
register long sum = 0;

while( count > 1 ) {
/* This is the inner loop */
sum += * (unsigned short) addr++;
count -= 2;
}

/* Add left-over byte, if any */
if( count > 0 )
sum += * (unsigned char *) addr;

/* Fold 32-bit sum to 16 bits */
while (sum>>16)
sum = (sum & 0xffff) + (sum >> 16);

checksum = ~sum;
}



0061927                   if (~chksum & 0xffff)
The checksum should be 0xFFFF. Otherwise, there was a transmission error.


0061928                   {
0061929                            DBLOCK(1, printf("checksum error in udp packet\n");
0061930                                     printf("src ip_addr= ");
0061931                                     writeIpAddr(src_addr);
0061932                                     printf(" dst ip_addr= ");
0061933                                     writeIpAddr(dst_addr);
0061934                                     printf("\n");
0061935                                     printf("packet chksum= 0x%x, sum= 0x%x\n",
0061936                                              udp_hdr->uh_chksum, chksum));
0061937 
0061938                            bf_afree(ip_hdr_acc);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the chain will not be freed in its entirety. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0061939                            bf_afree(udp_acc);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the chain will not be freed in its entirety. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0061940                            return;
0061941                   }
0061942          }
0061943 
0061944          exp_tim= get_time() + UDP_READ_EXP_TIME;
The expiration time is 10 seconds (= 600 clock "ticks"):

UDP_READ_EXP_TIME is #define'd in src/inet/generic/udp.h:

#define UDP_READ_EXP_TIME (10L * HZ)

HZ is #define'd in /include/minix/const.h:

#define HZ 60 /* clock freq (software settable on IBM-PC) */


0061945          src_port= udp_hdr->uh_src_port;
0061946          dst_port= udp_hdr->uh_dst_port;
Acquire the source and destination ports from the UDP header.


0061947 
0061948          /* Send an ICMP port unreachable if the packet could not be
0061949           * delivered.
0061950           */
0061951          delivered= 0;
0061952 
From ip(4):

"NWUO_EN_LOC enables the reception of packets with the local IP address as destination; NWUO_EN_BROAD enables the reception of broadcast packets."

The variable dst_type is used on line 62033.


0061953          if (dst_addr == udp_port->up_ipaddr)
0061954                   dst_type= NWUO_EN_LOC;
0061955          else
0061956          {
0061957                   dst_type= NWUO_EN_BROAD;
0061958 
0061959                   /* Don't send ICMP error packets for broadcast packets */
0061960                   delivered= 1;
0061961          }
0061962 
0061963          DBLOCK(0x20, printf("udp: got packet from ");
0061964                   writeIpAddr(src_addr);
0061965                   printf(".%u to ", ntohs(src_port));
0061966                   writeIpAddr(dst_addr);
0061967                   printf(".%u\n", ntohs(dst_port)));
0061968 
0061969          no_ipopt_pack= bf_memreq(UDP_IO_HDR_SIZE);
The packet will be delivered to the process that requested the read with a udp "pseudo" header.


bf_memreq()


After the buffers have been initialized, accessors[] looks like the following:



bf_memreq() allocates accessors to the caller. For example, if 1514 bytes of buffer space are requested immediately after the network process starts and each buffer is 512 bytes (the default), then accessors[] will look like the following:



Note that three elements of accessors[] have been removed from buf512_freelist and that the head of the chain of the 3 accessors is returned by bf_memreq(). Also note that the acc_linkC and buf_linkC fields have been set to one and acc_length and acc_offset have been set to their appropriate values.

So what happens if there are not enough buffers on the buf512_freelist to satisfy a request? On lines 2280-2290 of buf.c, functions that free buffers for the specific clients (e.g., eth_buffree()) are called until there are enough buffers on buf512_freelist.

For a complete description of the network service's buffer management, click here.


udp_io_hdr


The "pseudo" udp header (not the standard udp header) is of type uih_io_hdr_t. uih_io_hdr_t is declared in include/net/gen/udp_hdr.h:

typedef struct udp_io_hdr

{
ipaddr_t uih_src_addr;
ipaddr_t uih_dst_addr;
udpport_t uih_src_port;
udpport_t uih_dst_port;
u16_t uih_ip_opt_len;
u16_t uih_data_len;
} udp_io_hdr_t;
uih_src_addr, uih_dst_addr, uih_src_port, uih_dst_port: Source and destination ip addresses and ports

uih_ip_opt_len: length of the ip options (zero if none exist)

uih_data_len: length of the data

If a udp file descriptor is configured appropriately, a process writing data to the udp file descriptor must prepend a pseudo udp header to the data, thereby specifying the values (given above) in the outgoing udp and ip headers. A udp pseudo header is also prepended to an otherwise header-less packet being copied to the process that requested a read.


0061970          udp_io_hdr= (udp_io_hdr_t *)ptr2acc_data(no_ipopt_pack);
ptr2acc_data()

The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.


0061971          udp_io_hdr->uih_src_addr= src_addr;
0061972          udp_io_hdr->uih_dst_addr= dst_addr;
0061973          udp_io_hdr->uih_src_port= src_port;
0061974          udp_io_hdr->uih_dst_port= dst_port;
0061975          data_size = udp_size-UDP_HDR_SIZE;
0061976 #if CONF_UDP_IO_NW_BYTE_ORDER
0061977          udp_io_hdr->uih_ip_opt_len= HTONS(0);
0061978          udp_io_hdr->uih_data_len= htons(data_size);
htons() / ntohs() / htonl() / ntohl()

From htons(3):

"htons() converts a 16-bit quantity from host byte order to network byte order."

Different CPU architectures order the bytes of a multi-byte value differently. For example, on a "little-endian" machine (an example of which is the Intel CPU), the 16-bit value 0x1234 is stored in memory as the byte sequence 0x34, 0x12 (least significant byte first). However, on a "big-endian" machine, the value 0x1234 is stored in memory as the byte sequence 0x12, 0x34 (most significant byte first).

It is important that values in a header are sent across a network in a consistent manner independent of the architecture of the sending or receiving system. For this reason, a standard was chosen. The standard chosen was big-endian although it could have just as well been little-endian.

htons() is defined in /include/net/hton.h, as:
#define htons(x) (_tmp=(x), ((_tmp>>8) & 0xff) | ((_tmp<<8) & 0xff00))

ntohs() converts a 16-bit quantity from network byte order to host byte order, the reverse of htons().

htonl() and ntohl() are identical to htons() and ntohs() except that they convert 32-bit quantities instead of 16-bit quantities.

Processes generally supply header information when sending packets. The data in these fields is converted to the network format (i.e., big-endian) by the process before the process copies the data to the network service.


0061979 #else
0061980          udp_io_hdr->uih_ip_opt_len= 0;
0061981          udp_io_hdr->uih_data_len= data_size;
0061982 #endif
0061983          no_ipopt_pack->acc_next= bf_cut(udp_acc, UDP_HDR_SIZE, data_size);
Link the udp pseudo header with the udp packet (minus its udp header). In other words, link the udp pseudo header with the data.


bf_cut()


If a section of a linked list needs to be duplicated, bf_cut(data, offset, length) is called. For example, if a section of length 50 starting at an offset of 75 of the linked list below needs to be duplicated, bf_cut(data, 75, 50) is called:



Note that the original linked list remains unchanged and that acc_linkC for all the accessors in the new linked list is one.

If length (the third parameter) is zero, bf_cut() simply duplicates the first accessor in the linked list but sets acc_length=0 and acc_next=null. In other words, it creates a linked list consisting of a single accessor whose acc_length is 0.

bf_cut() is used in a number of scenarios, including cutting a received ethernet packet to size.

For a full description of the network service's buffer management, click here.



0061984 
If there are ip options, no_ipopt_pack and ipopt_pack will be as follows:



If there are no ip options, ipopt_pack and no_ipopt_pack will be as follows:




If there are ip options, give them to the user if requested and don't give them to the user if they are not requested (see lines 62061-62064 and 62075-62078).



0061985          if (ip_hdr_size == IP_MIN_HDR_SIZE)
0061986          {
0061987                   ipopt_pack= no_ipopt_pack;
0061988                   ipopt_pack->acc_linkC++;
0061989          }
If there were ip options included in the ip header:

1) copy the udp pseudo header to a new pseudo udp header (lines 61992-61994)
2) create a buffer with the ip options (lines 61995-62001)
3) link the udp pseudo header to the buffer with the options to the data, in that order (lines 62004-62008)


0061990          else
0061991          {
0061992                   ipopt_pack= bf_memreq(UDP_IO_HDR_SIZE);
bf_memreq()

After the buffers have been initialized, accessors[] looks like the following:



bf_memreq() allocates accessors to the caller. For example, if 1514 bytes of buffer space are requested immediately after the network process starts and each buffer is 512 bytes (the default), then accessors[] will look like the following:



Note that three elements of accessors[] have been removed from buf512_freelist and that the head of the chain of the 3 accessors is returned by bf_memreq(). Also note that the acc_linkC and buf_linkC fields have been set to one and acc_length and acc_offset have been set to their appropriate values.

So what happens if there are not enough buffers on the buf512_freelist to satisfy a request? On lines 2280-2290 of buf.c, functions that free buffers for the specific clients (e.g., eth_buffree()) are called until there are enough buffers on buf512_freelist.

For a complete description of the network service's buffer management, click here.


0061993                   *(udp_io_hdr_t *)ptr2acc_data(ipopt_pack)= *udp_io_hdr;
0061994                   udp_io_hdr= (udp_io_hdr_t *)ptr2acc_data(ipopt_pack);
0061995                   opt_size = ip_hdr_size-IP_MIN_HDR_SIZE;
opt_size is the size of the IP header options. ip_hdr_size was calculated on line 61880.


0061996 #if CONF_UDP_IO_NW_BYTE_ORDER
0061997                   udp_io_hdr->uih_ip_opt_len= htons(opt_size);
htons() / ntohs() / htonl() / ntohl()

From htons(3):

"htons() converts a 16-bit quantity from host byte order to network byte order."

Different CPU architectures order the bytes of a multi-byte value differently. For example, on a "little-endian" machine (an example of which is the Intel CPU), the 16-bit value 0x1234 is stored in memory as the byte sequence 0x34, 0x12 (least significant byte first). However, on a "big-endian" machine, the value 0x1234 is stored in memory as the byte sequence 0x12, 0x34 (most significant byte first).

It is important that values in a header are sent across a network in a consistent manner independent of the architecture of the sending or receiving system. For this reason, a standard was chosen. The standard chosen was big-endian although it could have just as well been little-endian.

htons() is defined in /include/net/hton.h, as:
#define htons(x) (_tmp=(x), ((_tmp>>8) & 0xff) | ((_tmp<<8) & 0xff00))

ntohs() converts a 16-bit quantity from network byte order to host byte order, the reverse of htons().

htonl() and ntohl() are identical to htons() and ntohs() except that they convert 32-bit quantities instead of 16-bit quantities.

Processes generally supply header information when sending packets. The data in these fields is converted to the network format (i.e., big-endian) by the process before the process copies the data to the network service.


0061998 #else
0061999                   udp_io_hdr->uih_ip_opt_len= opt_size;
0062000 #endif
0062001                   tmp_acc= bf_cut(ip_hdr_acc, (size_t)IP_MIN_HDR_SIZE, opt_size);
tmp_acc now holds the ip options.


bf_cut()


If a section of a linked list needs to be duplicated, bf_cut(data, offset, length) is called. For example, if a section of length 50 starting at an offset of 75 of the linked list below needs to be duplicated, bf_cut(data, 75, 50) is called:



Note that the original linked list remains unchanged and that acc_linkC for all the accessors in the new linked list is one.

If length (the third parameter) is zero, bf_cut() simply duplicates the first accessor in the linked list but sets acc_length=0 and acc_next=null. In other words, it creates a linked list consisting of a single accessor whose acc_length is 0.

bf_cut() is used in a number of scenarios, including cutting a received ethernet packet to size.

For a full description of the network service's buffer management, click here.



0062002                   assert(tmp_acc->acc_linkC == 1);
0062003                   assert(tmp_acc->acc_next == NULL);
0062004                   ipopt_pack->acc_next= tmp_acc;
Link the udp pseudo header with the ip options.


0062005 
0062006                   tmp_acc->acc_next= no_ipopt_pack->acc_next;
no_ipopt_pack->acc_next is the data (see line 61983).


0062007                   if (tmp_acc->acc_next)
0062008                            tmp_acc->acc_next->acc_linkC++;
The acc_linkC field of an accessor is the count of the number of references to it (for example, the number of accessors whose acc_next field points to it). For example, if the acc_next fields of two accessors point to a third accessor, acc_linkC for the third accessor will be 2.

For a detailed description of the network service's buffer management, click here.


0062009          }
0062010 
In order to quickly find the udp file descriptor that corresponds to the destination common port number (0-65535), the udp code uses a hash. The goal of this hash is to associate a nearly equal number of open file descriptors with each element in udp_port->up_port_hash[] (see line 62020).

From line 61021:

#define UDP_PORT_HASH_NR 16 /* Must be a power of 2 */


0062011          hash= dst_port;
0062012          hash ^= (hash >> 8);
0062013          hash &= (UDP_PORT_HASH_NR-1);
0062014 
0062015          for (i= 0; i<2; i++)
0062016          {
0062017                   share_fd= NULL;
0062018 
0062019                   udp_fd= (i == 0) ? udp_port->up_port_any :
0062020                            udp_port->up_port_hash[hash];
Search through the udp port's up_port_any linked list of udp file descriptors and the linked list of udp file descriptors for the appropriate element of up_port_hash[]. If various conditions hold for a given udp file descriptor (e.g., the destination port is correct), the packet is delivered.


0062021                   for (; udp_fd; udp_fd= udp_fd->uf_port_next)
0062022                   {
0062023                            if (i && udp_fd->uf_udpopt.nwuo_locport != dst_port)
0062024                                     continue;
The udp file descriptor's port number (e.g., port 49160) must match with the port number of the packet.


0062025                   
0062026                            assert(udp_fd->uf_flags & UFF_INUSE);
0062027                            assert(udp_fd->uf_flags & UFF_OPTSET);
0062028                   
0062029                            if (udp_fd->uf_port != udp_port)
0062030                                     continue;
The udp file descriptor that matched above must be associated with the physical udp port (e.g., /dev/udp0) on which the packet arrived. A udp file descriptor may be associated with only a single physical udp port.


0062031 
0062032                            flags= udp_fd->uf_udpopt.nwuo_flags;
The nwuo_flags field of the udp file descriptor is used to determine if the udp file descriptor accepts the packet.


0062033                            if (!(flags & dst_type))
0062034                                     continue;
Verify that the udp file descriptor accepts the same types of packets (e.g., broadcast packets).

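dst_type is NWUO_EN_LOC (if the packet's destination ip address is the same as the ip address of the physical udp port) or NWUO_EN_BROAD (if the destination ip address differs from the udp port's ip address, in which case the packet is treated as a broadcast packet). See lines 61953-61957.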


0062035 
0062036                            if ((flags & NWUO_RP_SET) &&
0062037                                     udp_fd->uf_udpopt.nwuo_remport != src_port)
0062038                            {
0062039                                     continue;
0062040                            }
If the NWUO_RP_SET flag is set, verify that the udp file descriptor's remote udp port (nwuo_remport) field is the same as the packet's.

From ip(4):

"When NWUO_RP_SET is selected, the remote port is stored in nwuo_remport."



0062041 
0062042                            if ((flags & NWUO_RA_SET) &&
0062043                                     udp_fd->uf_udpopt.nwuo_remaddr != src_addr)
0062044                            {
0062045                                     continue;
0062046                            }
If the NWUO_RA_SET flag is set, verify that the udp file descriptor's remote ip address (nwuo_remaddr) field is the same as the packet's.

From ip(4):

"When NWUO_RA_SET is selected, the remote address is stored in nwuo_remaddr."


0062047 
0062048                            if (i)
i will be 1 if the udp file descriptor is in the udp port's port-specific linked list (i.e., not in the up_port_any linked list).


0062049                            {
0062050                                     /* Packet is considdered to be delivered */
0062051                                     delivered= 1;
0062052                            }
0062053 
0062054                            if ((flags & NWUO_ACC_MASK) == NWUO_SHARED &&
0062055                                     (!share_fd || !udp_fd->uf_rdbuf_head))
From ip(4):

"NWUO_SHARED means shared access: only channels that specify shared access can use this port and all packets that are received are handed to at most one channel".


0062056                            {
0062057                                     share_fd= udp_fd;
0062058                                     continue;
0062059                            }
0062060 
0062061                            if (flags & NWUO_EN_IPOPT)
0062062                                     pack= ipopt_pack;
0062063                            else
0062064                                     pack= no_ipopt_pack;
If the udp file descriptor accepts ip options, include the ip options. See the comment for line 61984.

From ip(4):

"When NWUO_EN_IPOPT is set IP, options will be delivered and sent."


0062065 
0062066                            pack->acc_linkC++;
Before a packet is placed in a udp file descriptor's read queue, the first accessor is copied and then the copy is placed in the queue. Incrementing acc_linkC forces udp_rd_enqueue() to create this copy of the accessor (see lines 62501-62506). By having a different first accessor for each udp file descriptor, the packets in the read queue of each of the different file descriptors can be linked to different packets if packets arrive before the packets are handed off to their respective processes.


0062067                            udp_rd_enqueue(udp_fd, pack, exp_tim);
udp_rd_enqueue()

udp_rd_enqueue(udp_fd, pack, exp_tim) places the packet pack, udp_rd_enqueue()'s second parameter, into the read queue of the udp file descriptor udp_fd, udp_rd_enqueue()'s first parameter. If the packet is the first in the queue, udp_rd_enqueue() sets the packet's expiration time to exp_tim, udp_rd_enqueue()'s third parameter.

udp_rd_enqueue() is called in two places by udp_ip_arrived() after a packet arrives. If a read request was made on a udp file descriptor to which the packet is destined, the packet is delivered immediately. If a read request was not made, the packet remains in the udp file descriptor's read queue until the udp file descriptor makes a read request.


0062068                            if (udp_fd->uf_flags & UFF_READ_IP)
0062069                                     udp_packet2user(udp_fd);
If the udp file descriptor is being read (i.e., the UFF_READ_IP flag is set), deliver the packet to the user.


udp_packet2user()


udp_packet2user(udp_fd) delivers the first packet in the read queue (if it has not expired) of the udp file descriptor udp_fd, udp_packet2user's only parameter, to the process that opened the file descriptor and then sends a message to the file system specifying whether the operation was successful.

If the NWUO_RWDATONLY flag of the file descriptor is set, the packet is delivered without the pseudo udp header.


0062070                   }
0062071 
0062072                   if (share_fd)
Code lines 62072-62084 mirror lines 62061-62070. The code lines below deliver the packet to the udp file descriptor that is configured to share the port.


0062073                   {
0062074                            flags= share_fd->uf_udpopt.nwuo_flags;
0062075                            if (flags & NWUO_EN_IPOPT)
0062076                                     pack= ipopt_pack;
0062077                            else
0062078                                     pack= no_ipopt_pack;
If the udp file descriptor accepts ip options, include the ip options. See the comment for line 61984.

From ip(4):

"When NWUO_EN_IPOPT is set IP, options will be delivered and sent."


0062079 
0062080                            pack->acc_linkC++;
0062081                            udp_rd_enqueue(share_fd, pack, exp_tim);
udp_rd_enqueue()

udp_rd_enqueue(udp_fd, pack, exp_tim) places the packet pack, udp_rd_enqueue()'s second parameter, into the read queue of the udp file descriptor udp_fd, udp_rd_enqueue()'s first parameter. If the packet is the first in the queue, udp_rd_enqueue() sets the packet's expiration time to exp_tim, udp_rd_enqueue()'s third parameter.

udp_rd_enqueue() is called in two places by udp_ip_arrived() after a packet arrives. If a read request was made on a udp file descriptor to which the packet is destined, the packet is delivered immediately. If a read request was not made, the packet remains in the udp file descriptor's read queue until the udp file descriptor makes a read request.


0062082                            if (share_fd->uf_flags & UFF_READ_IP)
0062083                                     udp_packet2user(share_fd);
udp_packet2user()

udp_packet2user(udp_fd) delivers the first packet in the read queue (if it has not expired) of the udp file descriptor udp_fd, udp_packet2user's only parameter, to the process that opened the file descriptor and then sends a message to the file system specifying whether the operation was successful.

If the NWUO_RWDATONLY flag of the file descriptor is set, the packet is delivered without the pseudo udp header.


0062084                   }
0062085          }
0062086 
ipopt_pack and no_ipopt_pack are no longer needed so their corresponding accessors can be freed. ipopt_pack and no_ipopt_pack were described on line 61984.


bf_afree()


After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the chain will not be freed in its entirety. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0062087          if (ipopt_pack)
0062088                   bf_afree(ipopt_pack);
0062089          if (no_ipopt_pack)
0062090                   bf_afree(no_ipopt_pack);
0062091 
0062092          if (!delivered)
If the packet is successfully delivered to a udp file descriptor (lines 62048-62052) or the packet is a broadcast packet (lines 61953-61961), don't deliver an icmp unreachable packet. Otherwise, send an icmp unreachable packet back to the source.


0062093          {
0062094                   DBLOCK(0x2, printf("udp: could not deliver packet from ");
0062095                            writeIpAddr(src_addr);
0062096                            printf(".%u to ", ntohs(src_port));
0062097                            writeIpAddr(dst_addr);
0062098                            printf(".%u\n", ntohs(dst_port)));
0062099 
0062100                   pack= bf_append(ip_hdr_acc, udp_acc);
bf_append()

bf_append() appends one accessor linked list to another accessor linked list. For example, if the payload of an ethernet packet (1500 bytes) is appended to an ethernet header (14 bytes):



the resulting linked list is as follows:






0062101                   ip_hdr_acc= NULL;
0062102                   udp_acc= NULL;
0062103                   icmp_snd_unreachable(udp_port->up_ipdev, pack,
0062104                            ICMP_PORT_UNRCH);
icmp_snd_unreachable()

icmp_snd_unreachable(port_nr, pack, code) builds an icmp unreachable packet (partially using the ip packet pack, icmp_snd_unreachable()'s second parameter) and then places the icmp unreachable packet in the outgoing queue. Icmp unreachable packets are sent if the network, host, or port number specified by the ip packet pack is unreachable.

The function first calls icmp_err_pack() to build a generic icmp packet, sets the ih_type field of the icmp header to ICMP_TYPE_DST_UNRCH and recalibrates the checksum (since the type and code fields of the icmp header have changed) of the icmp header before placing the packet in the icmp port's write queue.


0062105                   return;
0062106          }
0062107 
Free up the other accessors. They are also no longer needed.


0062108          assert (ip_hdr_acc);
0062109          bf_afree(ip_hdr_acc);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the chain will not be freed in its entirety. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0062110          assert (udp_acc);
0062111          bf_afree(udp_acc);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the chain will not be freed in its entirety. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0062112 }
0062113 
0062114 PUBLIC void udp_close(fd)
0062115 int fd;
udp_close()

udp_close() closes a udp file descriptor in response to a DEV_CLOSE message received from the file system (FS). More specifically, udp_close() marks the udp file descriptor as available, removes the udp file descriptor from its udp file descriptor linked list within the udp port, and empties the read queue of the udp file descriptor.


0062116 {
0062117          udp_fd_t *udp_fd;
0062118          acc_t *tmp_acc, *next_acc;
0062119 
0062120          udp_fd= &udp_fd_table[fd];
Find the udp file descriptor with an index of fd (udp_close()'s only parameter) within udp_fd_table[].


0062121 
0062122          assert (udp_fd->uf_flags & UFF_INUSE);
0062123 
0062124          if (udp_fd->uf_flags & UFF_OPTSET)
0062125                   unhash_fd(udp_fd);
hash_fd() / unhash_fd() / udp

Hash tables enable quick lookups. The hash table used by the udp code enables a udp file descriptor to be found quickly using its common udp port number (0-65535).

hash_fd(udp_fd) either places the udp file descriptor udp_fd, hash_fd()'s only parameter, at the head of the up_port_any linked list of the file descriptor's udp port (if the file descriptor is not associated with a specific udp port) or calculates the hash of the udp file descriptor based on its udp port number and places it at the head of the linked list for that hash value (e.g., up_port_hash[10]).

unhash_fd(udp_fd) removes the udp file descriptor udp_fd, unhash_fd()'s only parameter, from the linked list where hash_fd() inserted it.

Note that the port used for the hash is not the physical udp port.


0062126 
0062127          udp_fd->uf_flags= UFF_EMPTY;
Mark the udp file descriptor as available.


0062128          tmp_acc= udp_fd->uf_rdbuf_head;
Lines 62128 - 62135 empty out the udp file descriptor's read queue.


0062129          while (tmp_acc)
0062130          {
0062131                   next_acc= tmp_acc->acc_ext_link;
0062132                   bf_afree(tmp_acc);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the chain will not be freed in its entirety. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0062133                   tmp_acc= next_acc;
0062134          }
0062135          udp_fd->uf_rdbuf_head= NULL;
0062136 }
0062137 
0062138 PUBLIC int udp_write(fd, count)
0062139 int fd;
0062140 size_t count;
Before studying udp_write() and restart_write_fd(), it is worthwhile to discuss ip_write(). ip_write() was rewritten and, as a result, no longer returns anything other than NW_OK. This significantly simplifies what udp_write() and restart_write_fd() do. However, neither udp_write() nor restart_write_fd() was rewritten to reflect the changes in ip_write(). There are several blocks of code that can be removed in both functions as well as in udp_get_data(). Our comments will indicate which blocks could have been removed.

It should also be noted that the UFF_WRITE_IP, UPF_WRITE_IP, UPF_MORE2WRITE, and UPF_WRITE_SP flags can be effectively ignored. If a process writes a packet to a udp file descriptor, the packet is either immediately handed off to the ip code or the packet is immediately dropped. In other words, by the time the next write to a udp file descriptor occurs, the udp code has forgotten about the previous write. Note that this does not mean that the packet can't be queued in the ethernet layer before being sent to the ethernet driver.


udp_write()


udp_write() gets a packet from a user process, adds a udp header and an ip header, and then passes the packet to the next lower layer (i.e., the ip code). udp_write() is called (indirectly) by sr_rwio(), which is called when the network service receives a write request from the file system (FS). Most of the work involved in moving the data from the udp layer to the ip layer is done by restart_write_fd().


udp write path


For a write to a udp device (e.g., /dev/udp), the code takes the following path:


sr_rwio()
udp_write()
restart_write_fd()
ip_write()
ip_send()
if (packet is destined to a system on the local ethernet network) {
ipeth_send()
if (no previous packet being processed by ethernet task)
eth_send()
if (eth_send() can't immediately send packet)
eth_write()
}
else if (packet must be routed)
oroute_frag()
else if (packet is destined for a local destination)
ev_enqueue()



0062141 {
0062142          udp_fd_t *udp_fd;
0062143          udp_port_t *udp_port;
0062144 
Find the udp file descriptor whose index within udp_fd_table[] is fd, udp_write()'s first parameter. Also, find the corresponding udp port for the udp file descriptor.


0062145          udp_fd= &udp_fd_table[fd];
0062146          udp_port= udp_fd->uf_port;
0062147 
0062148          if (!(udp_fd->uf_flags & UFF_OPTSET))
A udp file descriptor must be configured before it can be read from or written to.


0062149          {
0062150                   reply_thr_get (udp_fd, EBADMODE, FALSE);
reply_thr_get() / reply_thr_put() / udp

reply_thr_get() calls (indirectly) sr_get_userdata(), which reports to the file system (FS) whether a previous operation requested by a process was successful. reply, reply_thr_get()'s second parameter, indicates whether the previous operation was successful.

After sending the message to the FS, sr_get_userdata() processes the messages in the write or ioctl queue.

reply_thr_put() does nearly the same thing as reply_thr_get(). However, instead of reporting whether write and ioctl operations were successful, reply_thr_put() reports on read and ioctl operations.


0062151                   return NW_OK;
0062152          }
0062153 
0062154 assert (!(udp_fd->uf_flags & UFF_WRITE_IP));
0062155 
0062156          udp_fd->uf_wr_count= count;
count, udp_write()'s second parameter, is the number of bytes requested in the write operation.


0062157 
0062158          udp_fd->uf_flags |= UFF_WRITE_IP;
restart_write_fd() always clears the UFF_WRITE_IP flag and so this flag can be ignored. The UFF_WRITE_IP flag was probably meaningful before ip_write() was rewritten (see comment on line 62138).


0062159 
0062160          restart_write_fd(udp_fd);
restart_write_fd() / udp

restart_write_fd() gets data from the process that requested a write operation (by calling sr_get_userdata()) and then prepends an ip header and a udp header to the data before passing the packet on to the ip layer (by calling ip_write()).


0062161 
0062162          if (udp_fd->uf_flags & UFF_WRITE_IP)
Because the UPF_WRITE_IP flag is never set on line 62190 (see comment on the same line), restart_write_fd() always clears the UFF_WRITE_IP flag on line 62197. The UFF_WRITE_IP flag will therefore never be set at this point in the code and udp_write() will always return NW_OK.


0062163          {
0062164                   DBLOCK(1, printf("replying NW_SUSPEND\n"));
0062165 
0062166                   return NW_SUSPEND;
0062167          }
0062168          else
0062169          {
0062170                   return NW_OK;
0062171          }
0062172 }
0062173 
0062174 PRIVATE void restart_write_fd(udp_fd)
0062175 udp_fd_t *udp_fd;
restart_write_fd() / udp

restart_write_fd() gets data from the process that requested a write operation (by calling sr_get_userdata()) and then prepends an ip header and a udp header to the data before passing the packet on to the ip layer (by calling ip_write()).


0062176 {
0062177          udp_port_t *udp_port;
0062178          acc_t *pack, *ip_hdr_pack, *udp_hdr_pack, *ip_opt_pack, *user_data;
0062179          udp_hdr_t *udp_hdr;
0062180          udp_io_hdr_t *udp_io_hdr;
0062181          ip_hdr_t *ip_hdr;
0062182          size_t ip_opt_size, user_data_size;
0062183          unsigned long flags;
0062184          u16_t chksum;
0062185          u8_t u16[2];
0062186          int result;
0062187 
0062188          udp_port= udp_fd->uf_port;
0062189 
0062190          if (udp_port->up_flags & UPF_WRITE_IP)
The UPF_WRITE_IP flag will never be set at this point in the code. Here is the rationale behind this statement:

up_flags is initialized to UPF_EMPTY (i.e., UPF_WRITE_IP is not set) during the initialization of the udp port and therefore the UPF_WRITE_IP flag is not set the first time that this point in the code is reached. On line 62323, immediately before the call to ip_write(), the UPF_WRITE_IP flag is set. However, regardless of whether ip_write() is able to satisfy the write request, ip_write() will always call error_reply(), which calls udp_get_data(). In this scenario, udp_get_data() will always clear the UPF_WRITE_IP flag since error_reply() calls udp_get_data() with its third argument equal to 0.

To better understand why the UPF_WRITE_IP flag is never set, it is helpful to study ip_write().


0062191          {
0062192                   udp_port->up_flags |= UPF_MORE2WRITE;
For the reason given above, UPF_MORE2WRITE will never be set.


0062193                   return;
0062194          }
0062195 
0062196 assert (udp_fd->uf_flags & UFF_WRITE_IP);
0062197          udp_fd->uf_flags &= ~UFF_WRITE_IP;
The UFF_WRITE_IP flag is significant on line 62162 above. Since the UPF_WRITE_IP flag will never be set on line 62190 above, restart_write_fd() will never return before clearing the UFF_WRITE_IP flag here and, therefore, the UFF_WRITE_IP flag will never be set on line 62162.


0062198 
0062199 assert (!udp_port->up_wr_pack);
0062200 
0062201          pack= (*udp_fd->uf_get_userdata)(udp_fd->uf_srfd, 0,
0062202                   udp_fd->uf_wr_count, FALSE);
The uf_get_userdata field references sr_get_userdata() (see line 61257). sr_get_userdata() gets a packet from the process that sent the write request and places the packet in a buffer within the network service.


sr_get_userdata()


sr_get_userdata() is the counterpart to sr_put_userdata() and does one of two things:

1) Copies data from a user process to a buffer (to be more specific, a chain of accessors) within the network service (this process). This can be either ioctl data (in which case, for_ioctl is TRUE) or data. For example, udp_setopt() (indirectly) calls sr_get_userdata() to get configuration data. Also, restart_write_fd() (indirectly) calls sr_get_userdata() before passing data onto the ip code.

2) Sends a REVIVE message to the file system (FS). For example, if an illegal option is selected while configuring a udp file descriptor, reply_thr_get() is called, which then (indirectly) calls sr_get_userdata(), passing in EBADMODE for the parameter count. restart_write_fd() also (indirectly) calls sr_get_userdata() to send a REVIVE message back to the FS indicating the number of bytes read after copying the data from the user process.

sr_get_userdata() is often called twice in close succession. The first time to attempt to copy the data from the user process and then the second time to send a message to the FS indicating whether the copy operation was successful and, if it was successful, the number of bytes copied.

In my opinion, like sr_put_userdata(), this function should have been made into two functions. As it is, it is too confusing.


0062203          if (!pack)
sr_get_userdata() was not able to get the packet from the user process. Report the failure to the file system (FS).


0062204          {
0062205                   udp_fd->uf_flags &= ~UFF_WRITE_IP;
0062206                   reply_thr_get (udp_fd, EFAULT, FALSE);
reply_thr_get() / reply_thr_put() / udp

reply_thr_get() calls (indirectly) sr_get_userdata(), which reports to the file system (FS) whether a previous operation requested by a process was successful. reply, reply_thr_get()'s second parameter, indicates whether the previous operation was successful.

After sending the message to the FS, sr_get_userdata() processes the messages in the write or ioctl queue.

reply_thr_put() does nearly the same thing as reply_thr_get(). However, instead of reporting whether write and ioctl operations were successful, reply_thr_put() reports on read and ioctl operations.


0062207                   return;
0062208          }
0062209 
0062210          flags= udp_fd->uf_udpopt.nwuo_flags;
The udp file descriptor's flags determine whether certain fields within the headers come from fields within the file descriptor or fields within the pseudo udp header. For example, if the udp file descriptor's NWUO_RA_SET flag is set, the remote address field of the ip header comes from the nwuo_remaddr field of the file descriptor and not from the pseudo udp header (see lines 62267 - 62277).


0062211 
Since an ip header and a udp header will be needed, allocate buffers for both. Lines 62212 - 62216 allocate buffers for the headers.


0062212          ip_hdr_pack= bf_memreq(IP_MIN_HDR_SIZE);
IP_MIN_HDR_SIZE is #define'd in include/net/gen/in.h:

#define IP_MIN_HDR_SIZE 20


bf_memreq()


After the buffers have been initialized, accessors[] looks like the following:



bf_memreq() allocates accessors to the caller. For example, if 1514 bytes of buffer space are requested immediately after the network process starts and each buffer is 512 bytes (the default), then accessors[] will look like the following:



Note that three elements of accessors[] have been removed from buf512_freelist and that the head of the chain of the 3 accessors is returned by bf_memreq(). Also note that the acc_linkC and buf_linkC fields have been set to one and acc_length and acc_offset have been set to their appropriate values.

So what happens if there are not enough buffers on the buf512_freelist to satisfy a request? On lines 2280-2290 of buf.c, functions that free buffers for the specific clients (e.g., eth_buffree()) are called until there are enough buffers on buf512_freelist.

For a complete description of the network service's buffer management, click here.


0062213          ip_hdr= (ip_hdr_t *)ptr2acc_data(ip_hdr_pack);
ptr2acc_data()

The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.


ip_hdr_t

struct ip_hdr_t is the structure of an ip header. "ih" (e.g., ih_src, ih_dst) stands for "Ip Header".

ip_hdr_t is declared in /include/net/gen/ip_hdr.h:

typedef struct ip_hdr

{
u8_t ih_vers_ihl, ih_tos;
u16_t ih_length, ih_id, ih_flags_fragoff;
u8_t ih_ttl, ih_proto;
u16_t ih_hdr_chk;
ipaddr_t ih_src, ih_dst;
} ip_hdr_t;

ih_vers_ihl: The lower 4 bits are the length of the header plus options (if there are any) in units of 4 bytes (i.e., the actual length in bytes is 4 times as great as the value stored in the lower 4 bits of ih_vers_ihl). An example of an option is a router list that a packet should follow to its destination.

The upper four bits is the version number (e.g., IPv4).


ih_tos: tos stands for "Type Of Service" and is the priority of the ip packet. A value of zero is the lowest priority. Both UDP and TCP have a default TOS of zero.

#define TCP_DEF_TOS 0
#define UDP_TOS 0


ih_length: The length of the entire ip packet, including the ip header.


ih_id: The value of ih_id for the first packet sent out is determined by ip_init() and is equal to the number of clock ticks since reboot (i.e., the value returned by get_time) and is incremented for each packet sent out. This value is used to combine fragments at the receiving end if fragmentation has occurred.


ih_flags_fragoff: ih_flags_fragoff is a combination of flags and a (possible) fragmentation offset ("fragoff").

If the packet should not be fragmented, ih_flags_fragoff is set to IH_DONT_FRAG. If there are additional fragments (e.g., the 3rd fragment of 4 fragments), ih_flags_fragoff is set to IH_MORE_FRAGS.

If the packet is indeed just a fragment of a packet, this value indicates the starting byte position (in 8 byte increments) of the fragment's data within the original ip packet's data. So for example, if an ip packet with 2000 bytes of data (not including the ip header) is broken up into two fragments of 1496 and 504 bytes, the first fragment would have a fragmentation offset of 0 bytes and the second fragment would have a fragmentation offset of 1496 bytes; the offset stored in the second fragment's ih_flags_fragoff is therefore 187 (1496 / 8 = 187).


ih_ttl: "Time to live" for the packet. As a packet is routed to the destination, each router decrements the packet's ttl. When the ttl reaches 0, the router discards the packet and sends an icmp "time exceeded" packet to the source. The ttl is designed to prevent packets that can't reach their destination from indefinitely bouncing around between routers. UDP's default TTL is 30:

#define UDP_TTL 30

Note that the Minix code also uses this value as a timeout value (in seconds). This code was written before the ttl field was redefined to be strictly a hop count. The original IP RFC defines the ttl field as the time to live in seconds.


ih_proto: The protocol of the ip packet. For example, if the packet is a udp packet, ih_proto will be 17. If the packet is a tcp packet, ih_proto will be 6.


ih_hdr_chk: Checksum for the header.


ih_src, ih_dst: Source and destination ip address of the ip packet.


IP HEADER (as given by RFC 791)


 0                   1                   2                   3
 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|Version|  IHL  |Type of Service|          Total Length         |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|         Identification        |Flags|      Fragment Offset    |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|  Time to Live |    Protocol   |         Header Checksum       |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|                       Source Address                          |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|                    Destination Address                        |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|                    Options                    |    Padding    |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+



0062214 
0062215          udp_hdr_pack= bf_memreq(UDP_HDR_SIZE);
UDP_HDR_SIZE, the header size of a udp header in bytes is #define'd in include/net/gen/udp.h:

#define UDP_HDR_SIZE 8


bf_memreq()


After the buffers have been initialized, accessors[] looks like the following:



bf_memreq() allocates accessors to the caller. For example, if 1514 bytes of buffer space are requested immediately after the network process starts and each buffer is 512 bytes (the default), then accessors[] will look like the following:



Note that three elements of accessors[] have been removed from buf512_freelist and that the head of the chain of the 3 accessors is returned by bf_memreq(). Also note that the acc_linkC and buf_linkC fields have been set to one and acc_length and acc_offset have been set to their appropriate values.

So what happens if there are not enough buffers on the buf512_freelist to satisfy a request? On lines 2280-2290 of buf.c, functions that free buffers for the specific clients (e.g., eth_buffree()) are called until there are enough buffers on buf512_freelist.

For a complete description of the network service's buffer management, click here.


0062216          udp_hdr= (udp_hdr_t *)ptr2acc_data(udp_hdr_pack);
ptr2acc_data()

The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.


0062217 
0062218          if (flags & NWUO_RWDATALL)
From ip(4):

"The NWUO_RWDATALL mode presents the data part of a UDP packet with a header that contains the source and destination IP address, source and destination UDP ports, the IP options, etc. The server expects such a header in front of the data to be transmitted."

In other words, if the udp file descriptor's NWUO_RWDATALL flag is set, the user must supply the source and destination data in addition to the payload. This data is specified in a pseudo udp header (i.e., a udp_io_hdr struct).


udp_io_hdr


The "pseudo" udp header (not the standard udp header) is of type udp_io_hdr_t. udp_io_hdr_t is declared in include/net/gen/udp_hdr.h:

typedef struct udp_io_hdr

{
ipaddr_t uih_src_addr;
ipaddr_t uih_dst_addr;
udpport_t uih_src_port;
udpport_t uih_dst_port;
u16_t uih_ip_opt_len;
u16_t uih_data_len;
} udp_io_hdr_t;
uih_src_addr, uih_dst_addr, uih_src_port, uih_dst_port: Source and destination ip addresses and ports

uih_ip_opt_len: length of the ip options (zero if none exist)

uih_data_len: length of the data

If a udp file descriptor is configured appropriately, a process writing data to the udp file descriptor must prepend a pseudo udp header to the data, thereby specifying the values (given above) in the outgoing udp and ip headers. A udp pseudo header is also prepended to an otherwise header-less packet being copied to the process that requested a read.


0062219          {
0062220                   pack= bf_packIffLess(pack, UDP_IO_HDR_SIZE);
UDP_IO_HDR_SIZE is #define'd in include/net/gen/udp.h:

#define UDP_IO_HDR_SIZE 16


bf_packIffLess()


If the first accessor of a linked list of accessors holds fewer than min_len (the second parameter) bytes of data, bf_packIffLess(pack, min_len) packs the data by calling bf_pack().

bf_packIffLess() is often called to ensure that a packet's header is in a single contiguous buffer so that the individual fields of the header can be easily accessed.

For a detailed description of the network service's buffer management, click here.


0062221                   udp_io_hdr= (udp_io_hdr_t *)ptr2acc_data(pack);
ptr2acc_data()

The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.


0062222 #if CONF_UDP_IO_NW_BYTE_ORDER
0062223                   ip_opt_size= ntohs(udp_io_hdr->uih_ip_opt_len);
0062224 #else
0062225                   ip_opt_size= udp_io_hdr->uih_ip_opt_len;
0062226 #endif
ip_opt_size is the size of the ip header options.


0062227                   if (UDP_IO_HDR_SIZE+ip_opt_size>udp_fd->uf_wr_count)
Verify that uf_wr_count, the number of bytes to be written, makes sense. At the very minimum (i.e., if the packet had no payload), uf_wr_count must be at least as large as the pseudo udp header and the size of the ip options (as indicated by the pseudo udp header).


0062228                   {
0062229                            bf_afree(ip_hdr_pack);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the chain will not be freed in its entirety. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0062230                            bf_afree(udp_hdr_pack);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the chain will not be freed in its entirety. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0062231                            bf_afree(pack);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the chain will not be freed in its entirety. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0062232                            reply_thr_get (udp_fd, EINVAL, FALSE);
0062233                            return;
0062234                   }
ip options

Ip options are an assortment of settings related to the ip protocol. However, most ip options involve the path that the ip packet takes to its destination. Ip options are discussed in detail on page 14 of RFC 791 as well as by the Data Network Resource.

Ip options are optional. If there are ip options, the ip options must be placed between the ip header and the udp header (for a udp packet):



The maximum size of the ip options is 40 bytes and must be a multiple of 4 bytes (RFC 791). This is a real limitation since the number of router ip addresses that can be stored in the ip options is very limited.


0062235                   if (ip_opt_size & 3)
Verify that the size of the ip options is a multiple of 4. The maximum size of the ip options is 40 bytes and must be a multiple of 4 bytes (RFC 791).


0062236                   {
0062237                            bf_afree(ip_hdr_pack);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the chain will not be freed in its entirety. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0062238                            bf_afree(udp_hdr_pack);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the chain will not be freed in its entirety. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0062239                            bf_afree(pack);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the chain will not be freed in its entirety. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0062240                            reply_thr_get (udp_fd, EFAULT, FALSE);
reply_thr_get() / reply_thr_put() / udp

reply_thr_get() calls (indirectly) sr_get_userdata(), which reports to the file system (FS) whether a previous operation requested by a process was successful. reply, reply_thr_get()'s second parameter, indicates whether the previous operation was successful.

After sending the message to the FS, sr_get_userdata() processes the messages in the write or ioctl queue.

reply_thr_put() does nearly the same thing as reply_thr_get(). However, instead of reporting whether write and ioctl operations were successful, reply_thr_put() reports on read and ioctl operations.


0062241                            return;
0062242                   }
0062243                   if (ip_opt_size)
0062244                            ip_opt_pack= bf_cut(pack, UDP_IO_HDR_SIZE, ip_opt_size);
ip_opt_pack (a linked list of accessors) now holds the ip options. If ip_opt_pack is null, there are no options (see next line).


bf_cut()


If a section of a linked list needs to be duplicated, bf_cut(data, offset, length) is called. For example, if a section of length 50 starting at an offset of 75 of the linked list below needs to be duplicated, bf_cut(data, 75, 50) is called:



Note that the original linked list remains unchanged and that acc_linkC for all the accessors in the new linked list is one.

If length (the third parameter) is zero, bf_cut() simply duplicates the first accessor in the linked list but sets acc_length=0 and acc_next=null. In other words, it creates a linked list consisting of a single accessor whose acc_length is 0.

bf_cut() is used in a number of scenarios, including cutting a received ethernet packet to size.

For a full description of the network service's buffer management, click here.



0062245                   else
0062246                            ip_opt_pack= 0;
0062247                   user_data_size= udp_fd->uf_wr_count-UDP_IO_HDR_SIZE-
0062248                            ip_opt_size;
0062249                   user_data= bf_cut(pack, UDP_IO_HDR_SIZE+ip_opt_size,
0062250                            user_data_size);
user_data (a linked list of accessors) now holds the payload data (with the pseudo header and ip options stripped).


0062251                   bf_afree(pack);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the chain will not be freed in its entirety. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0062252          }
0062253          else
From ip(4):

"The NWUO_RWDATONLY flag specifies that only the data part of a UDP packet is sent to the server and only the data part is received from the server."

If the NWUO_RWDATONLY flag is set for a udp file descriptor, all packets written to the file descriptor are sent to the same ip address/udp port. This ip address and udp port are part of the udp file descriptor's configuration.



0062254          {
0062255                   udp_io_hdr= 0;
0062256                   ip_opt_size= 0;
0062257                   user_data_size= udp_fd->uf_wr_count;
0062258                   ip_opt_pack= 0;
0062259                   user_data= pack;
0062260          }
0062261 
Lines 62262 - 62266 fill in several fields of the ip header.


ip_hdr_t


struct ip_hdr_t is the structure of an ip header. "ih" (e.g., ih_src, ih_dst) stands for "Ip Header".

ip_hdr_t is declared in /include/net/gen/ip_hdr.h:

typedef struct ip_hdr

{
u8_t ih_vers_ihl, ih_tos;
u16_t ih_length, ih_id, ih_flags_fragoff;
u8_t ih_ttl, ih_proto;
u16_t ih_hdr_chk;
ipaddr_t ih_src, ih_dst;
} ip_hdr_t;

ih_vers_ihl: The lower 4 bits hold the length of the header plus options (if there are any) shifted right by 2 bit positions (i.e., the actual length in bytes is 4 times as great as the value stored in ih_vers_ihl). An example of an option is a router list that a packet should follow to its destination.

The upper four bits hold the version number (e.g., 4 for IPv4).


ih_tos: tos stands for "Type Of Service" and is the priority of the ip packet. A value of zero is the lowest priority. Both UDP and TCP have a default TOS of zero.

#define TCP_DEF_TOS 0
#define UDP_TOS 0


ih_length: The length of the entire ip packet, including the ip header.


ih_id: The value of ih_id for the first packet sent out is determined by ip_init() and is equal to the number of clock ticks since reboot (i.e., the value returned by get_time) and is incremented for each packet sent out. This value is used to combine fragments at the receiving end if fragmentation has occurred.


ih_flags_fragoff: ih_flags_fragoff is a combination of flags and a (possible) fragmentation offset ("fragoff").

If the packet should not be fragmented, ih_flags_fragoff is set to IH_DONT_FRAG. If there are additional fragments (e.g., the 3rd fragment of 4 fragments), ih_flags_fragoff is set to IH_MORE_FRAGS.

If the packet is indeed just a fragment of a packet, this value indicates the starting byte position (in 8 byte increments) of the fragment's data within the original ip packet's data. So for example, if an ip packet of data size 2000 bytes (not including the ip header) is broken up into two fragments of 1496 and 504 bytes, the first fragment would have a fragmentation offset of 0 bytes and the second fragment would have a fragmentation offset of 1496 bytes; the offset stored in ih_flags_fragoff for the second fragment is therefore 187 (1496 / 8 = 187).


ih_ttl: "Time to live" for the packet. As a packet is routed to the destination, each router decrements the packet's ttl. When the ttl reaches 0, the router discards the packet and sends an "icmp time exceeded" packet to the source. The ttl is designed to prevent packets that can't reach their destination from indefinitely bouncing around between routers. UDP's default TTL is 30:

#define UDP_TTL 30

Note that the Minix code also uses this value as a timeout value (in seconds). This code was written before the ttl field was redefined to be strictly a hop count. The original IP RFC defines the ttl field as the time to live in seconds.


ih_proto: The protocol of the ip packet. For example, if the packet is a udp packet, ih_proto will be 17. If the packet is a tcp packet, ih_proto will be 6.


ih_hdr_chk: Checksum for the header.


ih_src, ih_dst: Source and destination ip address of the ip packet.


IP HEADER (as given by RFC 791)


0 1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|Version| IHL |Type of Service| Total Length |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Identification |Flags| Fragment Offset |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Time to Live | Protocol | Header Checksum |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Source Address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Destination Address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Options | Padding |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+



0062262          ip_hdr->ih_vers_ihl= (IP_MIN_HDR_SIZE+ip_opt_size) >> 2;
0062263          ip_hdr->ih_tos= UDP_TOS;
0062264          ip_hdr->ih_flags_fragoff= HTONS(UDP_IP_FLAGS);
0062265          ip_hdr->ih_ttl= UDP_TTL;
0062266          ip_hdr->ih_proto= IPPROTO_UDP;
Depending on the settings of several flags in the udp file descriptor, several fields of the udp and ip header will be set to either values of the udp file descriptor or values of the pseudo udp header passed in with the data.

For a detailed description of the NWUO_... flags, click here.


0062267          if (flags & NWUO_RA_SET)
Get the value from the udp file descriptor.


0062268          {
0062269                   DBLOCK(1, printf("NWUO_RA_SET\n"));
0062270 
0062271                   ip_hdr->ih_dst= udp_fd->uf_udpopt.nwuo_remaddr;
0062272          }
0062273          else
Get the value from the udp pseudo header.


0062274          {
0062275 assert (udp_io_hdr);
0062276                   ip_hdr->ih_dst= udp_io_hdr->uih_dst_addr;
0062277          }
0062278 
0062279          if ((flags & NWUO_LOCPORT_MASK) != NWUO_LP_ANY)
Get the value from the udp file descriptor.


0062280                   udp_hdr->uh_src_port= udp_fd->uf_udpopt.nwuo_locport;
0062281          else
Get the value from the udp pseudo header.


0062282          {
0062283 assert (udp_io_hdr);
0062284                   udp_hdr->uh_src_port= udp_io_hdr->uih_src_port;
0062285          }
0062286 
0062287          if (flags & NWUO_RP_SET)
Get the value from the udp file descriptor.


0062288                   udp_hdr->uh_dst_port= udp_fd->uf_udpopt.nwuo_remport;
0062289          else
Get the value from the udp pseudo header.


0062290          {
0062291 assert (udp_io_hdr);
0062292                   udp_hdr->uh_dst_port= udp_io_hdr->uih_dst_port;
0062293          }
0062294 
udp_hdr

The udp header (not the udp pseudo header) has the following format:

typedef struct udp_hdr

{
udpport_t uh_src_port;
udpport_t uh_dst_port;
u16_t uh_length;
u16_t uh_chksum;
} udp_hdr_t;

uh_src_port: Source port for the packet.

uh_dst_port: Destination port for the packet.

uh_length: Length of the udp header plus the length of the data.

uh_chksum: Checksum of the udp packet.


0062295          udp_hdr->uh_length= htons(UDP_HDR_SIZE+user_data_size);
htons() / ntohs() / htonl() / ntohl()

From htons(3):

"htons() converts a 16-bit quantity from host byte order to network byte order."

Different CPU architectures group multiple bytes differently. For example, on a "little-endian" machine (an example of which is the Intel CPU), the value 0x1234 is stored in memory as 0x3412. However, on a "big-endian" machine, the value 0x1234 is stored in memory as 0x1234.

It is important that values in a header are sent across a network in a consistent manner independent of the architecture of the sending or receiving system. For this reason, a standard was chosen. The standard chosen was big-endian although it could have just as well been little-endian.

htons() is defined in /include/net/hton.h, as:
#define htons(x) (_tmp=(x), ((_tmp>>8) & 0xff) | ((_tmp<<8) & 0xff00))

ntohs() converts a 16-bit quantity from network byte order to host byte order, the reverse of htons().

htonl() and ntohl() are identical to htons() and ntohs() except that they convert 32-bit quantities instead of 16-bit quantities.

Processes generally supply header information when sending packets. The data in these fields is converted to the network format (i.e., big-endian) by the process before the process copies the data to the network service.


0062296          udp_hdr->uh_chksum= 0;
Calculate the checksum of the packet.


oneC_sum()


A checksum is used to determine if errors occurred during the transmission of data. The checksum algorithm used by oneC_sum() (which is also the Internet standard) is described by RFC 1071.

Essentially, the algorithm goes through the data and adds all the 16-bit words together (using one's complement addition). The high 16 bits of the resulting 32 bit value are then added to the low 16 bits (again, using one's complement addition). The checksum field is then set to the one's complement of this 16 bit sum. (Recall that the one's complement of 0xF0F0 is 0x0F0F.) Since adding any 16 bit number to its 16 bit one's complement yields 0xFFFF, the sum of the packet (computed without the checksum field) added to the checksum field will equal 0xFFFF (provided the packet was not corrupted after the checksum field was calculated). For example, the one's complement sum over a udp packet (including the checksum field) will equal 0xFFFF if the packet was not corrupted in delivery.


From RFC 1071:

In outline, the Internet checksum algorithm is fairly simple:

(1) Adjacent octets to be checksummed are paired to form 16-bit
integers, and the 1's complement sum of these 16-bit integers is
formed.

(2) To generate a checksum, the checksum field itself is cleared,
the 16-bit 1's complement sum is computed over the octets
concerned, and the 1's complement of this sum is placed in the
checksum field.

(3) To check a checksum, the 1's complement sum is computed over the
same set of octets, including the checksum field. If the result
is all 1 bits (-0 in 1's complement arithmetic), the check
succeeds.

Below is a "C" code algorithm that describes the process above. This algorithm is also from RFC 1071. Note that count is the running count of all the bytes in the data and checksum is the return value.



{
/* Compute Internet Checksum for "count" bytes
* beginning at location "addr".
*/
register long sum = 0;

while( count > 1 ) {
/* This is the inner loop */
sum += * (unsigned short) addr++;
count -= 2;
}

/* Add left-over byte, if any */
if( count > 0 )
sum += * (unsigned char *) addr;

/* Fold 32-bit sum to 16 bits */
while (sum>>16)
sum = (sum & 0xffff) + (sum >> 16);

checksum = ~sum;
}



0062297 
0062298          udp_hdr_pack->acc_next= user_data;
The udp header and the data are linked to form the udp packet.

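Lines 62299-62308 calculate the udp checksum. The udp checksum is calculated with the following data: the udp header and the user data (line 62299), the ip address of the udp port, which is the source ip address (lines 62300-62301), the destination ip address (line 62302), a zero byte followed by the protocol number IPPROTO_UDP (lines 62303-62305), and the udp length (line 62306).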




0062299          chksum= pack_oneCsum(udp_hdr_pack);
pack_oneCsum()

pack_oneCsum() computes the checksum of a udp packet. It accomplishes this by computing the checksum of each of the packet's buffers (by calling oneC_sum()).

Note that a checksum is used to determine if errors occurred during the transmission of data.

pack_oneCsum() is very similar to icmp_pack_oneCsum(). The two functions probably should have been consolidated into one.


0062300          chksum= oneC_sum(chksum, (u16_t *)&udp_fd->uf_port->up_ipaddr,
0062301                   sizeof(ipaddr_t));
oneC_sum()

A checksum is used to determine if errors occurred during the transmission of data. The checksum algorithm used by oneC_sum() (which is also the Internet standard) is described by RFC 1071.

Essentially, the algorithm goes through the data and adds all the 16-bit words together (using one's complement addition). The high 16 bits of the resulting 32 bit value are then added to the low 16 bits (again, using one's complement addition). The checksum field is then set to the one's complement of this 16 bit sum. (Recall that the one's complement of 0xF0F0 is 0x0F0F.) Since adding any 16 bit number to its 16 bit one's complement yields 0xFFFF, the sum of the packet (computed without the checksum field) added to the checksum field will equal 0xFFFF (provided the packet was not corrupted after the checksum field was calculated). For example, the one's complement sum over a udp packet (including the checksum field) will equal 0xFFFF if the packet was not corrupted in delivery.


From RFC 1071:

In outline, the Internet checksum algorithm is fairly simple:

(1) Adjacent octets to be checksummed are paired to form 16-bit
integers, and the 1's complement sum of these 16-bit integers is
formed.

(2) To generate a checksum, the checksum field itself is cleared,
the 16-bit 1's complement sum is computed over the octets
concerned, and the 1's complement of this sum is placed in the
checksum field.

(3) To check a checksum, the 1's complement sum is computed over the
same set of octets, including the checksum field. If the result
is all 1 bits (-0 in 1's complement arithmetic), the check
succeeds.

Below is a "C" code algorithm that describes the process above. This algorithm is also from RFC 1071. Note that count is the running count of all the bytes in the data and checksum is the return value.



{
/* Compute Internet Checksum for "count" bytes
* beginning at location "addr".
*/
register long sum = 0;

while( count > 1 ) {
/* This is the inner loop */
sum += * (unsigned short) addr++;
count -= 2;
}

/* Add left-over byte, if any */
if( count > 0 )
sum += * (unsigned char *) addr;

/* Fold 32-bit sum to 16 bits */
while (sum>>16)
sum = (sum & 0xffff) + (sum >> 16);

checksum = ~sum;
}



0062302          chksum= oneC_sum(chksum, (u16_t *)&ip_hdr->ih_dst, sizeof(ipaddr_t));
oneC_sum()

A checksum is used to determine if errors occurred during the transmission of data. The checksum algorithm used by oneC_sum() (which is also the Internet standard) is described by RFC 1071.

Essentially, the algorithm goes through the data and adds all the 16-bit words together (using one's complement addition). The high 16 bits of the resulting 32 bit value are then added to the low 16 bits (again, using one's complement addition). The checksum field is then set to the one's complement of this 16 bit sum. (Recall that the one's complement of 0xF0F0 is 0x0F0F.) Since adding any 16 bit number to its 16 bit one's complement yields 0xFFFF, the sum of the packet (computed without the checksum field) added to the checksum field will equal 0xFFFF (provided the packet was not corrupted after the checksum field was calculated). For example, the one's complement sum over a udp packet (including the checksum field) will equal 0xFFFF if the packet was not corrupted in delivery.


From RFC 1071:

In outline, the Internet checksum algorithm is fairly simple:

(1) Adjacent octets to be checksummed are paired to form 16-bit
integers, and the 1's complement sum of these 16-bit integers is
formed.

(2) To generate a checksum, the checksum field itself is cleared,
the 16-bit 1's complement sum is computed over the octets
concerned, and the 1's complement of this sum is placed in the
checksum field.

(3) To check a checksum, the 1's complement sum is computed over the
same set of octets, including the checksum field. If the result
is all 1 bits (-0 in 1's complement arithmetic), the check
succeeds.

Below is a "C" code algorithm that describes the process above. This algorithm is also from RFC 1071. Note that count is the running count of all the bytes in the data and checksum is the return value.



{
/* Compute Internet Checksum for "count" bytes
* beginning at location "addr".
*/
register long sum = 0;

while( count > 1 ) {
/* This is the inner loop */
sum += * (unsigned short) addr++;
count -= 2;
}

/* Add left-over byte, if any */
if( count > 0 )
sum += * (unsigned char *) addr;

/* Fold 32-bit sum to 16 bits */
while (sum>>16)
sum = (sum & 0xffff) + (sum >> 16);

checksum = ~sum;
}



0062303          u16[0]= 0;
0062304          u16[1]= IPPROTO_UDP;
0062305          chksum= oneC_sum(chksum, (u16_t *)u16, sizeof(u16));
oneC_sum()

A checksum is used to determine if errors occurred during the transmission of data. The checksum algorithm used by oneC_sum() (which is also the Internet standard) is described by RFC 1071.

Essentially, the algorithm goes through the data and adds all the 16-bit words together (using one's complement addition). The high 16 bits of the resulting 32 bit value are then added to the low 16 bits (again, using one's complement addition). The checksum field is then set to the one's complement of this 16 bit sum. (Recall that the one's complement of 0xF0F0 is 0x0F0F.) Since adding any 16 bit number to its 16 bit one's complement yields 0xFFFF, the sum of the packet (computed without the checksum field) added to the checksum field will equal 0xFFFF (provided the packet was not corrupted after the checksum field was calculated). For example, the one's complement sum over a udp packet (including the checksum field) will equal 0xFFFF if the packet was not corrupted in delivery.


From RFC 1071:

In outline, the Internet checksum algorithm is fairly simple:

(1) Adjacent octets to be checksummed are paired to form 16-bit
integers, and the 1's complement sum of these 16-bit integers is
formed.

(2) To generate a checksum, the checksum field itself is cleared,
the 16-bit 1's complement sum is computed over the octets
concerned, and the 1's complement of this sum is placed in the
checksum field.

(3) To check a checksum, the 1's complement sum is computed over the
same set of octets, including the checksum field. If the result
is all 1 bits (-0 in 1's complement arithmetic), the check
succeeds.

Below is a "C" code algorithm that describes the process above. This algorithm is also from RFC 1071. Note that count is the running count of all the bytes in the data and checksum is the return value.



{
/* Compute Internet Checksum for "count" bytes
* beginning at location "addr".
*/
register long sum = 0;

while( count > 1 ) {
/* This is the inner loop */
sum += * (unsigned short) addr++;
count -= 2;
}

/* Add left-over byte, if any */
if( count > 0 )
sum += * (unsigned char *) addr;

/* Fold 32-bit sum to 16 bits */
while (sum>>16)
sum = (sum & 0xffff) + (sum >> 16);

checksum = ~sum;
}



0062306          chksum= oneC_sum(chksum, (u16_t *)&udp_hdr->uh_length, sizeof(u16_t));
oneC_sum()

A checksum is used to determine if errors occurred during the transmission of data. The checksum algorithm used by oneC_sum() (which is also the Internet standard) is described by RFC 1071.

Essentially, the algorithm goes through the data and adds all the 16-bit words together (using one's complement addition). The high 16 bits of the resulting 32 bit value are then added to the low 16 bits (again, using one's complement addition). The checksum field is then set to the one's complement of this 16 bit sum. (Recall that the one's complement of 0xF0F0 is 0x0F0F.) Since adding any 16 bit number to its 16 bit one's complement yields 0xFFFF, the sum of the packet (computed without the checksum field) added to the checksum field will equal 0xFFFF (provided the packet was not corrupted after the checksum field was calculated). For example, the one's complement sum over a udp packet (including the checksum field) will equal 0xFFFF if the packet was not corrupted in delivery.


From RFC 1071:

In outline, the Internet checksum algorithm is fairly simple:

(1) Adjacent octets to be checksummed are paired to form 16-bit
integers, and the 1's complement sum of these 16-bit integers is
formed.

(2) To generate a checksum, the checksum field itself is cleared,
the 16-bit 1's complement sum is computed over the octets
concerned, and the 1's complement of this sum is placed in the
checksum field.

(3) To check a checksum, the 1's complement sum is computed over the
same set of octets, including the checksum field. If the result
is all 1 bits (-0 in 1's complement arithmetic), the check
succeeds.

Below is a "C" code algorithm that describes the process above. This algorithm is also from RFC 1071. Note that count is the running count of all the bytes in the data and checksum is the return value.



{
/* Compute Internet Checksum for "count" bytes
* beginning at location "addr".
*/
register long sum = 0;

while( count > 1 ) {
/* This is the inner loop */
sum += * (unsigned short) addr++;
count -= 2;
}

/* Add left-over byte, if any */
if( count > 0 )
sum += * (unsigned char *) addr;

/* Fold 32-bit sum to 16 bits */
while (sum>>16)
sum = (sum & 0xffff) + (sum >> 16);

checksum = ~sum;
}



0062307          if (~chksum)
0062308                   chksum= ~chksum;
0062309          udp_hdr->uh_chksum= chksum;
0062310          
0062311          if (ip_opt_pack)
If there are ip options, prepend them to the udp packet.


0062312          {
0062313                   ip_opt_pack= bf_packIffLess(ip_opt_pack, ip_opt_size);
bf_packIffLess()

If the data in a linked list of accessors is less than min_len (the second parameter), bf_packIffLess(pack, min_len) packs the data by calling bf_pack().

bf_packIffLess() is often called to ensure that a packet's header is in a single contiguous buffer so that the individual fields of the header can be easily accessed.

For a detailed description of the network service's buffer management, click here.


0062314                   ip_opt_pack->acc_next= udp_hdr_pack;
0062315                   udp_hdr_pack= ip_opt_pack;
0062316          }
0062317          ip_hdr_pack->acc_next= udp_hdr_pack;
Link the ip header to the rest of the packet.


0062318 
0062319 assert (!udp_port->up_wr_pack);
0062320 assert (!(udp_port->up_flags & UPF_WRITE_IP));
0062321 
0062322          udp_port->up_wr_pack= ip_hdr_pack;
Place the packet in the write queue of the udp port that is associated with the udp file descriptor and indicate that a write operation is underway for the udp port (next line).


0062323          udp_port->up_flags |= UPF_WRITE_IP;
The UPF_WRITE_IP flag will always be (indirectly) cleared by ip_write(). See the comments on lines 62158 and 62162.


0062324          result= ip_write(udp_port->up_ipfd, bf_bufsize(ip_hdr_pack));
The up_ipfd field of a udp port is the udp port's associated ip file descriptor.


ip_write()


ip_write() simply gets an ip packet from a higher layer and then calls ip_send().

For example, after assembling an ip packet and placing the packet in the write queue of the appropriate udp port, udp's restart_write_fd() calls ip_write(), which then calls udp_get_data() to get the packet from the queue.

For a write to a udp file descriptor, ip_write()'s position in the big picture is as follows:

It is important to note that ip_write() ALWAYS RETURNS NW_OK! In previous versions of the network service, ip_write() returned other values (including NW_SUSPEND).


0062325          if (result == NW_SUSPEND)
ip_write() ALWAYS RETURNS NW_OK! An older version of ip_write() returned NW_SUSPEND. Lines 62326-62330 and line 62332 will never be executed.



0062326          {
0062327                   udp_port->up_flags |= UPF_WRITE_SP;
0062328                   udp_fd->uf_flags |= UFF_WRITE_IP;
0062329                   udp_port->up_write_fd= udp_fd;
0062330          }
0062331          else if (result<0)
As described above, this conditional will never be true.


0062332                   reply_thr_get(udp_fd, result, FALSE);
0062333          else
0062334                   reply_thr_get (udp_fd, udp_fd->uf_wr_count, FALSE);
As described above, this call to reply_thr_get() will always be executed.

Return to the file system the number of bytes that were written.


reply_thr_get() / reply_thr_put() / udp


reply_thr_get() calls (indirectly) sr_get_userdata(), which reports to the file system (FS) whether a previous operation requested by a process was successful. reply, reply_thr_get()'s second parameter, indicates whether the previous operation was successful.

After sending the message to the FS, sr_get_userdata() processes the messages in the write or ioctl queue.

reply_thr_put() does nearly the same thing as reply_thr_get(). However, instead of reporting whether write and ioctl operations were successful, reply_thr_put() reports on read and ioctl operations.


0062335 }
0062336 
0062337 PRIVATE u16_t pack_oneCsum(pack)
0062338 acc_t *pack;
pack_oneCsum()

pack_oneCsum() computes the checksum of a udp packet. It accomplishes this by computing the checksum of each of the packet's buffers (by calling oneC_sum()).

Note that a checksum is used to determine if errors occurred during the transmission of data.

pack_oneCsum() is very similar to icmp_pack_oneCsum(). The two functions probably should have been consolidated into one.


0062339 {
0062340          u16_t prev;
0062341          int odd_byte;
0062342          char *data_ptr;
0062343          int length;
0062344          char byte_buf[2];
0062345 
0062346          assert (pack);
0062347 
0062348          prev= 0;
0062349 
0062350          odd_byte= FALSE;
0062351          for (; pack; pack= pack->acc_next)
Go through each of the buffers, adding up each buffer's checksum.

The trickiest part of this loop is handling the (possible) odd byte at the end of a buffer. If there is an odd number of bytes in a buffer, the odd byte at the end is checksummed with the first byte of the next buffer and then the remaining bytes of the buffer are checksummed together.


0062352          {
0062353                   
0062354                   data_ptr= ptr2acc_data(pack);
ptr2acc_data()

The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.


0062355                   length= pack->acc_length;
0062356 
0062357                   if (!length)
0062358                            continue;
0062359                   if (odd_byte)
0062360                   {
0062361                            byte_buf[1]= *data_ptr;
0062362                            prev= oneC_sum(prev, (u16_t *)byte_buf, 2);
oneC_sum()

A checksum is used to determine if errors occurred during the transmission of data. The checksum algorithm used by oneC_sum() (which is also the Internet standard) is described by RFC 1071.

Essentially, the algorithm goes through the data and adds all the 16-bit words together (using one's complement addition). The high 16 bits of the resulting 32 bit value are then added to the low 16 bits (again, using one's complement addition). The checksum field is then set to the one's complement of this 16 bit sum. (Recall that the one's complement of 0xF0F0 is 0x0F0F.) Since adding any 16 bit number to its 16 bit one's complement yields 0xFFFF, the sum of the packet (computed without the checksum field) added to the checksum field will equal 0xFFFF (provided the packet was not corrupted after the checksum field was calculated). For example, the one's complement sum over a udp packet (including the checksum field) will equal 0xFFFF if the packet was not corrupted in delivery.


From RFC 1071:

In outline, the Internet checksum algorithm is fairly simple:

(1) Adjacent octets to be checksummed are paired to form 16-bit
integers, and the 1's complement sum of these 16-bit integers is
formed.

(2) To generate a checksum, the checksum field itself is cleared,
the 16-bit 1's complement sum is computed over the octets
concerned, and the 1's complement of this sum is placed in the
checksum field.

(3) To check a checksum, the 1's complement sum is computed over the
same set of octets, including the checksum field. If the result
is all 1 bits (-0 in 1's complement arithmetic), the check
succeeds.

Below is a "C" code algorithm that describes the process above. This algorithm is also from RFC 1071. Note that count is the running count of all the bytes in the data and checksum is the return value.



{
/* Compute Internet Checksum for "count" bytes
* beginning at location "addr".
*/
register long sum = 0;

while( count > 1 ) {
/* This is the inner loop */
sum += * (unsigned short) addr++;
count -= 2;
}

/* Add left-over byte, if any */
if( count > 0 )
sum += * (unsigned char *) addr;

/* Fold 32-bit sum to 16 bits */
while (sum>>16)
sum = (sum & 0xffff) + (sum >> 16);

checksum = ~sum;
}



0062363                            data_ptr++;
0062364                            length--;
0062365                            odd_byte= FALSE;
0062366                   }
0062367                   if (length & 1)
0062368                   {
0062369                            odd_byte= TRUE;
0062370                            length--;
0062371                            byte_buf[0]= data_ptr[length];
0062372                   }
0062373                   if (!length)
0062374                            continue;
0062375                   prev= oneC_sum (prev, (u16_t *)data_ptr, length);
oneC_sum()

A checksum is used to determine if errors occurred during the transmission of data. The checksum algorithm used by oneC_sum() (which is also the Internet standard) is described by RFC 1071.

Essentially, the algorithm goes through the data and adds all the 16-bit words together (using one's complement addition). The high 16 bits of the resulting 32 bit value are then added to the low 16 bits (again, using one's complement addition). The checksum field is then set to the one's complement of this 16 bit sum. (Recall that the one's complement of 0xF0F0 is 0x0F0F.) Since adding any 16 bit number to its 16 bit one's complement yields 0xFFFF, the sum of the packet (computed without the checksum field) added to the checksum field will equal 0xFFFF (provided the packet was not corrupted after the checksum field was calculated). For example, the one's complement sum over a udp packet (including the checksum field) will equal 0xFFFF if the packet was not corrupted in delivery.


From RFC 1071:

In outline, the Internet checksum algorithm is fairly simple:

(1) Adjacent octets to be checksummed are paired to form 16-bit
integers, and the 1's complement sum of these 16-bit integers is
formed.

(2) To generate a checksum, the checksum field itself is cleared,
the 16-bit 1's complement sum is computed over the octets
concerned, and the 1's complement of this sum is placed in the
checksum field.

(3) To check a checksum, the 1's complement sum is computed over the
same set of octets, including the checksum field. If the result
is all 1 bits (-0 in 1's complement arithmetic), the check
succeeds.

Below is a "C" code algorithm that describes the process above. This algorithm is also from RFC 1071. Note that count is the running count of all the bytes in the data and checksum is the return value.



{
/* Compute Internet Checksum for "count" bytes
* beginning at location "addr".
*/
register long sum = 0;

while( count > 1 ) {
/* This is the inner loop */
sum += * (unsigned short) addr++;
count -= 2;
}

/* Add left-over byte, if any */
if( count > 0 )
sum += * (unsigned char *) addr;

/* Fold 32-bit sum to 16 bits */
while (sum>>16)
sum = (sum & 0xffff) + (sum >> 16);

checksum = ~sum;
}



0062376          }
0062377          if (odd_byte)
0062378          {
0062379                   byte_buf[1]= 0;
0062380                   prev= oneC_sum (prev, (u16_t *)byte_buf, 1);
oneC_sum()

A checksum is used to determine if errors occurred during the transmission of data. The checksum algorithm used by oneC_sum() (which is also the Internet standard) is described by RFC 1071.

Essentially, the algorithm goes through the data and adds all the 16-bit words together (using one's complement addition). The high 16 bits of the resulting 32 bit value are then added to the low 16 bits (again, using one's complement addition). The checksum field is then set to the one's complement of this 16 bit sum. (Recall that the one's complement of 0xF0F0 is 0x0F0F.) Since adding any 16 bit number to its 16 bit one's complement yields 0xFFFF, the sum of the packet (computed without the checksum field) added to the checksum field will equal 0xFFFF (provided the packet was not corrupted after the checksum field was calculated). For example, the one's complement sum over a udp packet (including the checksum field) will equal 0xFFFF if the packet was not corrupted in delivery.


From RFC 1071:

In outline, the Internet checksum algorithm is fairly simple:

(1) Adjacent octets to be checksummed are paired to form 16-bit
integers, and the 1's complement sum of these 16-bit integers is
formed.

(2) To generate a checksum, the checksum field itself is cleared,
the 16-bit 1's complement sum is computed over the octets
concerned, and the 1's complement of this sum is placed in the
checksum field.

(3) To check a checksum, the 1's complement sum is computed over the
same set of octets, including the checksum field. If the result
is all 1 bits (-0 in 1's complement arithmetic), the check
succeeds.

Below is a "C" code algorithm that describes the process above. This algorithm is also from RFC 1071. Note that count is the running count of all the bytes in the data and checksum is the return value.



{
/* Compute Internet Checksum for "count" bytes
* beginning at location "addr".
*/
register long sum = 0;

while( count > 1 ) {
/* This is the inner loop */
sum += * (unsigned short) addr++;
count -= 2;
}

/* Add left-over byte, if any */
if( count > 0 )
sum += * (unsigned char *) addr;

/* Fold 32-bit sum to 16 bits */
while (sum>>16)
sum = (sum & 0xffff) + (sum >> 16);

checksum = ~sum;
}



0062381          }
0062382          return prev;
0062383 }
0062384 
0062385 PRIVATE void udp_restart_write_port(udp_port )
0062386 udp_port_t *udp_port;
Since UPF_WRITE_SP is never set, udp_restart_write_port() is never called. Therefore, this function will not be documented.

In previous versions of the network service, ip_write() (see line 62324) could return NW_SUSPEND as well as NW_OK. Currently, ip_write() only returns NW_OK. Therefore, the UPF_WRITE_SP flag is never set.


0062387 {
0062388          udp_fd_t *udp_fd;
0062389          int i;
0062390 
0062391 assert (!udp_port->up_wr_pack);
0062392 assert (!(udp_port->up_flags & (UPF_WRITE_IP|UPF_WRITE_SP)));
0062393 
0062394          while (udp_port->up_flags & UPF_MORE2WRITE)
0062395          {
0062396                   udp_port->up_flags &= ~UPF_MORE2WRITE;
0062397 
0062398                   for (i= 0, udp_fd= udp_port->up_next_fd; i<UDP_FD_NR;
0062399                            i++, udp_fd++)
0062400                   {
0062401                            if (udp_fd == &udp_fd_table[UDP_FD_NR])
0062402                                     udp_fd= udp_fd_table;
0062403 
0062404                            if (!(udp_fd->uf_flags & UFF_INUSE))
0062405                                     continue;
0062406                            if (!(udp_fd->uf_flags & UFF_WRITE_IP))
0062407                                     continue;
0062408                            if (udp_fd->uf_port != udp_port)
0062409                                     continue;
0062410                            restart_write_fd(udp_fd);
0062411                            if (udp_port->up_flags & UPF_WRITE_IP)
0062412                            {
0062413                                     udp_port->up_next_fd= udp_fd+1;
0062414                                     udp_port->up_flags |= UPF_MORE2WRITE;
0062415                                     return;
0062416                            }
0062417                   }
0062418          }
0062419 }
0062420 
0062421 PUBLIC int udp_cancel(fd, which_operation)
0062422 int fd;
0062423 int which_operation;
udp_cancel()

If a message from the file system arrives requesting the cancellation for a previous request which has not begun to be processed, sr_rec() takes the message out of the queue. However, if the original request is already being processed, udp_cancel() cancels the operation.

udp_cancel() always returns NW_OK.


0062424 {
0062425          udp_fd_t *udp_fd;
0062426 
0062427          DBLOCK(0x10, printf("udp_cancel(%d, %d)\n", fd, which_operation));
0062428 
0062429          udp_fd= &udp_fd_table[fd];
Find the udp file descriptor with an index of fd (the first parameter of udp_cancel()) within udp_fd_table[].


0062430 
Clear the appropriate flag and call reply_thr_put() or reply_thr_get().


0062431          switch (which_operation)
0062432          {
0062433          case SR_CANCEL_READ:
0062434 assert (udp_fd->uf_flags & UFF_READ_IP);
0062435                   udp_fd->uf_flags &= ~UFF_READ_IP;
0062436                   reply_thr_put(udp_fd, EINTR, FALSE);
reply_thr_get() / reply_thr_put() / udp

reply_thr_get() calls (indirectly) sr_get_userdata(), which reports to the file system (FS) whether a previous operation requested by a process was successful. reply, reply_thr_get()'s second parameter, indicates whether the previous operation was successful.

After sending the message to the FS, sr_get_userdata() processes the messages in the write or ioctl queue.

reply_thr_put() does nearly the same thing as reply_thr_get(). However, instead of reporting whether write and ioctl operations were successful, reply_thr_put() reports on read and ioctl operations.


0062437                   break;
0062438          case SR_CANCEL_WRITE:
0062439 assert (udp_fd->uf_flags & UFF_WRITE_IP);
0062440                   udp_fd->uf_flags &= ~UFF_WRITE_IP;
0062441                   if (udp_fd->uf_port->up_write_fd == udp_fd)
0062442                            udp_fd->uf_port->up_write_fd= NULL;
If a write operation is suspended, up_write_fd is the udp file descriptor whose write operation is suspended. Clear the field.


0062443                   reply_thr_get(udp_fd, EINTR, FALSE);
reply_thr_get() / reply_thr_put() / udp

reply_thr_get() calls (indirectly) sr_get_userdata(), which reports to the file system (FS) whether a previous operation requested by a process was successful. reply, reply_thr_get()'s second parameter, indicates whether the previous operation was successful.

After sending the message to the FS, sr_get_userdata() processes the messages in the write or ioctl queue.

reply_thr_put() does nearly the same thing as reply_thr_get(). However, instead of reporting whether write and ioctl operations were successful, reply_thr_put() reports on read and ioctl operations.


0062444                   break;
0062445          case SR_CANCEL_IOCTL:
0062446 assert (udp_fd->uf_flags & UFF_IOCTL_IP);
0062447                   udp_fd->uf_flags &= ~UFF_IOCTL_IP;
0062448                   reply_thr_get(udp_fd, EINTR, TRUE);
reply_thr_get() / reply_thr_put() / udp

reply_thr_get() calls (indirectly) sr_get_userdata(), which reports to the file system (FS) whether a previous operation requested by a process was successful. reply, reply_thr_get()'s second parameter, indicates whether the previous operation was successful.

After sending the message to the FS, sr_get_userdata() processes the messages in the write or ioctl queue.

reply_thr_put() does nearly the same thing as reply_thr_get(). However, instead of reporting whether write and ioctl operations were successful, reply_thr_put() reports on read and ioctl operations.


0062449                   break;
0062450 #if !CRAMPED
0062451          default:
0062452                   ip_panic(( "got unknown cancel request" ));
0062453 #endif
hash_fd() / unhash_fd() / udp

Hash tables enable quick lookups. The hash table used by the udp code enables a udp file descriptor to be found quickly using its common udp port number (0-65535).

hash_fd(udp_fd) either places the udp file descriptor udp_fd, hash_fd()'s only parameter, at the head of the up_port_any linked list of the file descriptor's udp port (if the file descriptor is not associated with a specific udp port) or the function calculates the hash of the udp file descriptor based on its udp port number and places it at the head of the linked list for that hash value (e.g., up_port_hash[10]).

unhash_fd(udp_fd) removes the udp file descriptor udp_fd, unhash_fd()'s only parameter, from the linked list where hash_fd() inserted it.

Note that the port used for the hash is not the physical udp port.


0062454          }
0062455          return NW_OK;
udp_cancel() always returns NW_OK.


0062456 }
0062457 
0062458 PRIVATE void udp_buffree (priority)
0062459 int priority;
udp_buffree()

udp_buffree(priority) is called by bf_memreq() if bf_memreq() does not have enough accessors to satisfy a buffer request. priority, udp_buffree()'s only parameter, will be either UDP_PRI_FDBUFS_EXTRA (#define'd as 5) or UDP_PRI_FDBUFS (#define'd as 6). If priority is UDP_PRI_FDBUFS_EXTRA, all packets except the last packet in the read queue of every udp file descriptor will be released. If priority is UDP_PRI_FDBUFS, all packets in the read queue of every udp file descriptor will be freed. bf_memreq() calls udp_buffree(UDP_PRI_FDBUFS_EXTRA) first and, if more accessors are needed, then calls udp_buffree(UDP_PRI_FDBUFS).


0062460 {
0062461          int i;
0062462          time_t curr_tim;
0062463          udp_fd_t *udp_fd;
0062464          acc_t *tmp_acc, *next_acc;
0062465 
0062466          if (priority == UDP_PRI_FDBUFS_EXTRA)
Remove all packets except the last packet (most recently added) in the read queue of every udp file descriptor.


0062467          {
0062468                   for (i=0, udp_fd= udp_fd_table; i<UDP_FD_NR; i++, udp_fd++)
0062469                   {
0062470                            while (udp_fd->uf_rdbuf_head &&
0062471                                     udp_fd->uf_rdbuf_head->acc_ext_link)
0062472                            {
0062473                                     tmp_acc= udp_fd->uf_rdbuf_head;
0062474                                     udp_fd->uf_rdbuf_head= tmp_acc->acc_ext_link;
0062475                                     bf_afree(tmp_acc);
0062476                            }
0062477                   }
0062478          }
0062479 
0062480          if (priority == UDP_PRI_FDBUFS)
Remove all packets in the read queue of every udp file descriptor.


0062481          {
0062482                   for (i=0, udp_fd= udp_fd_table; i<UDP_FD_NR; i++, udp_fd++)
0062483                   {
0062484                            while (udp_fd->uf_rdbuf_head)
0062485                            {
0062486                                     tmp_acc= udp_fd->uf_rdbuf_head;
0062487                                     udp_fd->uf_rdbuf_head= tmp_acc->acc_ext_link;
0062488                                     bf_afree(tmp_acc);
0062489                            }
0062490                   }
0062491          }
0062492 }
0062493 
0062494 PRIVATE void udp_rd_enqueue(udp_fd, pack, exp_tim)
0062495 udp_fd_t *udp_fd;
0062496 acc_t *pack;
0062497 time_t exp_tim;
udp_rd_enqueue()

udp_rd_enqueue(udp_fd, pack, exp_tim) places the packet pack, udp_rd_enqueue()'s second parameter, into the read queue of the udp file descriptor udp_fd, udp_rd_enqueue()'s first parameter. If the packet is the first in the queue, udp_rd_enqueue() sets the packet's expiration time to exp_tim, udp_rd_enqueue()'s third parameter.

udp_rd_enqueue() is called in two places by udp_ip_arrived() after a packet arrives. If a read request was made on a udp file descriptor to which the packet is destined, the packet is delivered immediately. If a read request was not made, the packet remains in the udp file descriptor's read queue until the udp file descriptor makes a read request.


0062498 {
0062499          acc_t *tmp_acc;
0062500 
0062501          if (pack->acc_linkC != 1)
If there are multiple references to the packet, make a copy of it and use the copy as pack (the packet that is manipulated in this function).


0062502          {
0062503                   tmp_acc= bf_dupacc(pack);
bf_dupacc()

bf_dupacc(acc_ptr) creates a new accessor that is a duplicate of acc_ptr, bf_dupacc()'s only parameter.

More specifically, bf_dupacc() removes an accessor from acc_freelist and copies the accessor referred to by acc_ptr and sets acc_linkC of the new accessor to one. If acc_next is non-null, bf_dupacc() also increments acc_linkC of acc_next (the next accessor in the linked list). And if acc_buffer is non-null, bf_dupacc() also increments buf_linkC of the buffer.

This process is best described by a diagram:



Note that the link counts (acc_linkC and buf_linkC) for accessors[65] and buffers512[127] are incremented.

Remember that free accessors associated with buffers reside on buf512_freelist and free accessors not associated with buffers reside on acc_freelist. In addition, acc_linkC is one or greater if the accessor is no longer on either of the freelists and is greater than one if more than one accessor refers to it (through acc_next). buf_linkC is one or greater if its associated accessor (or accessors) are not on buf512_freelist and is greater than one if more than one accessor refers to it (through acc_buffer).



0062504                   bf_afree(pack);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the chain will not be freed in its entirety. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0062505                   pack= tmp_acc;
0062506          }
0062507          pack->acc_ext_link= NULL;
pack will be the last packet in the read queue.


0062508          if (udp_fd->uf_rdbuf_head == NULL)
Place the packet at its appropriate position in the read queue. Recall that acc_ext_link is used to link two packets.

If there are no packets already in the read queue, set the expiration timer. The packet (and all others that are added later) will be dropped if it is not delivered by this time (see line 61780).


0062509          {
0062510                   udp_fd->uf_exp_tim= exp_tim;
0062511                   udp_fd->uf_rdbuf_head= pack;
0062512          }
0062513          else
0062514                   udp_fd->uf_rdbuf_tail->acc_ext_link= pack;
0062515          udp_fd->uf_rdbuf_tail= pack;
0062516 }
0062517 
0062518 PRIVATE void hash_fd(udp_fd)
0062519 udp_fd_t *udp_fd;
hash_fd() / unhash_fd() / udp

Hash tables enable quick lookups. The hash table used by the udp code enables a udp file descriptor to be found quickly using its common udp port number (0-65535).

hash_fd(udp_fd) either places the udp file descriptor udp_fd, hash_fd()'s only parameter, at the head of the up_port_any linked list of the file descriptor's udp port (if the file descriptor is not associated with a specific udp port) or the function calculates the hash of the udp file descriptor based on its udp port number and places it at the head of the linked list for that hash value (e.g., up_port_hash[10]).

unhash_fd(udp_fd) removes the udp file descriptor udp_fd, unhash_fd()'s only parameter, from the linked list where hash_fd() inserted it.

Note that the port used for the hash is not the physical udp port.


0062520 {
0062521          udp_port_t *udp_port;
0062522          int hash;
0062523 
0062524          udp_port= udp_fd->uf_port;
0062525          if ((udp_fd->uf_udpopt.nwuo_flags & NWUO_LOCPORT_MASK) ==
0062526                   NWUO_LP_ANY)
From ip(4):

"NWUO_LP_ANY does not select a port. Reception of data is therefore not possible but it is possible to send data."

If this udp file descriptor has the NWUO_LP_ANY flag set, place the file descriptor at the head of the up_port_any linked list.


0062527          {
0062528                   udp_fd->uf_port_next= udp_port->up_port_any;
0062529                   udp_port->up_port_any= udp_fd;
0062530          }
0062531          else
The udp file descriptor is associated with a specific port.


0062532          {
0062533                   hash= udp_fd->uf_udpopt.nwuo_locport;
For a udp port number of 52358:

hash = 1100110010000110 (binary)


0062534                   hash ^= (hash >> 8);
hash = 1100110010000110 ^ (1100110010000110 >> 8)
= 1100110010000110 ^ 11001100
= 1100110001001010


0062535                   hash &= (UDP_PORT_HASH_NR-1);
hash = 1100110001001010 & (UDP_PORT_HASH_NR-1)
= 1100110001001010 & 15
= 1100110001001010 & 1111 = 1010 = 10 (decimal)

Therefore, a udp file descriptor (udp_fd) will be placed at the head of the 11th element of the port hash table (i.e., up_port_hash[10]).


0062536 
0062537                   udp_fd->uf_port_next= udp_port->up_port_hash[hash];
0062538                   udp_port->up_port_hash[hash]= udp_fd;
0062539          }
0062540 }
0062541 
0062542 PRIVATE void unhash_fd(udp_fd)
0062543 udp_fd_t *udp_fd;
hash_fd() / unhash_fd() / udp

Hash tables enable quick lookups. The hash table used by the udp code enables a udp file descriptor to be found quickly using its common udp port number (0-65535).

hash_fd(udp_fd) either places the udp file descriptor udp_fd, hash_fd()'s only parameter, at the head of the up_port_any linked list of the file descriptor's udp port (if the file descriptor is not associated with a specific udp port) or the function calculates the hash of the udp file descriptor based on its udp port number and places it at the head of the linked list for that hash value (e.g., up_port_hash[10]).

unhash_fd(udp_fd) removes the udp file descriptor udp_fd, unhash_fd()'s only parameter, from the linked list where hash_fd() inserted it.

Note that the port used for the hash is not the physical udp port.


0062544 {
0062545          udp_port_t *udp_port;
0062546          udp_fd_t *prev, *curr, **udp_fd_p;
0062547          int hash;
0062548 
0062549          udp_port= udp_fd->uf_port;
0062550          if ((udp_fd->uf_udpopt.nwuo_flags & NWUO_LOCPORT_MASK) ==
0062551                   NWUO_LP_ANY)
From ip(4):

"NWUO_LP_ANY does not select a port. Reception of data is therefore not possible but it is possible to send data."

If this udp file descriptor has the NWUO_LP_ANY flag set, the file descriptor is in the up_port_any linked list of the udp port.


0062552          {
0062553                   udp_fd_p= &udp_port->up_port_any;
0062554          }
0062555          else
The udp file descriptor is associated with a specific port.


0062556          {
0062557                   hash= udp_fd->uf_udpopt.nwuo_locport;
For a udp port number of 52358:

hash = 1100110010000110 (binary)



0062558                   hash ^= (hash >> 8);
hash = 1100110010000110 ^ (1100110010000110 >> 8)
= 1100110010000110 ^ 11001100
= 1100110001001010



0062559                   hash &= (UDP_PORT_HASH_NR-1);
hash = 1100110001001010 & (UDP_PORT_HASH_NR-1)
= 1100110001001010 & 15
= 1100110001001010 & 1111 = 1010 = 10 (decimal)


0062560 
0062561                   udp_fd_p= &udp_port->up_port_hash[hash];
udp_fd_p points to the head of the linked list for the given hash number.


0062562          }
Find the udp file descriptor in the linked list selected on line 62553 or line 62561.


0062563          for (prev= NULL, curr= *udp_fd_p; curr;
0062564                   prev= curr, curr= curr->uf_port_next)
0062565          {
0062566                   if (curr == udp_fd)
0062567                            break;
0062568          }
0062569          assert(curr);
Remove the udp file descriptor from the linked list.


0062570          if (prev)
0062571                   prev->uf_port_next= curr->uf_port_next;
0062572          else
0062573                   *udp_fd_p= curr->uf_port_next;
0062574 }
0062575 

The following is not used for the default configuration.




0062576 #ifdef BUF_CONSISTENCY_CHECK
0062577 PRIVATE void udp_bufcheck()
0062578 {
0062579          int i;
0062580          udp_port_t *udp_port;
0062581          udp_fd_t *udp_fd;
0062582          acc_t *tmp_acc;
0062583 
0062584          for (i= 0, udp_port= udp_port_table; i<ip_conf_nr; i++, udp_port++)
0062585          {
0062586                   if (udp_port->up_wr_pack)
0062587                            bf_check_acc(udp_port->up_wr_pack);
0062588          }
0062589 
0062590          for (i= 0, udp_fd= udp_fd_table; i<UDP_FD_NR; i++, udp_fd++)
0062591          {
0062592                   for (tmp_acc= udp_fd->uf_rdbuf_head; tmp_acc;
0062593                            tmp_acc= tmp_acc->acc_ext_link)
0062594                   {
0062595                            bf_check_acc(tmp_acc);
0062596                   }
0062597          }
0062598 }
0062599 #endif
0062600 
0062601 /*
0062602  * $PchId: udp.c,v 1.10 1996/08/06 06:48:05 philip Exp $
0062603  */